""" management.py — Management plane FastAPI application (port 9090). Full operator visibility and control: - All REST API endpoints (organism, agents, threads, usage, journal) - Audit log viewer - Configuration management - Static dashboard - WebSocket for real-time monitoring This app should only be accessible to operators, never to agents. """ from __future__ import annotations import time from contextlib import asynccontextmanager from pathlib import Path from typing import TYPE_CHECKING, Any, AsyncGenerator, Optional from fastapi import APIRouter, FastAPI, Query from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from xml_pipeline.server.api import create_router from xml_pipeline.server.state import ServerState from xml_pipeline.server.websocket import create_websocket_router if TYPE_CHECKING: from xml_pipeline.message_bus.stream_pump import StreamPump def create_management_app( pump: "StreamPump", *, title: str = "AgentOS Management", version: str = "1.0.0", cors_origins: Optional[list[str]] = None, ) -> FastAPI: """ Create the management FastAPI app (full operator access). Includes all existing API endpoints plus: - Audit log endpoints - Dashboard static file serving """ state = ServerState(pump) @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: state.set_running() yield state.set_stopping() app = FastAPI( title=title, version=version, description=( "Management plane for AgentOS operators. " "Full monitoring, control, audit, and configuration access." ), lifespan=lifespan, ) # CORS for dashboard if cors_origins is None: cors_origins = ["*"] app.add_middleware( CORSMiddleware, allow_origins=cors_origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Health check @app.get("/health") async def health_check() -> dict[str, Any]: info = state.get_organism_info() return { "status": "healthy", "organism": info.name, "uptime_seconds": info.uptime_seconds, "management": True, } # Include full API router (all endpoints) app.include_router(create_router(state)) # Include WebSocket endpoints (for dashboard real-time updates) app.include_router(create_websocket_router(state)) # Audit log endpoints audit_router = _create_audit_router() app.include_router(audit_router) # Store state app.state.server_state = state app.state.pump = pump # Mount dashboard static files (if directory exists) dashboard_paths = [ Path(__file__).parent.parent.parent / "dashboard", # repo root Path("/app/dashboard"), # container path ] for dashboard_dir in dashboard_paths: if dashboard_dir.is_dir(): app.mount( "/dashboard", StaticFiles(directory=str(dashboard_dir), html=True), name="dashboard", ) break return app def _create_audit_router() -> APIRouter: """Create the audit log API router.""" router = APIRouter(prefix="/api/v1/audit", tags=["audit"]) @router.get("/events") async def get_audit_events( event_type: Optional[str] = Query(None, description="Filter by event type"), listener: Optional[str] = Query(None, description="Filter by listener name"), severity: Optional[str] = Query(None, description="Filter by severity"), since: Optional[float] = Query(None, description="Events since timestamp"), limit: int = Query(100, ge=1, le=1000), offset: int = Query(0, ge=0), ) -> dict[str, Any]: """Query audit log events.""" from xml_pipeline.server.audit import get_entries entries = get_entries( event_type=event_type, listener_name=listener, severity=severity, since=since, limit=limit, offset=offset, ) return {"events": entries, "count": len(entries)} @router.get("/stats") async def get_audit_stats() -> dict[str, Any]: """Get audit log statistics.""" from xml_pipeline.server.audit import get_stats return get_stats() @router.get("/security") async def get_security_events( limit: int = Query(50, ge=1, le=500), ) -> dict[str, Any]: """Get recent security-related events (warnings and above).""" from xml_pipeline.server.audit import get_entries warnings = get_entries(severity="warning", limit=limit) errors = get_entries(severity="error", limit=limit) critical = get_entries(severity="critical", limit=limit) all_events = sorted( warnings + errors + critical, key=lambda e: e["timestamp"], reverse=True, )[:limit] return {"events": all_events, "count": len(all_events)} return router