# xml-pipeline/xml_pipeline/server/api.py

"""
api.py — REST API routes for AgentServer.

Provides endpoints for:
- Organism info and config
- Agent listing and details
- Thread listing and management
- Message injection
"""

from __future__ import annotations

import uuid
from typing import TYPE_CHECKING, Optional

from fastapi import APIRouter, HTTPException, Query

from xml_pipeline.server.models import (
    AgentInfo,
    AgentListResponse,
    AgentUsageInfo,
    CapabilityDetail,
    CapabilityInfo,
    CapabilityListResponse,
    ErrorResponse,
    InjectRequest,
    InjectResponse,
    MessageListResponse,
    ModelUsageInfo,
    OrganismInfo,
    ThreadBudgetInfo,
    ThreadBudgetListResponse,
    ThreadInfo,
    ThreadListResponse,
    ThreadStatus,
    UsageOverview,
    UsageResponse,
    UsageTotals,
)

if TYPE_CHECKING:
    from xml_pipeline.server.state import ServerState


def create_router(state: "ServerState") -> APIRouter:
    """Create API router with state dependency."""
    router = APIRouter(prefix="/api/v1")

    # =========================================================================
    # Organism Endpoints
    # =========================================================================

    @router.get("/organism", response_model=OrganismInfo)
    async def get_organism() -> OrganismInfo:
        """Return the organism overview reported by the server state."""
        organism = state.get_organism_info()
        return organism

    @router.get("/organism/config")
    async def get_organism_config() -> dict:
        """
        Return the organism configuration as exposed by the server state.

        The result of ``state.get_organism_config()`` is passed through
        untouched; any removal of secrets is expected to happen there.
        """
        config = state.get_organism_config()
        return config

    # =========================================================================
    # Capability Introspection Endpoints (for operators, not agents)
    # =========================================================================

    @router.get("/capabilities", response_model=CapabilityListResponse)
    async def list_capabilities() -> CapabilityListResponse:
        """
        Enumerate every capability registered in the organism.

        Intended for operator introspection only; agents are not meant to
        use this endpoint and only know the peers they declare.
        """
        registered = state.get_capabilities()
        how_many = len(registered)
        return CapabilityListResponse(capabilities=registered, count=how_many)

    @router.get("/capabilities/{name}", response_model=CapabilityDetail)
    async def get_capability(name: str) -> CapabilityDetail:
        """
        Look up the detailed record of one capability by name.

        Intended for operator introspection only.

        Raises:
            HTTPException: 404 when the state has no capability called ``name``.
        """
        found = state.get_capability(name)
        if found is not None:
            return found

        raise HTTPException(
            status_code=404,
            detail=f"Capability not found: {name}",
        )

    # =========================================================================
    # Agent Endpoints
    # =========================================================================

    @router.get("/agents", response_model=AgentListResponse)
    async def list_agents() -> AgentListResponse:
        """Return every agent known to the server state, with a count."""
        known_agents = state.get_agents()
        return AgentListResponse(
            agents=known_agents,
            count=len(known_agents),
        )

    @router.get("/agents/{name}", response_model=AgentInfo)
    async def get_agent(name: str) -> AgentInfo:
        """
        Return the details of a single agent.

        Raises:
            HTTPException: 404 when no agent named ``name`` is known.
        """
        info = state.get_agent(name)
        if info is not None:
            return info

        raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

    @router.get("/agents/{name}/config")
    async def get_agent_config(name: str) -> dict:
        """
        Return the configuration-style fields of one agent.

        The response is assembled from the agent record held by the server
        state, not read from a configuration file.

        Raises:
            HTTPException: 404 when no agent named ``name`` is known.
        """
        info = state.get_agent(name)
        if info is None:
            raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

        # Expose only the config-relevant attributes, under camelCase keys.
        config: dict = {}
        config["name"] = info.name
        config["description"] = info.description
        config["isAgent"] = info.is_agent
        config["peers"] = info.peers
        config["payloadClass"] = info.payload_class
        return config

    @router.get("/agents/{name}/schema")
    async def get_agent_schema(name: str) -> dict:
        """
        Return the payload schema registered for an agent.

        The schema is wrapped in a JSON object together with a
        ``contentType`` hint of ``application/xml``.

        Raises:
            HTTPException: 404 when the state has no schema for ``name``.
        """
        agent_schema = state.get_agent_schema(name)
        if agent_schema is not None:
            return {"schema": agent_schema, "contentType": "application/xml"}

        raise HTTPException(
            status_code=404,
            detail=f"Schema not found for agent: {name}",
        )

    # =========================================================================
    # Thread Endpoints
    # =========================================================================

    @router.get("/threads", response_model=ThreadListResponse)
    async def list_threads(
        status: Optional[str] = Query(None, description="Filter by status"),
        agent: Optional[str] = Query(None, description="Filter by participant agent"),
        limit: int = Query(50, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> ThreadListResponse:
        """
        List threads with optional filtering and pagination.

        ``status`` must be one of the ``ThreadStatus`` values when given; an
        empty or missing value means "no status filter". ``agent``, ``limit``
        and ``offset`` are forwarded to the state layer unchanged.

        Raises:
            HTTPException: 400 when ``status`` is not a valid ``ThreadStatus``.
        """
        thread_status = None
        if status:
            try:
                thread_status = ThreadStatus(status)
            except ValueError:
                valid_values = [s.value for s in ThreadStatus]
                # The enum's ValueError carries nothing the client-facing
                # detail does not already say, so suppress the implicit chain.
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid status: {status}. Valid values: {valid_values}",
                ) from None

        threads, total = state.get_threads(
            status=thread_status,
            agent=agent,
            limit=limit,
            offset=offset,
        )
        # ``count`` is the size of this page; ``total`` is whatever the state
        # layer reports alongside it.
        return ThreadListResponse(
            threads=threads,
            count=len(threads),
            total=total,
            offset=offset,
            limit=limit,
        )

    @router.get("/threads/{thread_id}", response_model=ThreadInfo)
    async def get_thread(thread_id: str) -> ThreadInfo:
        """
        Return the details of a single thread.

        Raises:
            HTTPException: 404 when ``thread_id`` is unknown to the state.
        """
        found = state.get_thread(thread_id)
        if found is not None:
            return found

        raise HTTPException(status_code=404, detail=f"Thread not found: {thread_id}")

    @router.get("/threads/{thread_id}/messages", response_model=MessageListResponse)
    async def get_thread_messages(
        thread_id: str,
        limit: int = Query(50, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> MessageListResponse:
        """
        Return one page of the messages belonging to a thread.

        Raises:
            HTTPException: 404 when ``thread_id`` is unknown to the state.
        """
        # Existence check first, so an unknown thread is reported as 404.
        if state.get_thread(thread_id) is None:
            raise HTTPException(status_code=404, detail=f"Thread not found: {thread_id}")

        page, overall = state.get_messages(
            thread_id=thread_id,
            limit=limit,
            offset=offset,
        )
        page_size = len(page)
        return MessageListResponse(
            messages=page,
            count=page_size,
            total=overall,
            offset=offset,
            limit=limit,
        )

    @router.post("/threads/{thread_id}/kill")
    async def kill_thread(thread_id: str) -> dict:
        """
        Terminate a thread by completing it with the ``KILLED`` status.

        Raises:
            HTTPException: 404 when ``thread_id`` is unknown to the state.
        """
        if state.get_thread(thread_id) is None:
            raise HTTPException(status_code=404, detail=f"Thread not found: {thread_id}")

        await state.complete_thread(thread_id, status=ThreadStatus.KILLED)

        outcome = {"success": True, "threadId": thread_id}
        return outcome

    # =========================================================================
    # Message Endpoints
    # =========================================================================

    @router.get("/messages", response_model=MessageListResponse)
    async def list_messages(
        agent: Optional[str] = Query(None, description="Filter by agent (sender or receiver)"),
        limit: int = Query(50, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> MessageListResponse:
        """Return one page of the global message history, optionally per agent."""
        page, overall = state.get_messages(agent=agent, limit=limit, offset=offset)
        page_size = len(page)
        return MessageListResponse(
            messages=page,
            count=page_size,
            total=overall,
            offset=offset,
            limit=limit,
        )

    # =========================================================================
    # Usage/Gas Tracking Endpoints
    # =========================================================================

    @router.get("/usage", response_model=UsageResponse)
    async def get_usage() -> UsageResponse:
        """
        Report the overall LLM usage picture (the "gas gauge").

        Combines the usage tracker's aggregate totals with its per-agent and
        per-model breakdowns, plus the number of threads the budget registry
        currently reports usage for.
        """
        from xml_pipeline.llm import get_usage_tracker
        from xml_pipeline.message_bus import get_budget_registry

        usage_tracker = get_usage_tracker()
        budgets = get_budget_registry()

        # Counter fields shared by the aggregate, per-agent and per-model records.
        counter_keys = (
            "total_tokens",
            "prompt_tokens",
            "completion_tokens",
            "request_count",
            "total_cost",
        )

        def counters(record: dict) -> dict:
            """Pick the shared counter fields out of one tracker record."""
            return {key: record[key] for key in counter_keys}

        # Aggregate totals additionally carry the average latency.
        overall = usage_tracker.get_totals()
        totals = UsageTotals(
            **counters(overall),
            avg_latency_ms=overall["avg_latency_ms"],
        )

        # Per-agent breakdown.
        by_agent = []
        for agent_id, record in usage_tracker.get_all_agent_totals().items():
            by_agent.append(AgentUsageInfo(agent_id=agent_id, **counters(record)))

        # Per-model breakdown.
        by_model = []
        for model, record in usage_tracker.get_all_model_totals().items():
            by_model.append(ModelUsageInfo(model=model, **counters(record)))

        # Every entry the registry reports counts as one active thread.
        active_threads = len(budgets.get_all_usage())

        return UsageResponse(
            usage=UsageOverview(
                totals=totals,
                by_agent=by_agent,
                by_model=by_model,
                active_threads=active_threads,
            )
        )

    def _thread_budget_info(thread_id: str, usage: dict) -> ThreadBudgetInfo:
        """
        Build the API model for one thread from a budget-registry usage dict.

        Shared by the list and single-thread budget endpoints so both compute
        ``percent_used`` and ``is_exhausted`` the same way.
        """
        max_tokens = usage["max_tokens"]
        total = usage["total_tokens"]
        remaining = usage["remaining"]
        # Guard the division: a non-positive limit is reported as 0% used.
        percent = (total / max_tokens * 100) if max_tokens > 0 else 0

        return ThreadBudgetInfo(
            thread_id=thread_id,
            max_tokens=max_tokens,
            prompt_tokens=usage["prompt_tokens"],
            completion_tokens=usage["completion_tokens"],
            total_tokens=total,
            remaining=remaining,
            percent_used=round(percent, 1),
            is_exhausted=remaining <= 0,
        )

    @router.get("/usage/threads", response_model=ThreadBudgetListResponse)
    async def get_thread_budgets() -> ThreadBudgetListResponse:
        """
        Get token budgets for all threads the budget registry reports.

        Threads are returned hottest first (highest ``percent_used``), which
        makes runaway agents easy to spot.
        """
        from xml_pipeline.message_bus import get_budget_registry

        registry = get_budget_registry()

        threads = [
            _thread_budget_info(thread_id, usage)
            for thread_id, usage in registry.get_all_usage().items()
        ]
        # Sort by percent used (descending) - hottest threads first
        threads.sort(key=lambda t: t.percent_used, reverse=True)

        return ThreadBudgetListResponse(
            threads=threads,
            count=len(threads),
            # NOTE(review): reads a private attribute of the registry; switch
            # to a public accessor if the registry offers one.
            default_max_tokens=registry._max_tokens_per_thread,
        )

    @router.get("/usage/threads/{thread_id}", response_model=ThreadBudgetInfo)
    async def get_thread_budget(thread_id: str) -> ThreadBudgetInfo:
        """
        Get token budget for a specific thread.

        Raises:
            HTTPException: 404 when the registry has no usage for ``thread_id``.
        """
        from xml_pipeline.message_bus import get_budget_registry

        usage = get_budget_registry().get_usage(thread_id)
        if usage is None:
            raise HTTPException(
                status_code=404,
                detail=f"No budget found for thread: {thread_id}",
            )

        return _thread_budget_info(thread_id, usage)

    @router.get("/usage/agents/{agent_id}", response_model=AgentUsageInfo)
    async def get_agent_usage(agent_id: str) -> AgentUsageInfo:
        """
        Get usage totals for a specific agent.

        The totals come straight from the usage tracker; this handler does
        not check that ``agent_id`` names a registered agent.
        """
        from xml_pipeline.llm import get_usage_tracker

        tracker = get_usage_tracker()
        data = tracker.get_agent_totals(agent_id)

        return AgentUsageInfo(
            agent_id=agent_id,
            total_tokens=data["total_tokens"],
            prompt_tokens=data["prompt_tokens"],
            completion_tokens=data["completion_tokens"],
            request_count=data["request_count"],
            total_cost=data["total_cost"],
        )

    # NOTE(review): a model id containing "/" will not match this route unless
    # the parameter is declared as ``{model:path}`` - confirm which ids occur.
    @router.get("/usage/models/{model}", response_model=ModelUsageInfo)
    async def get_model_usage(model: str) -> ModelUsageInfo:
        """
        Get usage totals for a specific model.

        The totals come straight from the usage tracker, keyed by ``model``.
        """
        from xml_pipeline.llm import get_usage_tracker

        tracker = get_usage_tracker()
        data = tracker.get_model_totals(model)

        return ModelUsageInfo(
            model=model,
            total_tokens=data["total_tokens"],
            prompt_tokens=data["prompt_tokens"],
            completion_tokens=data["completion_tokens"],
            request_count=data["request_count"],
            total_cost=data["total_cost"],
        )

    @router.post("/usage/reset")
    async def reset_usage() -> dict:
        """
        Reset the usage tracker (meant for testing and development).

        WARNING: all recorded usage history is discarded. Use with caution.
        """
        from xml_pipeline.llm import reset_usage_tracker

        reset_usage_tracker()

        acknowledgement = {"success": True, "message": "Usage tracking reset"}
        return acknowledgement

    # =========================================================================
    # Control Endpoints
    # =========================================================================

    @router.post("/inject", response_model=InjectResponse)
    async def inject_message(request: InjectRequest) -> InjectResponse:
        """
        Record a message addressed to an agent, sent on behalf of ``"api"``.

        A fresh UUID thread id is generated when the request carries none.
        The message is currently only recorded in the server state; it is
        not yet pushed into the pump queue (see the TODO below).

        Raises:
            HTTPException: 400 when ``request.to`` is not a known agent.
        """
        target = request.to

        # Validate target exists
        if state.get_agent(target) is None:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown target agent: {target}",
            )

        # Generate or use provided thread ID
        thread_id = request.thread_id
        if not thread_id:
            thread_id = str(uuid.uuid4())

        # Build payload XML from dict
        # For now, the first key of the payload dict names the payload type.
        payload_type = next(iter(request.payload), "Payload")

        # Record the message
        message_id = await state.record_message(
            thread_id=thread_id,
            from_id="api",
            to_id=target,
            payload_type=payload_type,
            payload=request.payload,
        )

        # TODO: Actually inject into pump queue
        # This requires building an envelope and calling pump.inject()

        return InjectResponse(thread_id=thread_id, message_id=message_id)

    @router.post("/agents/{name}/pause")
    async def pause_agent(name: str) -> dict:
        """
        Pause an agent by switching its state to ``AgentState.PAUSED``.

        Raises:
            HTTPException: 404 when no agent named ``name`` is known.
        """
        from xml_pipeline.server.models import AgentState

        if state.get_agent(name) is None:
            raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

        await state.update_agent_state(name, AgentState.PAUSED)

        outcome = {"success": True, "agent": name, "state": "paused"}
        return outcome

    @router.post("/agents/{name}/resume")
    async def resume_agent(name: str) -> dict:
        """
        Resume an agent by switching its state to ``AgentState.IDLE``.

        The agent's current state is not inspected before the switch.

        Raises:
            HTTPException: 404 when no agent named ``name`` is known.
        """
        from xml_pipeline.server.models import AgentState

        if state.get_agent(name) is None:
            raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

        await state.update_agent_state(name, AgentState.IDLE)

        outcome = {"success": True, "agent": name, "state": "idle"}
        return outcome

    @router.post("/organism/reload")
    async def reload_config() -> dict:
        """
        Hot-reload the organism configuration.

        Delegates to ``state.reload_config()``, which is documented to
        re-read organism.yaml and reconcile listeners:
        - New listeners are registered
        - Removed listeners are unregistered
        - Changed listeners are updated

        Raises:
            HTTPException: 500 when the reload result reports failure; the
                detail is the result's ``error`` entry when present.
        """
        outcome = await state.reload_config()
        if outcome["success"]:
            return outcome

        failure_detail = outcome.get("error", "Reload failed")
        raise HTTPException(status_code=500, detail=failure_detail)

    @router.post("/organism/stop")
    async def stop_organism() -> dict:
        """
        Begin a graceful shutdown by flagging the server state as stopping.

        Only the flag is set here; the pump is not signalled yet.
        """
        state.set_stopping()
        # TODO: Signal pump to stop
        acknowledgement = {"success": True, "status": "stopping"}
        return acknowledgement

    return router