xml-pipeline/xml_pipeline/server/api.py
dullfig 860395cd58 Add usage/gas tracking REST API endpoints
Endpoints:
- GET /api/v1/usage - Overview with totals, per-agent, per-model breakdown
- GET /api/v1/usage/threads - List all thread budgets sorted by usage
- GET /api/v1/usage/threads/{id} - Single thread budget details
- GET /api/v1/usage/agents/{id} - Usage totals for specific agent
- GET /api/v1/usage/models/{model} - Usage totals for specific model
- POST /api/v1/usage/reset - Reset all usage tracking

Models:
- UsageTotals, UsageOverview, UsageResponse
- ThreadBudgetInfo, ThreadBudgetListResponse
- AgentUsageInfo, ModelUsageInfo

Also adds has_budget() method to ThreadBudgetRegistry for checking
if a thread exists without auto-creating it.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 21:20:36 -08:00

513 lines
18 KiB
Python

"""
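api.py — REST API routes for AgentServer.
Provides endpoints for:
- Organism info, config, hot-reload and shutdown
- Capability introspection (for operators)
- Agent listing, details, pause and resume
- Thread listing and management
- Message history and message injection
- Usage/gas tracking (token totals and per-thread budgets)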
"""
from __future__ import annotations
import uuid
from typing import TYPE_CHECKING, Optional
from fastapi import APIRouter, HTTPException, Query
from xml_pipeline.server.models import (
AgentInfo,
AgentListResponse,
AgentUsageInfo,
CapabilityDetail,
CapabilityInfo,
CapabilityListResponse,
ErrorResponse,
InjectRequest,
InjectResponse,
MessageListResponse,
ModelUsageInfo,
OrganismInfo,
ThreadBudgetInfo,
ThreadBudgetListResponse,
ThreadInfo,
ThreadListResponse,
ThreadStatus,
UsageOverview,
UsageResponse,
UsageTotals,
)
if TYPE_CHECKING:
from xml_pipeline.server.state import ServerState
def create_router(state: "ServerState") -> APIRouter:
"""
Create the REST API router bound to a server state object.

Every route handler registered on the router is defined inside this
function and reaches ``state`` through its closure.

Args:
state: Server state object that the handlers query and update.

Returns:
The ``APIRouter`` (URL prefix ``/api/v1``) with all routes registered.
"""
# All routes defined below are mounted under this common prefix.
router = APIRouter(prefix="/api/v1")
# =========================================================================
# Organism Endpoints
# =========================================================================
@router.get("/organism", response_model=OrganismInfo)
async def get_organism() -> OrganismInfo:
    """Return the organism overview as reported by the server state."""
    organism_info = state.get_organism_info()
    return organism_info
@router.get("/organism/config")
async def get_organism_config() -> dict:
    """Return the organism configuration supplied by the server state.

    The result of ``state.get_organism_config()`` is returned unchanged;
    removing secrets is that method's responsibility, not this handler's.
    """
    organism_config = state.get_organism_config()
    return organism_config
# =========================================================================
# Capability Introspection Endpoints (for operators, not agents)
# =========================================================================
@router.get("/capabilities", response_model=CapabilityListResponse)
async def list_capabilities() -> CapabilityListResponse:
    """Return every capability known to the server state, with a count.

    Intended for operator introspection rather than for use by agents.
    """
    registered = state.get_capabilities()
    total = len(registered)
    return CapabilityListResponse(capabilities=registered, count=total)
@router.get("/capabilities/{name}", response_model=CapabilityDetail)
async def get_capability(name: str) -> CapabilityDetail:
    """Return the detail record for one capability.

    Intended for operator introspection.

    Raises:
        HTTPException: 404 when the state has no capability called ``name``.
    """
    detail = state.get_capability(name)
    if detail is not None:
        return detail
    raise HTTPException(
        status_code=404,
        detail=f"Capability not found: {name}",
    )
# =========================================================================
# Agent Endpoints
# =========================================================================
@router.get("/agents", response_model=AgentListResponse)
async def list_agents() -> AgentListResponse:
    """Return all agents reported by the server state, with a count."""
    known_agents = state.get_agents()
    total = len(known_agents)
    return AgentListResponse(agents=known_agents, count=total)
@router.get("/agents/{name}", response_model=AgentInfo)
async def get_agent(name: str) -> AgentInfo:
    """Return the details of a single agent.

    Raises:
        HTTPException: 404 when no agent called ``name`` exists.
    """
    found = state.get_agent(name)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail=f"Agent not found: {name}")
@router.get("/agents/{name}/config")
async def get_agent_config(name: str) -> dict:
    """Return the configuration-related fields of one agent.

    The response is assembled from attributes of the agent record held by
    the server state (name, description, agent flag, peers, payload class).

    Raises:
        HTTPException: 404 when no agent called ``name`` exists.
    """
    record = state.get_agent(name)
    if record is None:
        raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

    # Keys are camelCase because they form the JSON response body.
    config_view = dict(
        name=record.name,
        description=record.description,
        isAgent=record.is_agent,
        peers=record.peers,
        payloadClass=record.payload_class,
    )
    return config_view
@router.get("/agents/{name}/schema")
async def get_agent_schema(name: str) -> dict:
    """Return the payload schema registered for an agent.

    Returns:
        A dict with the schema under ``"schema"`` and a fixed
        ``"contentType"`` of ``application/xml``.

    Raises:
        HTTPException: 404 when the state has no schema for ``name``.
    """
    xml_schema = state.get_agent_schema(name)
    if xml_schema is not None:
        return {"schema": xml_schema, "contentType": "application/xml"}
    raise HTTPException(
        status_code=404,
        detail=f"Schema not found for agent: {name}",
    )
# =========================================================================
# Thread Endpoints
# =========================================================================
@router.get("/threads", response_model=ThreadListResponse)
async def list_threads(
    status: Optional[str] = Query(None, description="Filter by status"),
    agent: Optional[str] = Query(None, description="Filter by participant agent"),
    limit: int = Query(50, ge=1, le=100),
    offset: int = Query(0, ge=0),
) -> ThreadListResponse:
    """List threads, optionally filtered by status and participant agent.

    Args:
        status: Name of a ``ThreadStatus`` value; an empty or missing value
            means "no status filter".
        agent: Only return threads this agent participates in.
        limit: Page size (1-100, default 50).
        offset: Number of threads to skip (>= 0).

    Returns:
        One page of threads plus the total reported by the state.

    Raises:
        HTTPException: 400 when ``status`` is not a valid ``ThreadStatus``.
    """
    thread_status = None
    if status:
        try:
            thread_status = ThreadStatus(status)
        except ValueError:
            valid_values = [s.value for s in ThreadStatus]
            # The enum's ValueError adds nothing for the client; suppress the
            # implicit exception context instead of chaining it by accident.
            raise HTTPException(
                status_code=400,
                detail=f"Invalid status: {status}. Valid values: {valid_values}",
            ) from None

    threads, total = state.get_threads(
        status=thread_status,
        agent=agent,
        limit=limit,
        offset=offset,
    )
    return ThreadListResponse(
        threads=threads,
        count=len(threads),
        total=total,
        offset=offset,
        limit=limit,
    )
@router.get("/threads/{thread_id}", response_model=ThreadInfo)
async def get_thread(thread_id: str) -> ThreadInfo:
    """Return the record of a single thread.

    Raises:
        HTTPException: 404 when the state does not know ``thread_id``.
    """
    found = state.get_thread(thread_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail=f"Thread not found: {thread_id}")
@router.get("/threads/{thread_id}/messages", response_model=MessageListResponse)
async def get_thread_messages(
    thread_id: str,
    limit: int = Query(50, ge=1, le=100),
    offset: int = Query(0, ge=0),
) -> MessageListResponse:
    """Return one page of the messages belonging to a thread.

    Args:
        thread_id: Thread whose messages are requested.
        limit: Page size (1-100, default 50).
        offset: Number of messages to skip (>= 0).

    Raises:
        HTTPException: 404 when the state does not know ``thread_id``.
    """
    # Only existence matters here; the thread record itself is not used.
    if state.get_thread(thread_id) is None:
        raise HTTPException(status_code=404, detail=f"Thread not found: {thread_id}")

    page, total = state.get_messages(thread_id=thread_id, limit=limit, offset=offset)
    page_size = len(page)
    return MessageListResponse(
        messages=page,
        count=page_size,
        total=total,
        offset=offset,
        limit=limit,
    )
@router.post("/threads/{thread_id}/kill")
async def kill_thread(thread_id: str) -> dict:
    """Mark a thread as killed.

    The thread is completed through the server state with status
    ``ThreadStatus.KILLED``.

    Raises:
        HTTPException: 404 when the state does not know ``thread_id``.
    """
    if state.get_thread(thread_id) is None:
        raise HTTPException(status_code=404, detail=f"Thread not found: {thread_id}")

    await state.complete_thread(thread_id, status=ThreadStatus.KILLED)
    outcome = {"success": True, "threadId": thread_id}
    return outcome
# =========================================================================
# Message Endpoints
# =========================================================================
@router.get("/messages", response_model=MessageListResponse)
async def list_messages(
    agent: Optional[str] = Query(None, description="Filter by agent (sender or receiver)"),
    limit: int = Query(50, ge=1, le=100),
    offset: int = Query(0, ge=0),
) -> MessageListResponse:
    """Return one page of the global message history.

    Args:
        agent: Optional agent filter, passed through to the state.
        limit: Page size (1-100, default 50).
        offset: Number of messages to skip (>= 0).
    """
    page, total = state.get_messages(agent=agent, limit=limit, offset=offset)
    page_size = len(page)
    return MessageListResponse(
        messages=page,
        count=page_size,
        total=total,
        offset=offset,
        limit=limit,
    )
# =========================================================================
# Usage/Gas Tracking Endpoints
# =========================================================================
@router.get("/usage", response_model=UsageResponse)
async def get_usage() -> UsageResponse:
    """Return the aggregate usage overview (the "gas gauge").

    Combines the usage tracker's overall totals, its per-agent and
    per-model totals, and the number of threads currently present in the
    budget registry.
    """
    from xml_pipeline.llm import get_usage_tracker
    from xml_pipeline.message_bus import get_budget_registry

    # Counters shared by the overall, per-agent and per-model records.
    counter_fields = (
        "total_tokens",
        "prompt_tokens",
        "completion_tokens",
        "request_count",
        "total_cost",
    )

    tracker = get_usage_tracker()
    budget_registry = get_budget_registry()

    # Overall totals additionally carry the average latency.
    raw_totals = tracker.get_totals()
    totals = UsageTotals(
        **{field: raw_totals[field] for field in counter_fields},
        avg_latency_ms=raw_totals["avg_latency_ms"],
    )

    by_agent = []
    for agent_id, counters in tracker.get_all_agent_totals().items():
        by_agent.append(
            AgentUsageInfo(
                agent_id=agent_id,
                **{field: counters[field] for field in counter_fields},
            )
        )

    by_model = []
    for model, counters in tracker.get_all_model_totals().items():
        by_model.append(
            ModelUsageInfo(
                model=model,
                **{field: counters[field] for field in counter_fields},
            )
        )

    # One registry entry per thread that has a budget.
    active_threads = len(budget_registry.get_all_usage())

    return UsageResponse(
        usage=UsageOverview(
            totals=totals,
            by_agent=by_agent,
            by_model=by_model,
            active_threads=active_threads,
        )
    )
def _thread_budget_info(thread_id: str, budget_dict: dict) -> ThreadBudgetInfo:
    """Convert one budget-registry usage dict into a ``ThreadBudgetInfo``.

    ``percent_used`` is rounded to one decimal and is 0 when ``max_tokens``
    is not positive, so no division by zero can occur. ``is_exhausted`` is
    true once ``remaining`` has dropped to zero or below.
    """
    max_tokens = budget_dict["max_tokens"]
    total = budget_dict["total_tokens"]
    remaining = budget_dict["remaining"]
    percent = (total / max_tokens * 100) if max_tokens > 0 else 0
    return ThreadBudgetInfo(
        thread_id=thread_id,
        max_tokens=max_tokens,
        prompt_tokens=budget_dict["prompt_tokens"],
        completion_tokens=budget_dict["completion_tokens"],
        total_tokens=total,
        remaining=remaining,
        percent_used=round(percent, 1),
        is_exhausted=remaining <= 0,
    )


@router.get("/usage/threads", response_model=ThreadBudgetListResponse)
async def get_thread_budgets() -> ThreadBudgetListResponse:
    """Return the token budget of every thread in the budget registry.

    Threads are ordered by ``percent_used`` descending so the hottest
    threads come first, which helps spot runaway agents.
    """
    from xml_pipeline.message_bus import get_budget_registry

    registry = get_budget_registry()
    threads = sorted(
        (
            _thread_budget_info(thread_id, budget_dict)
            for thread_id, budget_dict in registry.get_all_usage().items()
        ),
        key=lambda info: info.percent_used,
        reverse=True,
    )
    return ThreadBudgetListResponse(
        threads=threads,
        count=len(threads),
        # NOTE(review): this reads a private attribute of the registry;
        # switch to a public accessor if the registry offers one.
        default_max_tokens=registry._max_tokens_per_thread,
    )


@router.get("/usage/threads/{thread_id}", response_model=ThreadBudgetInfo)
async def get_thread_budget(thread_id: str) -> ThreadBudgetInfo:
    """Return the token budget of a single thread.

    Raises:
        HTTPException: 404 when the registry returns no usage for
            ``thread_id``.
    """
    from xml_pipeline.message_bus import get_budget_registry

    registry = get_budget_registry()
    budget_dict = registry.get_usage(thread_id)
    if budget_dict is None:
        raise HTTPException(
            status_code=404,
            detail=f"No budget found for thread: {thread_id}",
        )
    return _thread_budget_info(thread_id, budget_dict)
@router.get("/usage/agents/{agent_id}", response_model=AgentUsageInfo)
async def get_agent_usage(agent_id: str) -> AgentUsageInfo:
    """Return the usage totals recorded for one agent.

    The response model is declared explicitly, matching the other typed
    endpoints of this router.

    Args:
        agent_id: Agent whose totals are requested.
    """
    from xml_pipeline.llm import get_usage_tracker

    tracker = get_usage_tracker()
    # NOTE(review): what the tracker returns for an unknown agent is not
    # visible here; confirm whether a 404 is needed.
    data = tracker.get_agent_totals(agent_id)
    return AgentUsageInfo(
        agent_id=agent_id,
        total_tokens=data["total_tokens"],
        prompt_tokens=data["prompt_tokens"],
        completion_tokens=data["completion_tokens"],
        request_count=data["request_count"],
        total_cost=data["total_cost"],
    )
@router.get("/usage/models/{model}", response_model=ModelUsageInfo)
async def get_model_usage(model: str) -> ModelUsageInfo:
    """Return the usage totals recorded for one model.

    The response model is declared explicitly, matching the other typed
    endpoints of this router.

    Args:
        model: Model identifier whose totals are requested.
    """
    from xml_pipeline.llm import get_usage_tracker

    tracker = get_usage_tracker()
    # NOTE(review): a model id containing "/" will not match this route's
    # single path segment; confirm whether such ids occur.
    data = tracker.get_model_totals(model)
    return ModelUsageInfo(
        model=model,
        total_tokens=data["total_tokens"],
        prompt_tokens=data["prompt_tokens"],
        completion_tokens=data["completion_tokens"],
        request_count=data["request_count"],
        total_cost=data["total_cost"],
    )
@router.post("/usage/reset")
async def reset_usage() -> dict:
    """Reset the usage tracker (meant for testing and development).

    WARNING: previously recorded usage is discarded. This handler only
    calls ``reset_usage_tracker()``; it makes no call on the thread budget
    registry.
    """
    from xml_pipeline.llm import reset_usage_tracker

    reset_usage_tracker()
    confirmation = {"success": True, "message": "Usage tracking reset"}
    return confirmation
# =========================================================================
# Control Endpoints
# =========================================================================
@router.post("/inject", response_model=InjectResponse)
async def inject_message(request: InjectRequest) -> InjectResponse:
    """Record a message addressed to an agent on behalf of the API.

    The message is recorded in the server state with sender ``"api"``.
    It is not yet pushed into the pump queue (see the TODO below).

    Raises:
        HTTPException: 400 when ``request.to`` names an unknown agent.
    """
    target = request.to
    if state.get_agent(target) is None:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown target agent: {target}",
        )

    # Continue the caller's thread when one is given, else start a new one.
    thread_id = request.thread_id if request.thread_id else str(uuid.uuid4())

    # The first payload key serves as the payload type name; an empty
    # payload falls back to the generic "Payload".
    first_key = iter(request.payload.keys())
    payload_type = next(first_key, "Payload")

    message_id = await state.record_message(
        thread_id=thread_id,
        from_id="api",
        to_id=target,
        payload_type=payload_type,
        payload=request.payload,
    )

    # TODO: Actually inject into pump queue
    # This requires building an envelope and calling pump.inject()
    return InjectResponse(thread_id=thread_id, message_id=message_id)
@router.post("/agents/{name}/pause")
async def pause_agent(name: str) -> dict:
    """Set an agent's state to ``AgentState.PAUSED``.

    Raises:
        HTTPException: 404 when no agent called ``name`` exists.
    """
    if state.get_agent(name) is None:
        raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

    from xml_pipeline.server.models import AgentState

    await state.update_agent_state(name, AgentState.PAUSED)
    outcome = {"success": True, "agent": name, "state": "paused"}
    return outcome
@router.post("/agents/{name}/resume")
async def resume_agent(name: str) -> dict:
    """Set an agent's state back to ``AgentState.IDLE``.

    Raises:
        HTTPException: 404 when no agent called ``name`` exists.
    """
    if state.get_agent(name) is None:
        raise HTTPException(status_code=404, detail=f"Agent not found: {name}")

    from xml_pipeline.server.models import AgentState

    await state.update_agent_state(name, AgentState.IDLE)
    outcome = {"success": True, "agent": name, "state": "idle"}
    return outcome
@router.post("/organism/reload")
async def reload_config() -> dict:
    """Ask the server state to hot-reload the organism configuration.

    Returns:
        The result dict produced by ``state.reload_config()`` when its
        ``"success"`` entry is truthy.

    Raises:
        HTTPException: 500 when the reload reports failure; the detail is
            the result's ``"error"`` entry, or ``"Reload failed"`` if absent.
    """
    outcome = await state.reload_config()
    if outcome["success"]:
        return outcome
    raise HTTPException(
        status_code=500,
        detail=outcome.get("error", "Reload failed"),
    )
@router.post("/organism/stop")
async def stop_organism() -> dict:
    """Flag the server state as stopping and acknowledge the request.

    Only the state flag is set here; the pump is not signalled yet (see
    the TODO below).
    """
    state.set_stopping()
    # TODO: Signal pump to stop
    acknowledgement = {"success": True, "status": "stopping"}
    return acknowledgement
return router