Endpoints:
- GET /api/v1/usage - Overview with totals, per-agent, per-model breakdown
- GET /api/v1/usage/threads - List all thread budgets sorted by usage
- GET /api/v1/usage/threads/{id} - Single thread budget details
- GET /api/v1/usage/agents/{id} - Usage totals for specific agent
- GET /api/v1/usage/models/{model} - Usage totals for specific model
- POST /api/v1/usage/reset - Reset all usage tracking
Models:
- UsageTotals, UsageOverview, UsageResponse
- ThreadBudgetInfo, ThreadBudgetListResponse
- AgentUsageInfo, ModelUsageInfo
Also adds has_budget() method to ThreadBudgetRegistry for checking
if a thread exists without auto-creating it.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
513 lines
18 KiB
Python
513 lines
18 KiB
Python
"""
api.py — REST API routes for AgentServer.

Provides endpoints for:
- Organism info and config
- Agent listing and details
- Thread listing and management
- Message injection
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from typing import TYPE_CHECKING, Optional
|
|
|
|
from fastapi import APIRouter, HTTPException, Query
|
|
|
|
from xml_pipeline.server.models import (
|
|
AgentInfo,
|
|
AgentListResponse,
|
|
AgentUsageInfo,
|
|
CapabilityDetail,
|
|
CapabilityInfo,
|
|
CapabilityListResponse,
|
|
ErrorResponse,
|
|
InjectRequest,
|
|
InjectResponse,
|
|
MessageListResponse,
|
|
ModelUsageInfo,
|
|
OrganismInfo,
|
|
ThreadBudgetInfo,
|
|
ThreadBudgetListResponse,
|
|
ThreadInfo,
|
|
ThreadListResponse,
|
|
ThreadStatus,
|
|
UsageOverview,
|
|
UsageResponse,
|
|
UsageTotals,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from xml_pipeline.server.state import ServerState
|
|
|
|
|
|
def _thread_budget_info(thread_id: str, budget: dict) -> ThreadBudgetInfo:
    """Build a ``ThreadBudgetInfo`` from one budget-registry usage dict.

    ``percent_used`` is rounded to one decimal place and reported as 0 when
    ``max_tokens`` is not positive, so the division can never hit zero.
    ``is_exhausted`` is true once ``remaining`` is zero or negative.
    """
    max_tokens = budget["max_tokens"]
    total = budget["total_tokens"]
    percent = (total / max_tokens * 100) if max_tokens > 0 else 0

    return ThreadBudgetInfo(
        thread_id=thread_id,
        max_tokens=max_tokens,
        prompt_tokens=budget["prompt_tokens"],
        completion_tokens=budget["completion_tokens"],
        total_tokens=total,
        remaining=budget["remaining"],
        percent_used=round(percent, 1),
        is_exhausted=budget["remaining"] <= 0,
    )


def _agent_usage_info(agent_id: str, data: dict) -> AgentUsageInfo:
    """Build an ``AgentUsageInfo`` from one usage-tracker totals dict."""
    return AgentUsageInfo(
        agent_id=agent_id,
        total_tokens=data["total_tokens"],
        prompt_tokens=data["prompt_tokens"],
        completion_tokens=data["completion_tokens"],
        request_count=data["request_count"],
        total_cost=data["total_cost"],
    )


def _model_usage_info(model: str, data: dict) -> ModelUsageInfo:
    """Build a ``ModelUsageInfo`` from one usage-tracker totals dict."""
    return ModelUsageInfo(
        model=model,
        total_tokens=data["total_tokens"],
        prompt_tokens=data["prompt_tokens"],
        completion_tokens=data["completion_tokens"],
        request_count=data["request_count"],
        total_cost=data["total_cost"],
    )


def create_router(state: "ServerState") -> APIRouter:
    """Create the ``/api/v1`` router with handlers bound to ``state``.

    Every route handler is a closure over ``state``. The usage endpoints also
    fetch the usage tracker / budget registry through function-local imports.
    NOTE(review): those imports are presumably local to avoid an import
    cycle; confirm before hoisting them to module level.

    Args:
        state: Server state object the handlers query and update.

    Returns:
        The configured ``APIRouter``.
    """
    router = APIRouter(prefix="/api/v1")

    def _require_agent(name: str) -> AgentInfo:
        """Return the agent called ``name`` or raise a 404 ``HTTPException``."""
        agent = state.get_agent(name)
        if agent is None:
            raise HTTPException(status_code=404, detail=f"Agent not found: {name}")
        return agent

    def _require_thread(thread_id: str) -> ThreadInfo:
        """Return the thread ``thread_id`` or raise a 404 ``HTTPException``."""
        thread = state.get_thread(thread_id)
        if thread is None:
            raise HTTPException(
                status_code=404, detail=f"Thread not found: {thread_id}"
            )
        return thread

    # =========================================================================
    # Organism Endpoints
    # =========================================================================

    @router.get("/organism", response_model=OrganismInfo)
    async def get_organism() -> OrganismInfo:
        """Get organism overview and stats."""
        return state.get_organism_info()

    @router.get("/organism/config")
    async def get_organism_config() -> dict:
        """Get sanitized organism configuration (no secrets)."""
        return state.get_organism_config()

    # =========================================================================
    # Capability Introspection Endpoints (for operators, not agents)
    # =========================================================================

    @router.get("/capabilities", response_model=CapabilityListResponse)
    async def list_capabilities() -> CapabilityListResponse:
        """
        List all registered capabilities in the organism.

        This endpoint is for operator introspection only.
        Agents cannot access this - they only know their declared peers.
        """
        capabilities = state.get_capabilities()
        return CapabilityListResponse(
            capabilities=capabilities,
            count=len(capabilities),
        )

    @router.get("/capabilities/{name}", response_model=CapabilityDetail)
    async def get_capability(name: str) -> CapabilityDetail:
        """
        Get detailed capability info including schema and example.

        This endpoint is for operator introspection only.

        Raises:
            HTTPException: 404 when no capability is registered under ``name``.
        """
        capability = state.get_capability(name)
        if capability is None:
            raise HTTPException(
                status_code=404,
                detail=f"Capability not found: {name}",
            )
        return capability

    # =========================================================================
    # Agent Endpoints
    # =========================================================================

    @router.get("/agents", response_model=AgentListResponse)
    async def list_agents() -> AgentListResponse:
        """List all agents with current state."""
        agents = state.get_agents()
        return AgentListResponse(agents=agents, count=len(agents))

    @router.get("/agents/{name}", response_model=AgentInfo)
    async def get_agent(name: str) -> AgentInfo:
        """Get single agent details (404 if the agent is unknown)."""
        return _require_agent(name)

    @router.get("/agents/{name}/config")
    async def get_agent_config(name: str) -> dict:
        """Get the agent's config fields (404 if the agent is unknown)."""
        agent = _require_agent(name)

        # Return relevant config fields
        return {
            "name": agent.name,
            "description": agent.description,
            "isAgent": agent.is_agent,
            "peers": agent.peers,
            "payloadClass": agent.payload_class,
        }

    @router.get("/agents/{name}/schema")
    async def get_agent_schema(name: str) -> dict:
        """Get agent's payload XML schema (404 if none is available)."""
        schema = state.get_agent_schema(name)
        if schema is None:
            raise HTTPException(
                status_code=404,
                detail=f"Schema not found for agent: {name}",
            )
        return {"schema": schema, "contentType": "application/xml"}

    # =========================================================================
    # Thread Endpoints
    # =========================================================================

    @router.get("/threads", response_model=ThreadListResponse)
    async def list_threads(
        status: Optional[str] = Query(None, description="Filter by status"),
        agent: Optional[str] = Query(None, description="Filter by participant agent"),
        limit: int = Query(50, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> ThreadListResponse:
        """
        List threads with optional filtering.

        Raises:
            HTTPException: 400 when ``status`` is not a valid ``ThreadStatus``
                value.
        """
        thread_status = None
        if status:
            try:
                thread_status = ThreadStatus(status)
            except ValueError:
                # The ValueError adds nothing beyond the detail message, so
                # the chained context is suppressed explicitly.
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid status: {status}. Valid values: {[s.value for s in ThreadStatus]}",
                ) from None

        threads, total = state.get_threads(
            status=thread_status,
            agent=agent,
            limit=limit,
            offset=offset,
        )
        return ThreadListResponse(
            threads=threads,
            count=len(threads),
            total=total,
            offset=offset,
            limit=limit,
        )

    @router.get("/threads/{thread_id}", response_model=ThreadInfo)
    async def get_thread(thread_id: str) -> ThreadInfo:
        """Get thread details with message history (404 if unknown)."""
        return _require_thread(thread_id)

    @router.get("/threads/{thread_id}/messages", response_model=MessageListResponse)
    async def get_thread_messages(
        thread_id: str,
        limit: int = Query(50, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> MessageListResponse:
        """Get messages in a specific thread (404 if the thread is unknown)."""
        _require_thread(thread_id)

        messages, total = state.get_messages(
            thread_id=thread_id,
            limit=limit,
            offset=offset,
        )
        return MessageListResponse(
            messages=messages,
            count=len(messages),
            total=total,
            offset=offset,
            limit=limit,
        )

    @router.post("/threads/{thread_id}/kill")
    async def kill_thread(thread_id: str) -> dict:
        """Terminate a thread by completing it with status ``KILLED``."""
        _require_thread(thread_id)

        await state.complete_thread(thread_id, status=ThreadStatus.KILLED)
        return {"success": True, "threadId": thread_id}

    # =========================================================================
    # Message Endpoints
    # =========================================================================

    @router.get("/messages", response_model=MessageListResponse)
    async def list_messages(
        agent: Optional[str] = Query(None, description="Filter by agent (sender or receiver)"),
        limit: int = Query(50, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> MessageListResponse:
        """Get global message history."""
        messages, total = state.get_messages(
            agent=agent,
            limit=limit,
            offset=offset,
        )
        return MessageListResponse(
            messages=messages,
            count=len(messages),
            total=total,
            offset=offset,
            limit=limit,
        )

    # =========================================================================
    # Usage/Gas Tracking Endpoints
    # =========================================================================

    @router.get("/usage", response_model=UsageResponse)
    async def get_usage() -> UsageResponse:
        """
        Get usage overview (gas gauge).

        Returns aggregate token usage, costs, and per-agent/model breakdowns.
        This is the main endpoint for monitoring LLM consumption.
        """
        from xml_pipeline.llm import get_usage_tracker
        from xml_pipeline.message_bus import get_budget_registry

        tracker = get_usage_tracker()
        budget_registry = get_budget_registry()

        # Get aggregate totals
        totals_dict = tracker.get_totals()
        totals = UsageTotals(
            total_tokens=totals_dict["total_tokens"],
            prompt_tokens=totals_dict["prompt_tokens"],
            completion_tokens=totals_dict["completion_tokens"],
            request_count=totals_dict["request_count"],
            total_cost=totals_dict["total_cost"],
            avg_latency_ms=totals_dict["avg_latency_ms"],
        )

        # Get per-agent breakdown
        by_agent = [
            _agent_usage_info(agent_id, data)
            for agent_id, data in tracker.get_all_agent_totals().items()
        ]

        # Get per-model breakdown
        by_model = [
            _model_usage_info(model, data)
            for model, data in tracker.get_all_model_totals().items()
        ]

        # Count threads that currently have a budget entry in the registry
        active_threads = len(budget_registry.get_all_usage())

        overview = UsageOverview(
            totals=totals,
            by_agent=by_agent,
            by_model=by_model,
            active_threads=active_threads,
        )
        return UsageResponse(usage=overview)

    @router.get("/usage/threads", response_model=ThreadBudgetListResponse)
    async def get_thread_budgets() -> ThreadBudgetListResponse:
        """
        Get token budgets for all active threads.

        Shows remaining budget per thread for monitoring runaway agents.
        """
        from xml_pipeline.message_bus import get_budget_registry

        registry = get_budget_registry()

        # Sort by percent used (descending) - hottest threads first
        threads = sorted(
            (
                _thread_budget_info(thread_id, budget_dict)
                for thread_id, budget_dict in registry.get_all_usage().items()
            ),
            key=lambda t: t.percent_used,
            reverse=True,
        )

        return ThreadBudgetListResponse(
            threads=threads,
            count=len(threads),
            # NOTE(review): reads a private attribute of the registry; a
            # public accessor would be preferable if the registry offers one.
            default_max_tokens=registry._max_tokens_per_thread,
        )

    @router.get("/usage/threads/{thread_id}", response_model=ThreadBudgetInfo)
    async def get_thread_budget(thread_id: str) -> ThreadBudgetInfo:
        """
        Get token budget for a specific thread.

        Raises:
            HTTPException: 404 when the registry returns no usage for
                ``thread_id``.
        """
        from xml_pipeline.message_bus import get_budget_registry

        registry = get_budget_registry()
        budget_dict = registry.get_usage(thread_id)

        if budget_dict is None:
            raise HTTPException(
                status_code=404,
                detail=f"No budget found for thread: {thread_id}",
            )

        return _thread_budget_info(thread_id, budget_dict)

    @router.get("/usage/agents/{agent_id}", response_model=AgentUsageInfo)
    async def get_agent_usage(agent_id: str) -> AgentUsageInfo:
        """Get usage totals for a specific agent."""
        from xml_pipeline.llm import get_usage_tracker

        tracker = get_usage_tracker()
        return _agent_usage_info(agent_id, tracker.get_agent_totals(agent_id))

    @router.get("/usage/models/{model}", response_model=ModelUsageInfo)
    async def get_model_usage(model: str) -> ModelUsageInfo:
        """Get usage totals for a specific model."""
        from xml_pipeline.llm import get_usage_tracker

        tracker = get_usage_tracker()
        return _model_usage_info(model, tracker.get_model_totals(model))

    @router.post("/usage/reset")
    async def reset_usage() -> dict:
        """
        Reset all usage tracking (for testing/development).

        WARNING: This clears all usage history. Use with caution.
        """
        from xml_pipeline.llm import reset_usage_tracker

        reset_usage_tracker()
        return {"success": True, "message": "Usage tracking reset"}

    # =========================================================================
    # Control Endpoints
    # =========================================================================

    @router.post("/inject", response_model=InjectResponse)
    async def inject_message(request: InjectRequest) -> InjectResponse:
        """
        Inject a message to an agent.

        Currently the message is only recorded in ``state``; it is not yet
        pushed into the pump queue (see TODO below).

        Raises:
            HTTPException: 400 when ``request.to`` is not a known agent.
        """
        # Validate target exists
        agent = state.get_agent(request.to)
        if agent is None:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown target agent: {request.to}",
            )

        # Generate or use provided thread ID
        thread_id = request.thread_id or str(uuid.uuid4())

        # Build payload XML from dict
        # For now, we construct a simple wrapper: the first payload key names
        # the payload type, falling back to "Payload" for an empty payload.
        payload_type = next(iter(request.payload), "Payload")

        # Record the message
        msg_id = await state.record_message(
            thread_id=thread_id,
            from_id="api",
            to_id=request.to,
            payload_type=payload_type,
            payload=request.payload,
        )

        # TODO: Actually inject into pump queue
        # This requires building an envelope and calling pump.inject()

        return InjectResponse(thread_id=thread_id, message_id=msg_id)

    @router.post("/agents/{name}/pause")
    async def pause_agent(name: str) -> dict:
        """Pause an agent (stop processing new messages)."""
        _require_agent(name)

        from xml_pipeline.server.models import AgentState

        await state.update_agent_state(name, AgentState.PAUSED)
        return {"success": True, "agent": name, "state": "paused"}

    @router.post("/agents/{name}/resume")
    async def resume_agent(name: str) -> dict:
        """Resume a paused agent by setting its state back to ``IDLE``."""
        _require_agent(name)

        from xml_pipeline.server.models import AgentState

        await state.update_agent_state(name, AgentState.IDLE)
        return {"success": True, "agent": name, "state": "idle"}

    @router.post("/organism/reload")
    async def reload_config() -> dict:
        """
        Hot-reload organism configuration.

        Re-reads organism.yaml and updates listeners:
        - New listeners are registered
        - Removed listeners are unregistered
        - Changed listeners are updated

        Raises:
            HTTPException: 500 when the reload result reports failure.
        """
        result = await state.reload_config()
        if not result["success"]:
            raise HTTPException(
                status_code=500,
                detail=result.get("error", "Reload failed"),
            )
        return result

    @router.post("/organism/stop")
    async def stop_organism() -> dict:
        """Graceful shutdown."""
        state.set_stopping()
        # TODO: Signal pump to stop
        return {"success": True, "status": "stopping"}

    return router
|