xml-pipeline/agentserver/platform/llm_api.py
dullfig ad081d412c Fix platform.complete() to use correct LLM router API
- Import `complete` not `generate` from agentserver.llm
- Pass `model` parameter required by router
- Return response.content (string) not response object

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 14:05:53 -08:00


"""
llm_api.py — Platform-controlled LLM interface.
The platform controls all LLM calls. Agents request completions via this API.
The platform assembles the full prompt (system + history + user message)
and enforces rate limits, caching, and cost controls.
Design principles:
- Agent-invisible prompts: agents never see their system prompt
- Thread-scoped history: only messages from the current thread
- Auditable: all calls can be logged/traced
- Rate-limited: platform controls costs
Usage (from handler):
from agentserver.platform import complete
async def handle_greeting(payload, metadata):
response = await complete(
agent_name=metadata.own_name,
thread_id=metadata.thread_id,
user_message=f"Greet {payload.name}",
temperature=0.9,
)
return HandlerResponse(
payload=GreetingResponse(message=response),
to="shouter",
)
"""
from __future__ import annotations

import logging
from typing import Any, Dict, List

from agentserver.memory import get_context_buffer
from agentserver.platform.prompt_registry import get_prompt_registry

logger = logging.getLogger(__name__)


async def complete(
    agent_name: str,
    thread_id: str,
    user_message: str,
    *,
    temperature: float = 0.7,
    max_tokens: int = 1024,
    include_history: bool = True,
    **kwargs: Any,
) -> str:
"""
Request an LLM completion for an agent.
The platform assembles the full prompt:
1. System prompt from PromptRegistry (invisible to agent)
2. Peer schemas (what messages agent can send)
3. Thread history from ContextBuffer
4. User's message
Args:
agent_name: The calling agent's name (for prompt lookup)
thread_id: Current thread UUID (for history lookup)
user_message: The user/task message to complete
temperature: LLM temperature (0.0-1.0)
max_tokens: Maximum tokens in response
include_history: Whether to include thread history
**kwargs: Additional LLM parameters
Returns:
The LLM's text response
Raises:
KeyError: If agent has no registered prompt
RuntimeError: If LLM call fails
"""
    # Get agent's prompt (agent cannot see this)
    prompt_registry = get_prompt_registry()
    prompt = prompt_registry.get_required(agent_name)

    # Build messages array
    messages: List[Dict[str, str]] = []

    # System prompt (from registry)
    if prompt.system_prompt:
        messages.append({
            "role": "system",
            "content": prompt.system_prompt,
        })

    # Peer schemas (what messages agent can send)
    if prompt.peer_schemas:
        messages.append({
            "role": "system",
            "content": prompt.peer_schemas,
        })

    # Thread history (agent can read, not modify)
    if include_history and thread_id:
        context_buffer = get_context_buffer()
        history = context_buffer.get_thread(thread_id)
        for slot in history:
            # Determine role: assistant if from this agent, user otherwise
            role = "assistant" if slot.from_id == agent_name else "user"
            # Serialize payload for LLM context
            content = _serialize_for_llm(slot.payload, slot.from_id)
            messages.append({
                "role": role,
                "content": content,
            })

    # Current user message
    messages.append({
        "role": "user",
        "content": user_message,
    })
    # Make LLM call via router
    try:
        from agentserver.llm import complete as llm_complete

        # Use model from kwargs or default
        model = kwargs.pop("model", "grok-3-mini-beta")
        response = await llm_complete(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            **kwargs,
        )
        logger.debug(
            f"platform.complete: agent={agent_name} thread={thread_id[:8]}... "
            f"messages={len(messages)} response_len={len(response.content)}"
        )
        return response.content
    except Exception as e:
        logger.error(f"LLM call failed for {agent_name}: {e}")
        raise RuntimeError(f"LLM completion failed: {e}") from e
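
# Test note (illustrative, not part of the platform API): the router response
# is only assumed to expose a `.content` string (see `return response.content`
# above), so a stand-in for `agentserver.llm.complete` can be as small as:
#
#     from types import SimpleNamespace
#
#     async def _fake_llm_complete(*, model, messages, **kwargs):
#         return SimpleNamespace(content=f"echo: {messages[-1]['content']}")
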
def _serialize_for_llm(payload: Any, from_id: str) -> str:
    """
    Serialize a payload for LLM context.

    Converts structured payloads to a readable format for the LLM.
    """
    # Try XML serialization first (for xmlify classes)
    if hasattr(payload, 'xml_value'):
        from lxml import etree
        try:
            class_name = type(payload).__name__
            tree = payload.xml_value(class_name)
            xml_str = etree.tostring(tree, encoding='unicode', pretty_print=True)
            return f"[From {from_id}]\n{xml_str}"
        except Exception:
            pass  # fall through to the next serializer

    # Try to_xml for custom classes
    if hasattr(payload, 'to_xml'):
        try:
            return f"[From {from_id}]\n{payload.to_xml()}"
        except Exception:
            pass  # fall through to repr

    # Fallback to repr
    return f"[From {from_id}] {repr(payload)}"
# Alias for cleaner imports
platform_complete = complete
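# e.g. `from agentserver.platform.llm_api import platform_complete` at call
# sites where the bare name `complete` would be ambiguous.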