From e314bb01e8ca386deff4affacba9015972d7ed42 Mon Sep 17 00:00:00 2001 From: dullfig Date: Wed, 7 Jan 2026 20:58:31 -0800 Subject: [PATCH] fixing docs --- README.md | 48 +++++++++ agentserver/message_bus/message_state.py | 12 +++ agentserver/message_bus/steps/c14n.py | 53 ++++++++++ agentserver/message_bus/steps/repair.py | 42 ++++++++ docs/core-principles-v2.1.md | 127 ++++++++++++++++++++++- structure.md | 28 ++--- 6 files changed, 296 insertions(+), 14 deletions(-) create mode 100644 agentserver/message_bus/steps/c14n.py create mode 100644 agentserver/message_bus/steps/repair.py diff --git a/README.md b/README.md index b5747ba..0f20819 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,54 @@ Listener( The organism now speaks `` — fully validated, typed, and discoverable.
Unlike rigid platforms requiring custom mappings or fragile item structures, this is pure Python — typed, testable, and sovereign. +## Security Model + +AgentServer's security is **architectural**, not bolted-on: + +### Two Completely Isolated Channels +- **Main Bus**: Standard `` envelope, all traffic undergoes identical validation pipeline regardless of source +- **OOB Channel**: Privileged commands only, different schema, localhost-bound, used for structural changes + +### Handler Isolation & Trust Boundary +**Handlers are untrusted code.** Even compromised handlers cannot: +- Forge their identity (sender name captured in coroutine scope before execution) +- Escape thread context (thread UUID captured in coroutine, not handler output) +- Route to arbitrary targets (routing computed from peers list, not handler claims) +- Access other threads' data (opaque UUIDs, private path registry) +- Discover topology (only declared peers visible) + +The message pump maintains authoritative metadata in coroutine scope and **never trusts handler output** for security-critical properties. + +### Closed-Loop Validation +ALL messages on the main bus undergo identical security processing: +- External ingress: WSS → pipeline → validation +- Handler outputs: bytes → pipeline → validation (same steps!) +- Error messages: generated → pipeline → validation +- System notifications: generated → pipeline → validation + +No fast-path bypasses. No "trusted internal" messages. Everything validates. 
+ +### Topology Privacy +- Agents see only opaque thread UUIDs, never hierarchical paths +- Private path registry (UUID → `agent.tool.subtool`) maintained by system +- Peers list enforces capability boundaries (no ambient authority) +- Federation gateways are opaque abstractions + +### Anti-Paperclip Architecture +- Threads are ephemeral (complete audit trail, then deleted) +- No persistent cross-thread memory primitives +- Token budgets enforce computational bounds +- Thread pruning prevents state accumulation +- All reasoning visible in message history + +This architecture ensures:
+✅ No privilege escalation (handlers can't forge privileged commands)
+✅ No fast-path bypasses (even system-generated messages validate)
+✅ Physical separation (privileged and regular traffic cannot mix)
+✅ Capability-safe handlers (compromised code still bounded by peers list)
+✅ Complete auditability (thread history is ground truth) + + ## Key Features ### 1. The Autonomous Schema Layer - Dataclass → cached XSD + example + rich tool prompt (mandatory description + field docs). diff --git a/agentserver/message_bus/message_state.py b/agentserver/message_bus/message_state.py index 15a96bb..bc50d5f 100644 --- a/agentserver/message_bus/message_state.py +++ b/agentserver/message_bus/message_state.py @@ -2,6 +2,18 @@ from dataclasses import dataclass, field from lxml.etree import Element from typing import Any +""" +default_listener_steps = [ + repair_step, # raw bytes → repaired bytes + c14n_step, # bytes → lxml Element + envelope_validation_step, # Element → validated Element + payload_extraction_step, # Element → payload Element + xsd_validation_step, # payload Element + cached XSD → validated + deserialization_step, # payload Element → dataclass instance + routing_resolution_step, # attaches target_listeners (or error) +] +""" + @dataclass class HandlerMetadata: """Trustworthy context passed to every handler."""
diff --git a/agentserver/message_bus/steps/c14n.py b/agentserver/message_bus/steps/c14n.py
new file mode 100644
index 0000000..66e3333
--- /dev/null
+++ b/agentserver/message_bus/steps/c14n.py
@@ -0,0 +1,53 @@
+"""
+c14n.py — Canonicalization step for the full envelope.
+
+After repair, the envelope_tree may have different but semantically equivalent
+representations (attribute order, namespace prefixes, whitespace, etc.).
+
+This step produces Exclusive XML Canonicalization (Exclusive C14N 1.0) bytes that are
+identical for equivalent documents — essential for validation and signing.
+
+Part of AgentServer v2.1 message pump.
+"""
+
+from lxml import etree
+from agentserver.message_bus.message_state import MessageState
+
+
+async def c14n_step(state: MessageState) -> MessageState:
+    """
+    Canonicalize the envelope_tree to Exclusive C14N form.
+
+    If repair_step succeeded, this step normalizes the tree so that:
+    - Validation against envelope.xsd is deterministic
+    - Future signing/federation comparisons are reliable
+
+    On failure, sets state.error and continues (downstream steps will short-circuit).
+    """
+    if state.envelope_tree is None:
+        # Keep an upstream diagnostic (e.g. "repair_step failed: ...") instead of clobbering it
+        state.error = state.error or "c14n_step: no envelope_tree (previous repair failed)"
+        return state
+
+    try:
+        # method="c14n" combined with exclusive=True yields Exclusive XML Canonicalization
+        # (the same form we require on egress)
+        c14n_bytes = etree.tostring(
+            state.envelope_tree,
+            method="c14n",              # C14N 1.0 serializer
+            exclusive=True,             # Exclusive variant of C14N 1.0
+            with_comments=False,        # Comments not part of canonical form
+        )
+
+        # Re-parse the canonical bytes to get a clean tree (prefixes normalized, etc.)
+        # This ensures downstream steps see a consistent document
+        clean_tree = etree.fromstring(c14n_bytes)
+
+        state.envelope_tree = clean_tree
+        # raw_bytes already cleared by repair_step
+
+    except Exception as exc:  # pylint: disable=broad-except
+        state.error = f"c14n_step failed: {exc}"
+        state.envelope_tree = None
+
+    return state
\ No newline at end of file
diff --git a/agentserver/message_bus/steps/repair.py b/agentserver/message_bus/steps/repair.py
new file mode 100644
index 0000000..5a598d1
--- /dev/null
+++ b/agentserver/message_bus/steps/repair.py
@@ -0,0 +1,42 @@
+from lxml import etree
+from agentserver.message_bus.message_state import MessageState
+
+# lxml parser configured for maximum tolerance + recovery
+_RECOVERY_PARSER = etree.XMLParser(
+    recover=True,               # Try to recover from malformed XML
+    remove_blank_text=True,     # Normalize whitespace
+    resolve_entities=False,     # Security: never expand entities (XXE / entity-expansion defense)
+    huge_tree=False,            # Keep the parser's default safety limits
+)
+
+async def repair_step(state: MessageState) -> MessageState:
+    """
+    First pipeline step: repair malformed ingress bytes into an lxml Element (the envelope root).
+
+    Takes raw_bytes from ingress (or multi-payload extraction) and attempts to produce
+    a valid envelope_tree. Uses lxml's recovery mode to tolerate dirty streams.
+
+    Always returns a MessageState (even on total failure — injects diagnostic error).
+    """
+    if state.raw_bytes is None:
+        state.error = state.error or "repair_step: no raw_bytes available"
+        return state
+
+    try:
+        # lxml recovery parser turns most garbage into something parseable
+        tree = etree.fromstring(state.raw_bytes, parser=_RECOVERY_PARSER)
+
+        if tree is None:
+            raise ValueError("Parser returned None — unrecoverable XML")
+
+        state.envelope_tree = tree
+        # Optional: free memory early — raw bytes no longer needed after repair
+        state.raw_bytes = None
+
+    except Exception as exc:  # pylint: disable=broad-except
+        # Even if recovery fails completely, we capture the diagnostic
+        state.error = f"repair_step failed: {exc}"
+        # We still set envelope_tree to None so later steps know to short-circuit
+        state.envelope_tree = None
+
+    return state
\ No newline at end of file
diff --git a/docs/core-principles-v2.1.md b/docs/core-principles-v2.1.md index fb2fd84..a7492a8 100644 --- a/docs/core-principles-v2.1.md +++ b/docs/core-principles-v2.1.md @@ -72,6 +72,40 @@ These principles are the single canonical source of truth for the project. All d - Opaque thread UUIDs + private path registry prevent topology disclosure. - “No Paperclippers” manifesto injected as first system message for every LLM-based listener. +### Privileged Operations +- Privileged messages (per `privileged-msg.xsd`) handled exclusively on dedicated OOB channel. +- OOB channel bound to localhost by default (safe for local GUI); separate port/socket from main bus. +- Main message pump and dispatcher oblivious to privileged operations – no routing or handling for privileged roots. +- Remote privileged attempts impossible (channel not exposed); any leak to main port logged as security event and dropped.
+ +### Identity & Cryptography +- Ed25519 identity key used for envelope signing, federation auth, and privileged command verification. +- All traffic on main bus uses mandatory WSS (TLS) + TOTP authentication. + +### Handler Isolation (NEW) +- **Handlers are untrusted code** running in coroutine sandboxes with minimal context. +- Security-critical metadata (sender identity, thread path, routing) captured in coroutine scope before handler execution. +- Handler output never trusted for identity, routing, or thread context – all envelope metadata injected from coroutine-captured state. +- Even compromised handlers cannot forge messages, escape threads, or discover topology beyond declared peers. + +### Topology Privacy +- Opaque thread UUIDs prevent topology disclosure to handlers and agents. +- Private path registry maps UUIDs to hierarchical paths (e.g., `agent.tool.subtool`) for routing and audit. +- Agents receive only opaque UUIDs; system maintains authoritative path mapping. +- Peers list enforces capability boundaries: agents can only call declared tools. + +### Anti-Paperclip Guarantees +- No persistent cross-thread memory (threads are ephemeral audit trails). +- Token budgets per thread enforce computational bounds. +- Thread pruning on delegation return prevents state accumulation. +- All agent reasoning visible in message history (no hidden state machines). +- "No Paperclippers" manifesto injected as first system message for every LLM-based listener. + +### Audit & Forensics +- Complete message history per thread provides full audit trail. +- Privileged introspection (via OOB) can map UUID→path for forensics without exposing to agents. +- All structural changes (hot-reload, listener registration) logged as audit events on main bus. + ## Federation - Gateways declared in YAML with trusted remote public key. - Remote tools referenced by gateway name in agent tool lists. 
@@ -105,8 +139,97 @@ These principles are the single canonical source of truth for the project. All d These principles are now locked for v2.1. The Message Pump v2.1 specification remains the canonical detail for pump behavior. Future changes require explicit discussion and amendment here first. +## Handler Trust Boundary & Coroutine Isolation + +Handlers are treated as **untrusted code** that runs in an isolated coroutine context. +The message pump maintains authoritative metadata in coroutine scope and never trusts +handler output to preserve security-critical properties. + +### Coroutine Capture Pattern + +When dispatching a message to a handler, the pump captures metadata in coroutine scope +BEFORE handler execution: +```python +async def dispatch(msg: ParsedMessage): + # TRUSTED: Captured before handler runs + thread_uuid = msg.thread_id + sender_name = msg.listener_name + thread_path = path_registry[thread_uuid] + parent = get_parent_from_path(thread_path) + allowed_peers = registry.get_listener(sender_name).peers + + # UNTRUSTED: Handler executes with minimal context + response_bytes = await handler( + payload=msg.deserialized_payload, + meta=HandlerMetadata(thread_id=thread_uuid) # Opaque UUID only + ) + + # TRUSTED: Coroutine scope still has authoritative metadata + # Process response using captured context, not handler claims + await process_response( + response_bytes, + actual_sender=sender_name, # From coroutine, not handler + actual_thread=thread_uuid, # From coroutine, not handler + actual_parent=parent, # From coroutine, not handler + allowed_peers=allowed_peers # From registration, not handler + ) +``` + +### What Handlers Cannot Do + +Even compromised or malicious handlers cannot: + +- **Forge identity**: `` is always injected from coroutine-captured sender name +- **Escape thread context**: `` is always from coroutine-captured UUID +- **Route arbitrarily**: `` is computed from coroutine-captured peers list and thread path +- **Access other 
threads**: UUIDs are opaque; path registry is private +- **Discover topology**: Only peers list is visible; no access to path structure +- **Spoof system messages**: `core` only injectable by system, never handlers + +### What Handlers Can Do + +Handlers can only: + +- **Call declared peers**: Emit XML matching peer schemas (validated against peers list) +- **Self-iterate**: Emit `` (routes back to sender automatically) +- **Return to caller**: Emit any other payload (routes to parent in thread path) +- **Access thread-scoped storage**: Via opaque UUID (isolated per delegation chain) + +### Response Processing Security + +Handler output (raw bytes) undergoes full security processing: + +1. **Wrap in dummy tags** and parse with repair mode +2. **Extract payloads** via C14N and XSD validation +3. **Determine routing** using coroutine-captured metadata (never handler claims) +4. **Inject envelope** with trusted ``, ``, `` from coroutine scope +5. **Re-inject to pipeline** for identical security processing + +Any envelope metadata in handler output is **ignored and overwritten**. + +### Trust Architecture +``` +┌─────────────────────────────────────────────────────┐ +│ TRUSTED ZONE (System) │ +│ • Path registry (UUID → hierarchical path) │ +│ • Listener registry (name → peers, schema) │ +│ • Thread management (pruning, parent lookup) │ +│ • Envelope injection (, , ) │ +└─────────────────────────────────────────────────────┘ + ↕ + Coroutine Capture Boundary + ↕ +┌─────────────────────────────────────────────────────┐ +│ UNTRUSTED ZONE (Handler) │ +│ • Receives: typed payload + opaque UUID │ +│ • Returns: raw bytes │ +│ • Cannot: forge identity, escape thread, probe │ +│ • Can: call peers, self-iterate, return to caller │ +└─────────────────────────────────────────────────────┘ +``` + +This design ensures handlers are **capability-safe by construction**: even fully +compromised handler code cannot violate security boundaries or topology privacy. 
 --- This integrates the blind self-iteration pattern cleanly—no contradictions, stronger obliviousness, and explicit guidance on ``. The unique-root enforcement for agents is called out in Configuration and Schema layers. - -Ready to roll with this as canonical. If you want any final phrasing tweaks or to add YAML examples, just say. 🚀 \ No newline at end of file diff --git a/structure.md b/structure.md index ed9f5b2..a52b739 100644 --- a/structure.md +++ b/structure.md @@ -17,19 +17,22 @@ xml-pipeline/ │ │ ├── llm_connection.py │ │ └── llm_listener.py │ ├── message_bus/ +│ │ ├── steps/ +│ │ │ ├── __init__.py +│ │ │ ├── c14n.py +│ │ │ └── repair.py │ │ ├── __init__.py │ │ ├── bus.py │ │ ├── config.py │ │ ├── envelope.py │ │ ├── errors.py +│ │ ├── message_state.py │ │ ├── scheduler.py │ │ └── thread.py │ ├── prompts/ │ │ ├── grok_classic.py │ │ └── no_paperclippers.py │ ├── schema/ -│ │ ├── payloads/ -│ │ │ └── grok-response.xsd │ │ ├── envelope.xsd │ │ └── privileged-msg.xsd │ ├── utils/ @@ -40,22 +43,24 @@ xml-pipeline/ │ ├── main.py │ └── xml_listener.py ├── docs/ -│ ├── agent-server.md -│ ├── local-privilege-only.md -│ ├── logic-and-iteration.md -│ ├── prompt-no-paperclippers.md -│ └── self-grammar-generation.md -├── scripts/ -│ └── generate_organism_key.py +│ ├── archive-obsolete/ +│ │ ├── logic-and-iteration.md +│ │ ├── thread-management.md +│ │ └── token-scheduling-issues.md +│ ├── configuration.md +│ ├── core-principles-v2.1.md +│ ├── listener-class-v2.1.md +│ ├── message-pump-v2.1.md +│ ├── self-grammar-generation.md +│ └── why-not-json.md ├── tests/ +│ ├── scripts/ +│ │ └── generate_organism_key.py │ └── __init__.py ├── LICENSE ├── README.md -├── README.v0.md -├── README.v1.md ├── __init__.py ├── pyproject.toml ├── setup-project.ps1 └── structure.md - ``` \ No newline at end of file