fixing docs

2026-01-08 12:30:58 -08:00 · 2026-01-08 12:30:58 -08:00 · ab207d8f0b
commit ab207d8f0b
parent e314bb01e8
9 changed files with 415 additions and 18 deletions
--- a/agentserver/message_bus/steps/envelope_validation.py
+++ b/agentserver/message_bus/steps/envelope_validation.py
@ -0,0 +1,57 @@
+"""
+envelope_validation.py — Validates the canonicalized <message> envelope against envelope.xsd.
+
+After repair_step and c14n_step, we have a normalized envelope_tree.
+This step enforces the outer <message> structure (thread, from, optional to, etc.)
+using the strict envelope.xsd schema.
+
+Failure here is serious — invalid envelope means the message is malformed at the protocol level,
+so we set a clear error and let downstream steps handle it (typically route to system pipeline
+for diagnostic <huh>).
+
+Part of AgentServer v2.1 message pump.
+"""
+
+from lxml import etree
+from agentserver.message_bus.message_state import MessageState
+
+# Load envelope.xsd once at module import (startup time)
+# In real implementation, move this to a config loader or bus init
+# NOTE(review): the schema path below is relative, so it is resolved against the
+# process's current working directory rather than this file's location — importing
+# this module from a different directory will presumably fail to find the schema.
+# TODO confirm the intended launch directory.
+_ENVELOPE_XSD = etree.XMLSchema(file="agentserver/schema/envelope.xsd")
+
+
+async def envelope_validation_step(state: MessageState) -> MessageState:
+    """
+    Validate the canonicalized envelope_tree against the fixed envelope.xsd schema.
+
+    Requirements:
+    - Must be a valid <message> with required <thread> and <from>
+    - Optional <to>, etc.
+    - Namespace must match https://xml-pipeline.org/ns/envelope/v1
+
+    On failure: sets state.error with schema validation details.
+    Downstream steps should short-circuit if error is set.
+    """
+    if state.envelope_tree is None:
+        state.error = "envelope_validation_step: no envelope_tree (previous step failed)"
+        return state
+
+    try:
+        # lxml schema validation — raises XMLSchemaError on failure
+        _ENVELOPE_XSD.assertValid(state.envelope_tree)
+
+        # Optional extra checks (can be removed later if redundant)
+        if state.envelope_tree.tag != "{https://xml-pipeline.org/ns/envelope/v1}message":
+            raise ValueError("Root element is not <message> in expected namespace")
+
+    except etree.DocumentInvalid as exc:
+        # Schema violation — collect all error messages for diagnostics
+        error_lines = []
+        for error in _ENVELOPE_XSD.error_log:
+            error_lines.append(f"{error.level_name}: {error.message} (line {error.line})")
+        state.error = "envelope_validation_step: invalid envelope\n" + "\n".join(error_lines)
+
+    except Exception as exc:  # pylint: disable=broad-except
+        state.error = f"envelope_validation_step failed: {exc}"
+
+    return state
--- a/agentserver/message_bus/steps/payload_extraction.py
+++ b/agentserver/message_bus/steps/payload_extraction.py
@ -0,0 +1,91 @@
+"""
+payload_extraction.py — Extract the inner payload from the validated <message> envelope.
+
+After envelope_validation_step confirms a correct outer <message> envelope,
+this step removes the envelope elements (<thread>, <from>, optional <to>, etc.)
+and isolates the single child element that is the actual payload.
+
+The payload is expected to be exactly one root element (the capability-specific XML).
+If zero or multiple payload roots are found, we set a clear error — this protects
+against malformed or ambiguous messages.
+
+Part of AgentServer v2.1 message pump.
+"""
+
+from lxml import etree
+from agentserver.message_bus.message_state import MessageState
+
+# Envelope namespace for easy reference
+_ENVELOPE_NS = "https://xml-pipeline.org/ns/envelope/v1"
+# Qualified tag in "{namespace}localname" form. The doubled braces in the f-string
+# are escapes that emit the literal "{" and "}" around the interpolated namespace.
+_MESSAGE_TAG = f"{{{ _ENVELOPE_NS }}}message"
+
+
+async def payload_extraction_step(state: MessageState) -> MessageState:
+    """
+    Extract the single payload element from the validated envelope.
+
+    Expected structure:
+      <message xmlns="https://xml-pipeline.org/ns/envelope/v1">
+        <thread>uuid</thread>
+        <from>sender</from>
+        <!-- optional <to>receiver</to> -->
+        <payload_root>   ← this is the one we want
+          ...
+        </payload_root>
+      </message>
+
+    On success: state.payload_tree is set to the payload Element.
+    On failure: state.error is set with a clear diagnostic.
+    """
+    if state.envelope_tree is None:
+        state.error = "payload_extraction_step: no envelope_tree (previous step failed)"
+        return state
+
+    # Basic sanity — root must be <message> in correct namespace (already checked by schema,
+    # but we double-check for defence in depth)
+    if state.envelope_tree.tag != _MESSAGE_TAG:
+        state.error = f"payload_extraction_step: root tag is not <message> in envelope namespace"
+        return state
+
+    # Find all direct children that are not envelope control elements
+    # Envelope control elements are: thread, from, to (optional)
+    payload_candidates = [
+        child
+        for child in state.envelope_tree
+        if not (
+            child.tag in {
+                f"{{{ _ENVELOPE_NS }}}thread",
+                f"{{{ _ENVELOPE_NS }}}from",
+                f"{{{ _ENVELOPE_NS }}}to",
+            }
+        )
+    ]
+
+    if len(payload_candidates) == 0:
+        state.error = "payload_extraction_step: no payload element found inside <message>"
+        return state
+
+    if len(payload_candidates) > 1:
+        state.error = (
+            "payload_extraction_step: multiple payload roots found — "
+            "exactly one capability payload element is allowed"
+        )
+        return state
+
+    # Success — exactly one payload element
+    payload_element = payload_candidates[0]
+
+    # Optional: capture provenance from envelope for later use
+    # (these will be trustworthy because envelope was validated)
+    thread_elem = state.envelope_tree.find(f"{{{ _ENVELOPE_NS }}}thread")
+    from_elem = state.envelope_tree.find(f"{{{ _ENVELOPE_NS }}}from")
+
+    if thread_elem is not None and thread_elem.text:
+        state.thread_id = thread_elem.text.strip()
+
+    if from_elem is not None and from_elem.text:
+        state.from_id = from_elem.text.strip()
+
+    state.payload_tree = payload_element
+
+    return state
--- a/agentserver/message_bus/steps/test_c14n.py
+++ b/agentserver/message_bus/steps/test_c14n.py
--- a/agentserver/message_bus/steps/test_repair.py
+++ b/agentserver/message_bus/steps/test_repair.py
--- a/agentserver/message_bus/steps/thread_assignment.py
+++ b/agentserver/message_bus/steps/thread_assignment.py
@ -0,0 +1,57 @@
+"""
+thread_assignment.py — Ensure every message has a valid opaque thread UUID.
+
+The envelope.xsd requires <thread>, but external clients may:
+  - Omit it (first message)
+  - Send invalid format
+  - Send duplicate/malformed UUID
+
+This step enforces:
+  - Presence of a valid UUID v4 string in <thread>
+  - If missing or invalid → generate a new one (new root thread)
+  - Store it in state.thread_id for all downstream use
+
+This guarantees thread continuity and privacy (external parties never see internal hierarchy).
+
+Part of AgentServer v2.1 message pump.
+"""
+
+import uuid
+from agentserver.message_bus.message_state import MessageState
+
+
+def _is_valid_uuid(val: str) -> bool:
+    """Simple UUID v4 validation — accepts standard string formats."""
+    try:
+        uuid_obj = uuid.UUID(val, version=4)
+        return str(uuid_obj) == val  # Ensures canonical lowercase format
+    except ValueError:
+        return False
+
+
+async def thread_assignment_step(state: MessageState) -> MessageState:
+    """
+    Assign or validate the thread UUID.
+
+    - If state.thread_id is already set and valid → keep it
+    - Else → generate new UUID v4
+    - Always normalizes to lowercase canonical string
+
+    This is the source of truth for thread identity throughout the organism.
+    """
+    if state.thread_id and _is_valid_uuid(state.thread_id):
+        # Already valid — nothing to do
+        return state
+
+    # Invalid, missing, or malformed — generate new root thread
+    new_thread_id = str(uuid.uuid4())
+
+    # Optional: log warning if external client sent bad thread
+    if state.thread_id:
+        state.metadata.setdefault("diagnostics", []).append(
+            f"Invalid external thread ID '{state.thread_id}' — replaced with new root thread"
+        )
+
+    state.thread_id = new_thread_id
+
+    return state
--- a/agentserver/message_bus/steps/xsd_validation.py
+++ b/agentserver/message_bus/steps/xsd_validation.py
@ -0,0 +1,91 @@
+"""
+xsd_validation.py — NOTE: currently a verbatim copy of payload_extraction.py: extracts the inner payload from the validated <message> envelope.
+
+After envelope_validation_step confirms a correct outer <message> envelope,
+this step removes the envelope elements (<thread>, <from>, optional <to>, etc.)
+and isolates the single child element that is the actual payload.
+
+The payload is expected to be exactly one root element (the capability-specific XML).
+If zero or multiple payload roots are found, we set a clear error — this protects
+against malformed or ambiguous messages.
+
+Part of AgentServer v2.1 message pump.
+"""
+
+from lxml import etree
+from agentserver.message_bus.message_state import MessageState
+
+# Envelope namespace for easy reference
+_ENVELOPE_NS = "https://xml-pipeline.org/ns/envelope/v1"
+# Qualified tag in "{namespace}localname" form. The doubled braces in the f-string
+# are escapes that emit the literal "{" and "}" around the interpolated namespace.
+_MESSAGE_TAG = f"{{{ _ENVELOPE_NS }}}message"
+
+
+async def payload_extraction_step(state: MessageState) -> MessageState:
+    """
+    Extract the single payload element from the validated envelope.
+
+    Expected structure:
+      <message xmlns="https://xml-pipeline.org/ns/envelope/v1">
+        <thread>uuid</thread>
+        <from>sender</from>
+        <!-- optional <to>receiver</to> -->
+        <payload_root>   ← this is the one we want
+          ...
+        </payload_root>
+      </message>
+
+    On success: state.payload_tree is set to the payload Element.
+    On failure: state.error is set with a clear diagnostic.
+    """
+    if state.envelope_tree is None:
+        state.error = "payload_extraction_step: no envelope_tree (previous step failed)"
+        return state
+
+    # Basic sanity — root must be <message> in correct namespace (already checked by schema,
+    # but we double-check for defence in depth)
+    if state.envelope_tree.tag != _MESSAGE_TAG:
+        state.error = f"payload_extraction_step: root tag is not <message> in envelope namespace"
+        return state
+
+    # Find all direct children that are not envelope control elements
+    # Envelope control elements are: thread, from, to (optional)
+    payload_candidates = [
+        child
+        for child in state.envelope_tree
+        if not (
+            child.tag in {
+                f"{{{ _ENVELOPE_NS }}}thread",
+                f"{{{ _ENVELOPE_NS }}}from",
+                f"{{{ _ENVELOPE_NS }}}to",
+            }
+        )
+    ]
+
+    if len(payload_candidates) == 0:
+        state.error = "payload_extraction_step: no payload element found inside <message>"
+        return state
+
+    if len(payload_candidates) > 1:
+        state.error = (
+            "payload_extraction_step: multiple payload roots found — "
+            "exactly one capability payload element is allowed"
+        )
+        return state
+
+    # Success — exactly one payload element
+    payload_element = payload_candidates[0]
+
+    # Optional: capture provenance from envelope for later use
+    # (these will be trustworthy because envelope was validated)
+    thread_elem = state.envelope_tree.find(f"{{{ _ENVELOPE_NS }}}thread")
+    from_elem = state.envelope_tree.find(f"{{{ _ENVELOPE_NS }}}from")
+
+    if thread_elem is not None and thread_elem.text:
+        state.thread_id = thread_elem.text.strip()
+
+    if from_elem is not None and from_elem.text:
+        state.from_id = from_elem.text.strip()
+
+    state.payload_tree = payload_element
+
+    return state
--- a/docs/handler-contract-v2.1.md
+++ b/docs/handler-contract-v2.1.md
@ -0,0 +1,77 @@
+# AgentServer v2.1 — Handler Contract
+**January 08, 2026**
+
+This document is the single canonical specification for all capability handlers in AgentServer v2.1.  
+All examples, documentation, and implementation must conform to this contract.
+
+## Handler Signature (Locked)
+
+Every handler **must** be declared with the following exact signature:
+
+```python
+async def handler(
+    payload: PayloadDataclass,      # XSD-validated, deserialized @xmlify dataclass instance
+    metadata: HandlerMetadata       # Minimal trustworthy context provided by the message pump
+) -> bytes:
+    ...
+```
+
+- Handlers **must** be asynchronous (`async def`).
+- Synchronous functions are not permitted and will not be auto-wrapped.
+- The `metadata` parameter is mandatory.
+- The return value **must** be a `bytes` object containing one or more raw XML payload fragments.
+- Returning `None` or any non-`bytes` value is a programming error and will trigger a protective `<huh>` emission.
+
+## HandlerMetadata
+
+```python
+@dataclass(frozen=True)
+class HandlerMetadata:
+    thread_id: str                  # Opaque thread UUID — safe for thread-scoped storage
+    own_name: str | None = None     # Registered name of the executing listener.
+                                    # Populated ONLY for listeners with `agent: true` in organism.yaml
+```
+
+### Field Rationale
+- `thread_id`: Enables isolated per-thread state (e.g., conversation memory, calculator history) without exposing topology.
+- `own_name`: Allows LLM agents to produce self-referential reasoning text while remaining blind to routing mechanics.
+
+No sender identity (`from_id`) is provided — preserving full topology privacy.
+
+## Security Model
+
+The message pump captures all security-critical information (sender name, thread hierarchy, peers list enforcement) in trusted coroutine scope **before** invoking the handler.
+
+Handlers are treated as **untrusted code**. They receive only the minimal safe context defined above and cannot:
+- Forge provenance
+- Escape thread boundaries
+- Probe or leak topology
+- Route arbitrarily
+
+## Example Handlers
+
+**Pure tool (no agent flag):**
+```python
+async def add_handler(payload: AddPayload, metadata: HandlerMetadata) -> bytes:
+    result = payload.a + payload.b
+    return f"<result>{result}</result>".encode("utf-8")
+```
+
+**LLM agent (agent: true):**
+```python
+async def research_handler(payload: ResearchPayload, metadata: HandlerMetadata) -> bytes:
+    own = metadata.own_name or "researcher"  # safe fallback
+    return b"""
+    <thought>I am the """ + own.encode() + b""" agent. Next step...</thought>
+    <calculator.add.addpayload><a>7</a><b>35</b></calculator.add.addpayload>
+    """
+```
+
+## References in Other Documentation
+
+- All code examples in README.md, self-grammar-generation.md, and configuration.md must match this contract.
+- listener-class-v2.1.md now references this file as the authoritative source for signature and metadata.
+
+---
+
+This contract is now **locked** for v2.1.
--- a/docs/listener-class-v2.1.md
+++ b/docs/listener-class-v2.1.md
@ -91,24 +91,8 @@ async def add_handler(
    return f"<result>{result}</result>".encode("utf-8")
 ```

-### Handler Signature (Locked)
-```python
-async def handler(
-    payload: PayloadDataclass,      # Deserialized, XSD-validated instance
-    metadata: HandlerMetadata       # Small, trustworthy context
-) -> bytes:
-    ...
-```
-
-### HandlerMetadata (frozen, read-only)
-```python
-@dataclass(frozen=True)
-class HandlerMetadata:
-    thread_id: str                  # Opaque UUID matching <thread/> in envelope
-    from_id: str                    # Registered name of the sender (pump-injected, trustworthy)
-    own_name: str | None = None     # Populated ONLY for listeners with agent: true
-    is_self_call: bool = False      # Convenience flag: from_id == own_name
-```
+### Handler Signature and Metadata (Locked)
+See [handler-contract-v2.1.md](handler-contract-v2.1.md) for the canonical handler signature and metadata definition.

 Typical uses:
 - Stateful tools → key persistent data by `thread_id`
--- a/docs/primitives.md
+++ b/docs/primitives.md
@ -0,0 +1,40 @@
+# AgentServer v2.1 — System Primitives (Magic Tags)
+
+These payload root elements receive special routing and/or side effects in the message pump.  
+They reside in the reserved namespace `https://xml-pipeline.org/ns/core/v1`.
+
+## `<huh>`
+- Emitted exclusively by the system
+- Routes back to the listener that triggered the error
+- Payload structure:
+  ```xml
+  <huh>
+    <error>Brief canned error message (e.g., "Invalid payload structure")</error>
+    <original-attempt>Base64-encoded raw bytes of the failed attempt (truncated if large)</original-attempt>
+  </huh>
+  ```
+- Purpose: Safe, LLM-friendly diagnostic feedback
+- Security note: Error messages are abstract and canned — no raw validator output is exposed to agents.
+- Security note (topology privacy):
+  - Certain classes of errors (payload schema violations, unknown root tags, etc.) are intentionally reported with identical abstract messages.
+  - This prevents topology probing: an agent or external caller cannot distinguish between "wrong schema for existing capability" and "capability does not exist".
+  - Authorized introspection is available only via controlled meta queries.
+
+## `<todo-until>`
+- May be emitted by any listener
+- Routes to self (uses the emitting listener's unique root tag mechanism)
+- No side effects
+- Purpose: Optional visible scaffolding for structured reasoning and iteration planning
+
+## `<return>`
+- May be emitted by any listener
+- Routes to the immediate parent listener in the private thread hierarchy
+- Side effect: The subthread below the current listener is pruned after successful delivery of the message.<br>After pruning, the thread tail is the current listener.
+- Purpose: Explicit return-to-caller semantics with automatic cleanup
+
+## `<halt>`
+- May be emitted by any listener
+- Routes to the immediate parent listener in the private thread hierarchy
+- Side effect: The entire thread is pruned up to and including the current listener.<br>After pruning, the thread tail is the parent listener.
+- Purpose: Explicit termination of the current thread and all its subthreads