""" payload_extraction.py — Extract the inner payload from the validated envelope. After envelope_validation_step confirms a correct outer envelope, this step removes the envelope elements (, , optional , etc.) and isolates the single child element that is the actual payload. The payload is expected to be exactly one root element (the capability-specific XML). If zero or multiple payload roots are found, we set a clear error — this protects against malformed or ambiguous messages. Part of AgentServer v2.1 message pump. """ from lxml import etree from agentserver.message_bus.message_state import MessageState # Envelope namespace for easy reference _ENVELOPE_NS = "https://xml-pipeline.org/ns/envelope/v1" _MESSAGE_TAG = f"{{{ _ENVELOPE_NS }}}message" async def payload_extraction_step(state: MessageState) -> MessageState: """ Extract the single payload element from the validated envelope. Expected structure: uuid sender ← this is the one we want ... On success: state.payload_tree is set to the payload Element. On failure: state.error is set with a clear diagnostic. """ if state.envelope_tree is None: state.error = "payload_extraction_step: no envelope_tree (previous step failed)" return state # Basic sanity — root must be in correct namespace (already checked by schema, # but we double-check for defence in depth) if state.envelope_tree.tag != _MESSAGE_TAG: state.error = f"payload_extraction_step: root tag is not in envelope namespace" return state # Find all direct children that are not envelope control elements # Envelope control elements are: thread, from, to (optional) payload_candidates = [ child for child in state.envelope_tree if not ( child.tag in { f"{{{ _ENVELOPE_NS }}}thread", f"{{{ _ENVELOPE_NS }}}from", f"{{{ _ENVELOPE_NS }}}to", } ) ] if len(payload_candidates) == 0: state.error = "payload_extraction_step: no payload element found inside " return state if len(payload_candidates) > 1: state.error = ( "payload_extraction_step: multiple payload roots found — " "exactly one capability payload element is allowed" ) return state # Success — exactly one payload element payload_element = payload_candidates[0] # Optional: capture provenance from envelope for later use # (these will be trustworthy because envelope was validated) thread_elem = state.envelope_tree.find(f"{{{ _ENVELOPE_NS }}}thread") from_elem = state.envelope_tree.find(f"{{{ _ENVELOPE_NS }}}from") if thread_elem is not None and thread_elem.text: state.thread_id = thread_elem.text.strip() if from_elem is not None and from_elem.text: state.from_id = from_elem.text.strip() state.payload_tree = payload_element return state