xml-pipeline/tests/test_todo_registry.py
dullfig 8aa58715df Add TodoUntil watcher system for async confirmation tracking
Implements an observer pattern where agents can register watchers
for conditions on their thread. When the condition is met, the
agent gets "nagged" on subsequent invocations until it explicitly
closes the todo.

Key components:
- TodoRegistry: thread-scoped watcher tracking with eyebrow state
- TodoUntil/TodoComplete payloads and system handlers
- HandlerMetadata.todo_nudge for delivering raised eyebrow notices
- Integration in StreamPump dispatch to check and nudge

Greeter now demonstrates the pattern:
1. Registers watcher for ShoutedResponse from shouter
2. On next invocation, sees nudge and closes completed todos
3. Includes nudge in LLM prompt for awareness

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 16:51:59 -08:00

569 lines
19 KiB
Python

"""
test_todo_registry.py — Tests for the TodoUntil watcher system.
Tests:
1. TodoRegistry basic operations
2. Eyebrow raising on condition match
3. Nudge formatting
4. Integration with StreamPump
"""
import pytest
import asyncio
import uuid
from unittest.mock import AsyncMock, patch
from agentserver.message_bus.todo_registry import TodoRegistry, TodoWatcher, get_todo_registry
from agentserver.message_bus.stream_pump import StreamPump, ListenerConfig, OrganismConfig
from agentserver.message_bus.message_state import HandlerMetadata, HandlerResponse
from agentserver.primitives.todo import (
TodoUntil, TodoComplete, TodoRegistered, TodoClosed,
handle_todo_until, handle_todo_complete,
)
class TestTodoRegistry:
"""Test TodoRegistry basic operations."""
def test_register_creates_watcher(self):
"""register() should create a watcher with correct fields."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
watcher_id = registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
from_listener="shouter",
description="waiting for shout",
)
assert watcher_id is not None
assert watcher_id in registry._by_id
assert len(registry._watchers[thread_id]) == 1
watcher = registry._by_id[watcher_id]
assert watcher.thread_id == thread_id
assert watcher.issuer == "greeter"
assert watcher.wait_for == "shoutedresponse" # Normalized to lowercase
assert watcher.from_listener == "shouter"
assert watcher.description == "waiting for shout"
assert watcher.eyebrow_raised is False
def test_check_raises_eyebrow_on_match(self):
"""check() should raise eyebrow when payload type matches."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
watcher_id = registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
)
# Check with matching payload type
newly_raised = registry.check(
thread_id=thread_id,
payload_type="ShoutedResponse",
from_id="shouter",
payload={"message": "HELLO!"},
)
assert len(newly_raised) == 1
assert newly_raised[0].id == watcher_id
assert newly_raised[0].eyebrow_raised is True
assert newly_raised[0].triggered_from == "shouter"
def test_check_respects_from_listener_filter(self):
"""check() should only match if from_listener matches."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
from_listener="shouter", # Must be from shouter
)
# Check from wrong sender - should NOT raise
newly_raised = registry.check(
thread_id=thread_id,
payload_type="ShoutedResponse",
from_id="other-agent",
)
assert len(newly_raised) == 0
# Check from correct sender - should raise
newly_raised = registry.check(
thread_id=thread_id,
payload_type="ShoutedResponse",
from_id="shouter",
)
assert len(newly_raised) == 1
def test_check_ignores_non_matching_payload(self):
"""check() should not raise eyebrow for non-matching payload."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
)
# Check with non-matching payload type
newly_raised = registry.check(
thread_id=thread_id,
payload_type="Greeting",
from_id="console",
)
assert len(newly_raised) == 0
def test_check_ignores_different_thread(self):
"""check() should not match watchers on different threads."""
registry = TodoRegistry()
thread_id_1 = str(uuid.uuid4())
thread_id_2 = str(uuid.uuid4())
registry.register(
thread_id=thread_id_1,
issuer="greeter",
wait_for="ShoutedResponse",
)
# Check on different thread
newly_raised = registry.check(
thread_id=thread_id_2,
payload_type="ShoutedResponse",
from_id="shouter",
)
assert len(newly_raised) == 0
def test_get_raised_for_returns_raised_watchers(self):
"""get_raised_for() should return only raised watchers for agent."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
# Register two watchers for greeter
registry.register(thread_id, "greeter", "ShoutedResponse")
registry.register(thread_id, "greeter", "AnotherPayload")
# Raise one
registry.check(thread_id, "ShoutedResponse", "shouter")
raised = registry.get_raised_for(thread_id, "greeter")
assert len(raised) == 1
assert raised[0].wait_for == "shoutedresponse"
def test_close_removes_watcher(self):
"""close() should remove watcher by ID."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
watcher_id = registry.register(thread_id, "greeter", "ShoutedResponse")
assert watcher_id in registry._by_id
result = registry.close(watcher_id)
assert result is True
assert watcher_id not in registry._by_id
assert thread_id not in registry._watchers # Empty list cleaned up
def test_close_returns_false_for_unknown(self):
"""close() should return False for unknown watcher ID."""
registry = TodoRegistry()
result = registry.close("nonexistent-id")
assert result is False
def test_format_nudge_empty_for_no_raised(self):
"""format_nudge() should return empty string for no raised watchers."""
registry = TodoRegistry()
nudge = registry.format_nudge([])
assert nudge == ""
def test_format_nudge_includes_watcher_info(self):
"""format_nudge() should format raised watchers."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
watcher_id = registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
description="waiting for shout",
)
registry.check(thread_id, "ShoutedResponse", "shouter")
raised = registry.get_raised_for(thread_id, "greeter")
nudge = registry.format_nudge(raised)
assert "SYSTEM NOTE" in nudge
assert "shoutedresponse" in nudge.lower()
assert "waiting for shout" in nudge
assert watcher_id in nudge
assert "TodoComplete" in nudge
class TestTodoHandlers:
"""Test TodoUntil and TodoComplete handlers."""
@pytest.mark.asyncio
async def test_handle_todo_until_registers_watcher(self):
"""handle_todo_until should register a watcher."""
# Clear any existing watchers
registry = get_todo_registry()
registry.clear()
payload = TodoUntil(
wait_for="ShoutedResponse",
from_listener="shouter",
description="test watcher",
)
metadata = HandlerMetadata(
thread_id=str(uuid.uuid4()),
from_id="greeter",
)
response = await handle_todo_until(payload, metadata)
assert isinstance(response, HandlerResponse)
assert isinstance(response.payload, TodoRegistered)
assert response.payload.wait_for == "ShoutedResponse"
assert response.to == "greeter"
# Verify watcher was registered
assert response.payload.id in registry._by_id
@pytest.mark.asyncio
async def test_handle_todo_complete_closes_watcher(self):
"""handle_todo_complete should close a watcher."""
registry = get_todo_registry()
registry.clear()
thread_id = str(uuid.uuid4())
# First register a watcher
watcher_id = registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
)
# Raise its eyebrow
registry.check(thread_id, "ShoutedResponse", "shouter")
# Now close it
payload = TodoComplete(id=watcher_id)
metadata = HandlerMetadata(thread_id=thread_id, from_id="greeter")
response = await handle_todo_complete(payload, metadata)
assert isinstance(response, HandlerResponse)
assert isinstance(response.payload, TodoClosed)
assert response.payload.id == watcher_id
assert response.payload.was_raised is True
# Verify watcher was removed
assert watcher_id not in registry._by_id
class TestTodoIntegration:
"""Integration tests for TodoUntil with StreamPump."""
@pytest.mark.asyncio
async def test_todo_nudge_appears_in_metadata(self):
"""Raised eyebrows should appear in handler metadata."""
from handlers.hello import Greeting, GreetingResponse, handle_greeting
from agentserver.llm.backend import LLMResponse
# Clear registries
todo_registry = get_todo_registry()
todo_registry.clear()
# Create pump with greeter
config = OrganismConfig(name="todo-test")
pump = StreamPump(config)
pump.register_listener(ListenerConfig(
name="greeter",
payload_class_path="handlers.hello.Greeting",
handler_path="handlers.hello.handle_greeting",
description="Greeting agent",
is_agent=True,
peers=["shouter"],
payload_class=Greeting,
handler=handle_greeting,
))
# Register a todo watcher for greeter
thread_id = str(uuid.uuid4())
watcher_id = todo_registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="SomeResponse",
description="waiting for something",
)
# Raise the eyebrow
todo_registry.check(thread_id, "SomeResponse", "other")
# Track metadata passed to handler
captured_metadata = []
original_handler = pump.listeners["greeter"].handler
async def capturing_handler(payload, metadata):
captured_metadata.append(metadata)
return HandlerResponse(
payload=GreetingResponse(message="hi", original_sender="test"),
to="shouter",
)
pump.listeners["greeter"].handler = capturing_handler
# Create and inject a message
from agentserver.message_bus.message_state import MessageState
state = MessageState(
payload=Greeting(name="Test"),
thread_id=thread_id,
from_id="console",
target_listeners=[pump.listeners["greeter"]],
)
# Dispatch
responses = []
async for resp in pump._dispatch_to_handlers(state):
responses.append(resp)
# Verify nudge was in metadata
assert len(captured_metadata) == 1
assert "SYSTEM NOTE" in captured_metadata[0].todo_nudge
assert watcher_id in captured_metadata[0].todo_nudge
@pytest.mark.asyncio
async def test_message_raises_eyebrow(self):
"""Incoming message should raise eyebrow on matching watcher."""
from handlers.hello import Greeting, GreetingResponse
from handlers.console import ShoutedResponse
# Clear registry
todo_registry = get_todo_registry()
todo_registry.clear()
config = OrganismConfig(name="eyebrow-test")
pump = StreamPump(config)
# Register a simple handler
async def noop_handler(payload, metadata):
return None
pump.register_listener(ListenerConfig(
name="response-handler",
payload_class_path="handlers.console.ShoutedResponse",
handler_path="handlers.console.handle_shouted_response",
description="Test",
payload_class=ShoutedResponse,
handler=noop_handler,
))
# Register a watcher waiting for ShoutedResponse
thread_id = str(uuid.uuid4())
watcher_id = todo_registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
)
# Eyebrow should not be raised yet
watcher = todo_registry._by_id[watcher_id]
assert watcher.eyebrow_raised is False
# Dispatch a ShoutedResponse message
from agentserver.message_bus.message_state import MessageState
state = MessageState(
payload=ShoutedResponse(message="HELLO!"),
thread_id=thread_id,
from_id="shouter",
target_listeners=[pump.listeners["response-handler"]],
)
async for _ in pump._dispatch_to_handlers(state):
pass
# Now eyebrow should be raised
assert watcher.eyebrow_raised is True
assert watcher.triggered_from == "shouter"
class TestGreeterTodoFlow:
"""Test greeter's use of TodoUntil."""
@pytest.mark.asyncio
async def test_greeter_registers_todo_and_eyebrow_raised(self):
"""
Greeter should register a TodoUntil, and eyebrow should raise
when ShoutedResponse arrives.
"""
from handlers.hello import Greeting, GreetingResponse, handle_greeting
from handlers.console import ShoutedResponse
from agentserver.llm.backend import LLMResponse
# Clear registry
todo_registry = get_todo_registry()
todo_registry.clear()
thread_id = str(uuid.uuid4())
# Mock LLM
mock_llm = LLMResponse(
content="Hello there!",
model="mock",
usage={"total_tokens": 5},
finish_reason="stop",
)
with patch('agentserver.llm.complete', new=AsyncMock(return_value=mock_llm)):
# Call greeter handler
metadata = HandlerMetadata(
thread_id=thread_id,
from_id="console-router",
own_name="greeter",
)
response = await handle_greeting(Greeting(name="Alice"), metadata)
# Verify response goes to shouter
assert response.to == "shouter"
assert isinstance(response.payload, GreetingResponse)
# Verify a watcher was registered
watchers = todo_registry._watchers.get(thread_id, [])
assert len(watchers) == 1
assert watchers[0].issuer == "greeter"
assert watchers[0].wait_for == "shoutedresponse"
assert watchers[0].eyebrow_raised is False
# Simulate ShoutedResponse arriving (from shouter)
todo_registry.check(
thread_id=thread_id,
payload_type="ShoutedResponse",
from_id="shouter",
payload=ShoutedResponse(message="HELLO THERE!"),
)
# Eyebrow should now be raised
assert watchers[0].eyebrow_raised is True
assert watchers[0].triggered_from == "shouter"
@pytest.mark.asyncio
async def test_greeter_closes_raised_todos_on_next_call(self):
"""
When greeter is called again with raised todos, it should close them.
"""
from handlers.hello import Greeting, GreetingResponse, handle_greeting
from agentserver.llm.backend import LLMResponse
# Clear registry
todo_registry = get_todo_registry()
todo_registry.clear()
thread_id = str(uuid.uuid4())
# Pre-register a raised todo (simulating previous invocation)
watcher_id = todo_registry.register(
thread_id=thread_id,
issuer="greeter",
wait_for="ShoutedResponse",
)
todo_registry.check(thread_id, "ShoutedResponse", "shouter")
# Verify eyebrow is raised
assert todo_registry._by_id[watcher_id].eyebrow_raised is True
# Mock LLM
mock_llm = LLMResponse(
content="Hello again!",
model="mock",
usage={"total_tokens": 5},
finish_reason="stop",
)
# Format the nudge as the pump would
raised = todo_registry.get_raised_for(thread_id, "greeter")
nudge = todo_registry.format_nudge(raised)
with patch('agentserver.llm.complete', new=AsyncMock(return_value=mock_llm)):
# Call greeter with the nudge
metadata = HandlerMetadata(
thread_id=thread_id,
from_id="console-router",
own_name="greeter",
todo_nudge=nudge,
)
await handle_greeting(Greeting(name="Bob"), metadata)
# Old watcher should be closed
assert watcher_id not in todo_registry._by_id
# But a new one should be registered for this greeting
watchers = todo_registry._watchers.get(thread_id, [])
assert len(watchers) == 1
assert watchers[0].id != watcher_id # New watcher
class TestTodoMultipleWatchers:
"""Test scenarios with multiple watchers."""
def test_multiple_watchers_same_thread(self):
"""Multiple watchers on same thread should work independently."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
id1 = registry.register(thread_id, "agent1", "ResponseA")
id2 = registry.register(thread_id, "agent2", "ResponseB")
id3 = registry.register(thread_id, "agent1", "ResponseB")
# Raise only ResponseB watchers
raised = registry.check(thread_id, "ResponseB", "source")
assert len(raised) == 2
raised_ids = {w.id for w in raised}
assert id2 in raised_ids
assert id3 in raised_ids
assert id1 not in raised_ids
def test_watcher_only_raised_once(self):
"""Eyebrow should only raise once per watcher."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
registry.register(thread_id, "agent", "Response")
# First check raises
raised1 = registry.check(thread_id, "Response", "source")
assert len(raised1) == 1
# Second check does not re-raise
raised2 = registry.check(thread_id, "Response", "source")
assert len(raised2) == 0
def test_close_all_for_thread(self):
"""close_all_for_thread should remove all watchers on thread."""
registry = TodoRegistry()
thread_id = str(uuid.uuid4())
id1 = registry.register(thread_id, "agent1", "A")
id2 = registry.register(thread_id, "agent2", "B")
count = registry.close_all_for_thread(thread_id)
assert count == 2
assert id1 not in registry._by_id
assert id2 not in registry._by_id
assert thread_id not in registry._watchers