Wire budget cleanup to thread lifecycle
When threads terminate (handler returns None or chain exhausted), the pump now calls budget_registry.cleanup_thread() to: - Free memory for completed threads - Return final budget for logging/billing - Log token usage at debug level This ensures budgets don't accumulate for completed conversations. Also adds: - has_budget() method to check if thread exists without creating - Tests for cleanup behavior Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
860395cd58
commit
f98a21f96b
2 changed files with 79 additions and 0 deletions
|
|
@ -571,3 +571,64 @@ class TestLLMRouterBudgetIntegration:
|
||||||
)
|
)
|
||||||
|
|
||||||
assert response.content == "Hello!"
|
assert response.content == "Hello!"
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Budget Cleanup Tests
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
class TestBudgetCleanup:
|
||||||
|
"""Test budget cleanup when threads complete."""
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def reset_all(self):
|
||||||
|
"""Reset all global registries."""
|
||||||
|
reset_budget_registry()
|
||||||
|
yield
|
||||||
|
reset_budget_registry()
|
||||||
|
|
||||||
|
def test_cleanup_thread_returns_budget(self):
|
||||||
|
"""cleanup_thread should return the budget before removing it."""
|
||||||
|
registry = ThreadBudgetRegistry()
|
||||||
|
registry.consume("thread-1", prompt_tokens=500, completion_tokens=200)
|
||||||
|
|
||||||
|
final = registry.cleanup_thread("thread-1")
|
||||||
|
|
||||||
|
assert final is not None
|
||||||
|
assert final.prompt_tokens == 500
|
||||||
|
assert final.completion_tokens == 200
|
||||||
|
assert final.total_tokens == 700
|
||||||
|
|
||||||
|
def test_cleanup_thread_removes_budget(self):
|
||||||
|
"""cleanup_thread should remove the budget from registry."""
|
||||||
|
registry = ThreadBudgetRegistry()
|
||||||
|
registry.consume("thread-1", prompt_tokens=500, completion_tokens=200)
|
||||||
|
|
||||||
|
registry.cleanup_thread("thread-1")
|
||||||
|
|
||||||
|
# Budget should no longer exist
|
||||||
|
assert not registry.has_budget("thread-1")
|
||||||
|
assert registry.get_usage("thread-1") is None
|
||||||
|
|
||||||
|
def test_cleanup_nonexistent_thread_returns_none(self):
|
||||||
|
"""cleanup_thread for unknown thread should return None."""
|
||||||
|
registry = ThreadBudgetRegistry()
|
||||||
|
|
||||||
|
result = registry.cleanup_thread("nonexistent")
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_global_cleanup(self):
|
||||||
|
"""Test cleanup via global registry."""
|
||||||
|
configure_budget_registry(max_tokens_per_thread=10000)
|
||||||
|
registry = get_budget_registry()
|
||||||
|
|
||||||
|
# Consume some tokens
|
||||||
|
registry.consume("test-thread", prompt_tokens=1000, completion_tokens=500)
|
||||||
|
assert registry.has_budget("test-thread")
|
||||||
|
|
||||||
|
# Cleanup
|
||||||
|
final = registry.cleanup_thread("test-thread")
|
||||||
|
|
||||||
|
assert final.total_tokens == 1500
|
||||||
|
assert not registry.has_budget("test-thread")
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ from xml_pipeline.message_bus.steps.thread_assignment import thread_assignment_s
|
||||||
from xml_pipeline.message_bus.message_state import MessageState, HandlerMetadata, HandlerResponse, SystemError, ROUTING_ERROR
|
from xml_pipeline.message_bus.message_state import MessageState, HandlerMetadata, HandlerResponse, SystemError, ROUTING_ERROR
|
||||||
from xml_pipeline.message_bus.thread_registry import get_registry
|
from xml_pipeline.message_bus.thread_registry import get_registry
|
||||||
from xml_pipeline.message_bus.todo_registry import get_todo_registry
|
from xml_pipeline.message_bus.todo_registry import get_todo_registry
|
||||||
|
from xml_pipeline.message_bus.budget_registry import get_budget_registry
|
||||||
from xml_pipeline.memory import get_context_buffer
|
from xml_pipeline.memory import get_context_buffer
|
||||||
|
|
||||||
pump_logger = logging.getLogger(__name__)
|
pump_logger = logging.getLogger(__name__)
|
||||||
|
|
@ -681,6 +682,15 @@ class StreamPump:
|
||||||
|
|
||||||
# None means "no response needed" - don't re-inject
|
# None means "no response needed" - don't re-inject
|
||||||
if response is None:
|
if response is None:
|
||||||
|
# Thread terminates here - cleanup budget
|
||||||
|
budget_registry = get_budget_registry()
|
||||||
|
final_budget = budget_registry.cleanup_thread(current_thread)
|
||||||
|
if final_budget:
|
||||||
|
pump_logger.debug(
|
||||||
|
f"Thread {current_thread[:8]}... completed: "
|
||||||
|
f"{final_budget.total_tokens} tokens used"
|
||||||
|
)
|
||||||
|
|
||||||
# Emit idle state
|
# Emit idle state
|
||||||
self._emit_event(AgentStateEvent(
|
self._emit_event(AgentStateEvent(
|
||||||
agent_name=listener.name,
|
agent_name=listener.name,
|
||||||
|
|
@ -698,6 +708,14 @@ class StreamPump:
|
||||||
target, new_thread_id = registry.prune_for_response(current_thread)
|
target, new_thread_id = registry.prune_for_response(current_thread)
|
||||||
if target is None:
|
if target is None:
|
||||||
# Chain exhausted - nowhere to respond to
|
# Chain exhausted - nowhere to respond to
|
||||||
|
# Cleanup thread budget
|
||||||
|
budget_registry = get_budget_registry()
|
||||||
|
final_budget = budget_registry.cleanup_thread(current_thread)
|
||||||
|
if final_budget:
|
||||||
|
pump_logger.debug(
|
||||||
|
f"Thread {current_thread[:8]}... chain exhausted: "
|
||||||
|
f"{final_budget.total_tokens} tokens used"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
to_id = target
|
to_id = target
|
||||||
thread_id = new_thread_id
|
thread_id = new_thread_id
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue