xml-pipeline/tests/test_restart.py
dullfig d97c24b1dd
Some checks failed
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / test (3.13) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / typecheck (push) Has been cancelled
Add message journal, graceful restart, and clean repo for public release
Three workstreams implemented:

W1 (Repo Split): Remove proprietary BloxServer files and docs, update
pyproject.toml URLs to public GitHub, clean doc references, add CI
workflow (.github/workflows/ci.yml) and CONTRIBUTING.md.

W2 (Message Journal): Add DispatchHook protocol for dispatch lifecycle
events, SQLite-backed MessageJournal with WAL mode for certified-mail
delivery guarantees (PENDING→DISPATCHED→ACKED/FAILED), integrate hooks
into StreamPump._dispatch_to_handlers(), add journal REST endpoints,
and aiosqlite dependency.

W3 (Hot Deployment): Add RestartOrchestrator for graceful restart with
queue drain and journal stats collection, SIGHUP signal handler in CLI,
POST /organism/restart endpoint, restart-aware app lifespan with journal
recovery on boot, and os.execv/subprocess re-exec for Unix/Windows.

All 439 tests pass (37 new tests for W2/W3).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-28 22:27:38 -08:00

172 lines
5.3 KiB
Python

"""
Tests for the RestartOrchestrator.
"""
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from xml_pipeline.server.restart import RestartOrchestrator, RestartResult
class FakePump:
    """Bare-bones stand-in for a pump, used to exercise restart orchestration.

    Exposes only the attributes the tests in this module set or inspect:
    a message queue, a running flag, an optional process pool, and a list
    of dispatch hooks.
    """

    def __init__(self):
        # Each instance gets its own hook list and queue; nothing is shared.
        self.dispatch_hooks = []
        self._process_pool = None
        self._running = True
        self.queue: asyncio.Queue = asyncio.Queue()
class TestRestartOrchestrator:
    """Behavioural tests for ``RestartOrchestrator.initiate_restart``."""

    async def test_initiate_restart_drains_queue(self):
        """With an empty queue the restart reports drained and stops the pump."""
        fake_pump = FakePump()

        outcome = await RestartOrchestrator(fake_pump).initiate_restart(timeout=5.0)

        assert outcome.success is True
        assert outcome.drained is True
        assert fake_pump._running is False

    async def test_initiate_restart_with_nonempty_queue_times_out(self):
        """An unconsumed message leaves the queue undrained once the timeout hits."""
        fake_pump = FakePump()
        # One item is enqueued and task_done() is never called for it, so a
        # join() on the queue cannot complete; there is no consumer. The
        # short timeout is therefore what ends the drain attempt.
        await fake_pump.queue.put("test-message")

        outcome = await RestartOrchestrator(fake_pump).initiate_restart(timeout=0.1)

        assert outcome.success is True
        assert outcome.drained is False  # Timed out
        assert fake_pump._running is False

    async def test_double_restart_rejected(self):
        """A second restart request on the same orchestrator is refused."""
        restarter = RestartOrchestrator(FakePump())

        # The first request goes through.
        first = await restarter.initiate_restart(timeout=5.0)
        assert first.success is True

        # The second one must fail with an explanatory error.
        second = await restarter.initiate_restart(timeout=5.0)
        assert second.success is False
        assert "already in progress" in (second.error or "")

    async def test_is_restarting_property(self):
        """``is_restarting`` is False before a restart and True afterwards."""
        restarter = RestartOrchestrator(FakePump())
        assert restarter.is_restarting is False

        await restarter.initiate_restart(timeout=5.0)

        assert restarter.is_restarting is True

    async def test_collects_journal_stats(self):
        """Stats from a journal among the dispatch hooks appear in the result."""
        from xml_pipeline.message_bus.journal import MessageJournal

        canned_stats = {
            "pending": 0,
            "dispatched": 2,
            "acked": 50,
            "failed": 1,
            "total": 53,
        }
        # A spec'd mock stands in for the journal; only get_stats is stubbed.
        journal_double = MagicMock(spec=MessageJournal)
        journal_double.get_stats = AsyncMock(return_value=canned_stats)

        fake_pump = FakePump()
        fake_pump.dispatch_hooks = [journal_double]

        outcome = await RestartOrchestrator(fake_pump).initiate_restart(timeout=5.0)

        assert outcome.success is True
        assert outcome.journal_stats["total"] == 53
        assert outcome.journal_stats["dispatched"] == 2

    async def test_shuts_down_process_pool(self):
        """A process pool attached to the pump is shut down with ``wait=True``."""
        pool_double = MagicMock()
        fake_pump = FakePump()
        fake_pump._process_pool = pool_double

        await RestartOrchestrator(fake_pump).initiate_restart(timeout=5.0)

        pool_double.shutdown.assert_called_once_with(wait=True)
class TestRestartResult:
    """Checks on how ``RestartResult`` stores and defaults its fields."""

    def test_success_result(self):
        """A successful result keeps its stats and has no error."""
        ok = RestartResult(
            success=True,
            drained=True,
            journal_stats={"total": 10},
        )

        assert ok.success
        assert ok.drained
        assert ok.journal_stats["total"] == 10
        assert ok.error is None

    def test_failure_result(self):
        """A failed result carries the supplied error message."""
        failed = RestartResult(
            success=False,
            drained=False,
            error="Something went wrong",
        )

        assert not failed.success
        assert failed.error == "Something went wrong"
class TestExecRestart:
    """Tests for ``RestartOrchestrator.exec_restart`` with re-exec calls mocked."""

    def test_exec_restart_exists(self):
        """The method is present on the class and callable."""
        assert callable(RestartOrchestrator.exec_restart)

    def test_exec_restart_unix(self):
        """On a Linux platform string, ``os.execv`` re-runs the interpreter."""
        interpreter = "/usr/bin/python3"
        # Patches are entered in the same order the stacked decorators
        # would apply them: ``os`` first, then ``sys``.
        with patch("xml_pipeline.server.restart.os") as fake_os:
            with patch("xml_pipeline.server.restart.sys") as fake_sys:
                fake_sys.platform = "linux"
                fake_sys.executable = interpreter
                fake_sys.argv = ["serve", "organism.yaml"]

                RestartOrchestrator.exec_restart()

                fake_os.execv.assert_called_once_with(
                    "/usr/bin/python3",
                    ["/usr/bin/python3", "serve", "organism.yaml"],
                )

    def test_exec_restart_windows(self):
        """On ``win32`` a new process is started and the current one exits with 0."""
        interpreter = "C:\\Python\\python.exe"
        # Same entry order as the original decorators: ``sys`` first, then
        # ``subprocess``.
        with patch("xml_pipeline.server.restart.sys") as fake_sys:
            with patch("xml_pipeline.server.restart.subprocess") as fake_subprocess:
                fake_sys.platform = "win32"
                fake_sys.executable = interpreter
                fake_sys.argv = ["serve", "organism.yaml"]
                fake_sys.exit = MagicMock()

                RestartOrchestrator.exec_restart()

                fake_subprocess.Popen.assert_called_once_with(
                    ["C:\\Python\\python.exe", "serve", "organism.yaml"]
                )
                fake_sys.exit.assert_called_once_with(0)