Add usage persistence for billing (SQLite)
- UsageStore with async SQLite persistence via aiosqlite - Background batch writer for non-blocking event persistence - Auto-subscribes to UsageTracker for transparent capture - Query methods: query(), get_billing_summary(), get_daily_usage() - REST API endpoints: /usage/history, /usage/billing, /usage/daily - Filtering by org_id, agent_id, model, time range - 18 new tests for persistence layer Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
e6697f0ea2
commit
d0d78a9f70
5 changed files with 1227 additions and 1 deletions
398
tests/test_usage_store.py
Normal file
398
tests/test_usage_store.py
Normal file
|
|
@ -0,0 +1,398 @@
|
||||||
|
"""
|
||||||
|
Tests for usage persistence layer.
|
||||||
|
|
||||||
|
Tests UsageStore's SQLite persistence, querying, and billing summaries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from xml_pipeline.llm.usage_tracker import UsageEvent, get_usage_tracker, reset_usage_tracker
|
||||||
|
from xml_pipeline.llm.usage_store import UsageStore, reset_usage_store
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_db():
    """Yield the path to a temporary SQLite database file.

    The file is created with ``delete=False`` so it survives the ``with``
    block (the store reopens it by path), then removed explicitly on
    teardown. The original version leaked the file: with ``delete=False``
    nothing cleans it up "automatically".
    """
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        path = f.name
    try:
        yield path
    finally:
        # Fix: explicit cleanup; missing_ok covers tests that delete it.
        Path(path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
async def store(temp_db):
    """Yield an initialized UsageStore backed by the temp database."""
    # Start every test from a clean slate: no leftover subscribers or
    # singleton state from a previous test.
    reset_usage_tracker()
    reset_usage_store()

    usage_store = UsageStore(db_path=temp_db)
    await usage_store.initialize()
    yield usage_store
    await usage_store.close()
|
||||||
|
|
||||||
|
class TestUsageStoreBasics:
    """Basic store operations: schema creation, idempotent init, empty state."""

    async def test_initialize_creates_table(self, temp_db):
        """Store initialization creates the usage_events table."""
        reset_usage_store()
        fresh = UsageStore(db_path=temp_db)
        await fresh.initialize()

        # A successful (empty) query proves the table exists.
        assert await fresh.query(limit=10) == []

        await fresh.close()

    async def test_store_initializes_once(self, store):
        """Multiple initialize calls are idempotent."""
        for _ in range(2):
            await store.initialize()  # must not raise

    async def test_empty_query(self, store):
        """Query on empty store returns empty list."""
        assert await store.query() == []

    async def test_empty_count(self, store):
        """Count on empty store returns 0."""
        assert await store.count() == 0
|
||||||
|
class TestEventPersistence:
    """Test event persistence via the tracker-subscriber pattern."""

    @staticmethod
    async def _wait_for_count(store, expected, timeout=5.0):
        """Poll until the store holds *expected* events or *timeout* elapses.

        Replaces the fixed ``asyncio.sleep(1.5)`` waits, which were both
        slow (1.5 s per test regardless of flush speed) and flaky under
        load (1.5 s may not be enough on a busy machine).
        """
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while await store.count() < expected:
            if loop.time() >= deadline:
                break  # let the test's own assertion report the failure
            await asyncio.sleep(0.05)

    async def test_event_persisted_via_tracker(self, store):
        """Events recorded in tracker are persisted to store."""
        tracker = get_usage_tracker()

        # Record an event
        tracker.record(
            thread_id="test-thread-1",
            agent_id="greeter",
            model="grok-4.1",
            provider="xai",
            prompt_tokens=100,
            completion_tokens=50,
            latency_ms=250.5,
            metadata={"org_id": "org-123"},
        )

        # Wait for the background writer to flush (bounded poll, not sleep)
        await self._wait_for_count(store, 1)

        # Query should find the event
        events = await store.query()
        assert len(events) == 1

        event = events[0]
        assert event["thread_id"] == "test-thread-1"
        assert event["agent_id"] == "greeter"
        assert event["model"] == "grok-4.1"
        assert event["provider"] == "xai"
        assert event["prompt_tokens"] == 100
        assert event["completion_tokens"] == 50
        assert event["total_tokens"] == 150
        assert event["latency_ms"] == 250.5

    async def test_multiple_events_persisted(self, store):
        """Multiple events are all persisted."""
        tracker = get_usage_tracker()

        for i in range(5):
            tracker.record(
                thread_id=f"thread-{i}",
                agent_id="agent",
                model="grok-4.1",
                provider="xai",
                prompt_tokens=100 * (i + 1),
                completion_tokens=50 * (i + 1),
                latency_ms=100.0,
            )

        await self._wait_for_count(store, 5)

        events = await store.query()
        assert len(events) == 5

    async def test_event_with_cost_estimate(self, store):
        """Estimated cost is persisted."""
        tracker = get_usage_tracker()

        # grok-4.1 has pricing in MODEL_COSTS
        tracker.record(
            thread_id="cost-thread",
            agent_id="agent",
            model="grok-4.1",
            provider="xai",
            prompt_tokens=1000,
            completion_tokens=500,
            latency_ms=100.0,
        )

        await self._wait_for_count(store, 1)

        events = await store.query()
        assert len(events) == 1
        assert events[0]["estimated_cost"] is not None
        assert events[0]["estimated_cost"] > 0
||||||
|
|
||||||
|
class TestQueryFiltering:
    """Query filtering options: agent, model, org, pagination."""

    async def test_filter_by_agent(self, store):
        """Filter events by agent_id."""
        tracker = get_usage_tracker()

        # Two events differing only in agent_id.
        common = dict(
            model="grok-4.1", provider="xai",
            prompt_tokens=100, completion_tokens=50, latency_ms=100.0,
        )
        tracker.record(thread_id="t1", agent_id="greeter", **common)
        tracker.record(thread_id="t2", agent_id="shouter", **common)

        await asyncio.sleep(1.5)

        matched = await store.query(agent_id="greeter")
        assert len(matched) == 1
        assert matched[0]["agent_id"] == "greeter"

    async def test_filter_by_model(self, store):
        """Filter events by model."""
        tracker = get_usage_tracker()

        tracker.record(
            thread_id="t1", agent_id="agent", model="grok-4.1",
            provider="xai", prompt_tokens=100, completion_tokens=50,
            latency_ms=100.0,
        )
        tracker.record(
            thread_id="t2", agent_id="agent", model="claude-sonnet-4",
            provider="anthropic", prompt_tokens=100, completion_tokens=50,
            latency_ms=100.0,
        )

        await asyncio.sleep(1.5)

        matched = await store.query(model="grok-4.1")
        assert len(matched) == 1
        assert matched[0]["model"] == "grok-4.1"

    async def test_filter_by_org_id(self, store):
        """Filter events by org_id in metadata."""
        tracker = get_usage_tracker()

        common = dict(
            agent_id="agent", model="grok-4.1", provider="xai",
            prompt_tokens=100, completion_tokens=50, latency_ms=100.0,
        )
        tracker.record(thread_id="t1", metadata={"org_id": "org-A"}, **common)
        tracker.record(thread_id="t2", metadata={"org_id": "org-B"}, **common)

        await asyncio.sleep(1.5)

        matched = await store.query(org_id="org-A")
        assert len(matched) == 1
        assert matched[0]["thread_id"] == "t1"

    async def test_pagination(self, store):
        """Test limit and offset for pagination."""
        tracker = get_usage_tracker()

        for idx in range(10):
            tracker.record(
                thread_id=f"t{idx:02d}", agent_id="agent", model="grok-4.1",
                provider="xai", prompt_tokens=100, completion_tokens=50,
                latency_ms=100.0,
            )

        await asyncio.sleep(1.5)

        page1 = await store.query(limit=3, offset=0)
        page2 = await store.query(limit=3, offset=3)

        assert len(page1) == 3
        assert len(page2) == 3
        # Pages must not overlap.
        assert page1[0]["thread_id"] != page2[0]["thread_id"]
|
|
||||||
|
class TestBillingSummary:
    """Billing summary aggregation: totals, per-model, per-agent, per-org."""

    async def test_billing_totals(self, store):
        """Billing summary calculates correct totals."""
        tracker = get_usage_tracker()

        common = dict(agent_id="agent", model="grok-4.1", provider="xai",
                      latency_ms=100.0)
        tracker.record(thread_id="t1", prompt_tokens=1000,
                       completion_tokens=500, **common)
        tracker.record(thread_id="t2", prompt_tokens=2000,
                       completion_tokens=1000, **common)

        await asyncio.sleep(1.5)

        summary = await store.get_billing_summary()

        # 1500 + 3000
        assert summary.total_tokens == 4500
        assert summary.prompt_tokens == 3000
        assert summary.completion_tokens == 1500
        assert summary.request_count == 2

    async def test_billing_by_model(self, store):
        """Billing summary includes breakdown by model."""
        tracker = get_usage_tracker()

        tracker.record(
            thread_id="t1", agent_id="agent", model="grok-4.1",
            provider="xai", prompt_tokens=1000, completion_tokens=500,
            latency_ms=100.0,
        )
        tracker.record(
            thread_id="t2", agent_id="agent", model="claude-sonnet-4",
            provider="anthropic", prompt_tokens=2000, completion_tokens=1000,
            latency_ms=100.0,
        )

        await asyncio.sleep(1.5)

        summary = await store.get_billing_summary()

        assert "grok-4.1" in summary.by_model
        assert "claude-sonnet-4" in summary.by_model
        assert summary.by_model["grok-4.1"]["total_tokens"] == 1500
        assert summary.by_model["claude-sonnet-4"]["total_tokens"] == 3000

    async def test_billing_by_agent(self, store):
        """Billing summary includes breakdown by agent."""
        tracker = get_usage_tracker()

        common = dict(model="grok-4.1", provider="xai", latency_ms=100.0)
        tracker.record(thread_id="t1", agent_id="greeter",
                       prompt_tokens=1000, completion_tokens=500, **common)
        tracker.record(thread_id="t2", agent_id="shouter",
                       prompt_tokens=500, completion_tokens=250, **common)

        await asyncio.sleep(1.5)

        summary = await store.get_billing_summary()

        assert "greeter" in summary.by_agent
        assert "shouter" in summary.by_agent
        assert summary.by_agent["greeter"]["total_tokens"] == 1500
        assert summary.by_agent["shouter"]["total_tokens"] == 750

    async def test_billing_filtered_by_org(self, store):
        """Billing summary can be filtered by org_id."""
        tracker = get_usage_tracker()

        common = dict(agent_id="agent", model="grok-4.1", provider="xai",
                      latency_ms=100.0)
        tracker.record(thread_id="t1", prompt_tokens=1000,
                       completion_tokens=500,
                       metadata={"org_id": "org-A"}, **common)
        tracker.record(thread_id="t2", prompt_tokens=2000,
                       completion_tokens=1000,
                       metadata={"org_id": "org-B"}, **common)

        await asyncio.sleep(1.5)

        summary_a = await store.get_billing_summary(org_id="org-A")
        summary_b = await store.get_billing_summary(org_id="org-B")

        assert summary_a.total_tokens == 1500
        assert summary_b.total_tokens == 3000
|
|
||||||
|
class TestDailyUsage:
    """Daily usage aggregation for charting."""

    async def test_daily_aggregation(self, store):
        """Daily usage aggregates by date."""
        tracker = get_usage_tracker()

        # All events land on the same day: timestamps are auto-generated now.
        for idx in range(3):
            tracker.record(
                thread_id=f"t{idx}", agent_id="agent", model="grok-4.1",
                provider="xai", prompt_tokens=100, completion_tokens=50,
                latency_ms=100.0,
            )

        await asyncio.sleep(1.5)

        days = await store.get_daily_usage()

        assert len(days) == 1  # all same day
        today = days[0]
        assert today["total_tokens"] == 450
        assert today["request_count"] == 3

    async def test_daily_returns_date(self, store):
        """Daily usage includes date field."""
        tracker = get_usage_tracker()

        tracker.record(
            thread_id="t1", agent_id="agent", model="grok-4.1",
            provider="xai", prompt_tokens=100, completion_tokens=50,
            latency_ms=100.0,
        )

        await asyncio.sleep(1.5)

        days = await store.get_daily_usage()
        assert len(days) == 1

        # Date should be in YYYY-MM-DD format: 10 chars, two dashes.
        date_str = days[0]["date"]
        assert len(date_str) == 10
        assert date_str.count("-") == 2
|
|
||||||
|
class TestStoreLifecycle:
    """Store lifecycle management."""

    async def test_close_flushes_pending(self, temp_db):
        """Closing store flushes pending writes."""
        reset_usage_tracker()
        reset_usage_store()

        first = UsageStore(db_path=temp_db)
        await first.initialize()

        get_usage_tracker().record(
            thread_id="t1", agent_id="agent", model="grok-4.1",
            provider="xai", prompt_tokens=100, completion_tokens=50,
            latency_ms=100.0,
        )

        # Close immediately — close() must flush the queued event.
        await first.close()

        # Reopen the same database file and verify the event survived.
        second = UsageStore(db_path=temp_db)
        await second.initialize()
        try:
            persisted = await second.query()
            assert len(persisted) == 1
        finally:
            await second.close()
|
@ -30,6 +30,20 @@ Usage Tracking:
|
||||||
|
|
||||||
# Query totals
|
# Query totals
|
||||||
totals = tracker.get_totals()
|
totals = tracker.get_totals()
|
||||||
|
|
||||||
|
Usage Persistence (for billing):
|
||||||
|
from xml_pipeline.llm import get_usage_store
|
||||||
|
|
||||||
|
store = await get_usage_store()
|
||||||
|
|
||||||
|
# Query historical usage
|
||||||
|
events = await store.query(
|
||||||
|
start_time="2025-01-01T00:00:00Z",
|
||||||
|
org_id="org-123",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get billing summary
|
||||||
|
summary = await store.get_billing_summary(org_id="org-123")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from xml_pipeline.llm.router import (
|
from xml_pipeline.llm.router import (
|
||||||
|
|
@ -46,6 +60,13 @@ from xml_pipeline.llm.usage_tracker import (
|
||||||
get_usage_tracker,
|
get_usage_tracker,
|
||||||
reset_usage_tracker,
|
reset_usage_tracker,
|
||||||
)
|
)
|
||||||
|
from xml_pipeline.llm.usage_store import (
|
||||||
|
UsageStore,
|
||||||
|
BillingSummary,
|
||||||
|
get_usage_store,
|
||||||
|
close_usage_store,
|
||||||
|
reset_usage_store,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
# Router
|
# Router
|
||||||
|
|
@ -58,9 +79,15 @@ __all__ = [
|
||||||
"LLMRequest",
|
"LLMRequest",
|
||||||
"LLMResponse",
|
"LLMResponse",
|
||||||
"BackendError",
|
"BackendError",
|
||||||
# Usage tracking
|
# Usage tracking (in-memory)
|
||||||
"UsageTracker",
|
"UsageTracker",
|
||||||
"UsageEvent",
|
"UsageEvent",
|
||||||
"get_usage_tracker",
|
"get_usage_tracker",
|
||||||
"reset_usage_tracker",
|
"reset_usage_tracker",
|
||||||
|
# Usage persistence (SQLite)
|
||||||
|
"UsageStore",
|
||||||
|
"BillingSummary",
|
||||||
|
"get_usage_store",
|
||||||
|
"close_usage_store",
|
||||||
|
"reset_usage_store",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
599
xml_pipeline/llm/usage_store.py
Normal file
599
xml_pipeline/llm/usage_store.py
Normal file
|
|
@ -0,0 +1,599 @@
|
||||||
|
"""
|
||||||
|
Usage Store — Persistent storage for billing and usage analytics.
|
||||||
|
|
||||||
|
Stores UsageEvents to SQLite (default) or PostgreSQL for:
|
||||||
|
- Historical billing queries
|
||||||
|
- Usage analytics and reporting
|
||||||
|
- Audit trails
|
||||||
|
|
||||||
|
The store auto-subscribes to UsageTracker on initialization,
|
||||||
|
persisting all events transparently.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
from xml_pipeline.llm.usage_store import get_usage_store
|
||||||
|
|
||||||
|
store = await get_usage_store()
|
||||||
|
|
||||||
|
# Query historical usage
|
||||||
|
events = await store.query(
|
||||||
|
start_time="2025-01-01T00:00:00Z",
|
||||||
|
end_time="2025-01-31T23:59:59Z",
|
||||||
|
org_id="org-123",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get billing summary
|
||||||
|
summary = await store.get_billing_summary(
|
||||||
|
org_id="org-123",
|
||||||
|
start_time="2025-01-01T00:00:00Z",
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
import aiosqlite
|
||||||
|
HAS_AIOSQLITE = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_AIOSQLITE = False
|
||||||
|
|
||||||
|
from xml_pipeline.llm.usage_tracker import UsageEvent, get_usage_tracker
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Default database path
|
||||||
|
DEFAULT_DB_PATH = Path.home() / ".xml-pipeline" / "usage.db"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class BillingSummary:
    """Aggregated billing data for a time period."""
    # Organization the summary was filtered by; None means all orgs.
    org_id: Optional[str]
    # ISO 8601 bounds of the period (start_time may be "" when unbounded).
    start_time: str
    end_time: str
    # Token totals across all events matching the filters.
    total_tokens: int
    prompt_tokens: int
    completion_tokens: int
    # Number of usage events (one per LLM request) in the period.
    request_count: int
    # Sum of estimated costs over the period.
    total_cost: float
    # Breakdowns keyed by model name / agent id; each value carries the
    # same totals fields as above for that slice.
    by_model: Dict[str, Dict[str, Any]]
    by_agent: Dict[str, Dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
|
class UsageStore:
|
||||||
|
"""
|
||||||
|
Persistent storage for usage events.
|
||||||
|
|
||||||
|
Uses SQLite by default, with async I/O via aiosqlite.
|
||||||
|
Automatically subscribes to UsageTracker to capture all events.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, db_path: Optional[str] = None):
|
||||||
|
"""
|
||||||
|
Initialize the usage store.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_path: Path to SQLite database. Defaults to ~/.xml-pipeline/usage.db
|
||||||
|
"""
|
||||||
|
if not HAS_AIOSQLITE:
|
||||||
|
raise ImportError(
|
||||||
|
"aiosqlite is required for usage persistence. "
|
||||||
|
"Install with: pip install aiosqlite"
|
||||||
|
)
|
||||||
|
|
||||||
|
self._db_path = Path(db_path) if db_path else DEFAULT_DB_PATH
|
||||||
|
self._db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
self._initialized = False
|
||||||
|
self._init_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
# Queue for async persistence (events come from sync callback)
|
||||||
|
self._queue: asyncio.Queue[UsageEvent] = asyncio.Queue()
|
||||||
|
self._writer_task: Optional[asyncio.Task] = None
|
||||||
|
self._running = False
|
||||||
|
|
||||||
|
    async def initialize(self) -> None:
        """Initialize database schema and start background writer.

        Idempotent: guarded by ``_init_lock`` + ``_initialized`` so
        concurrent or repeated calls perform the setup exactly once.
        Side effects: creates the usage_events table and its indexes,
        starts the batch-writer task, and subscribes to the global
        UsageTracker so subsequent events are captured transparently.
        """
        async with self._init_lock:
            if self._initialized:
                return

            # Create tables
            async with aiosqlite.connect(str(self._db_path)) as db:
                await db.execute("""
                    CREATE TABLE IF NOT EXISTS usage_events (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        thread_id TEXT NOT NULL,
                        agent_id TEXT,
                        model TEXT NOT NULL,
                        provider TEXT NOT NULL,
                        prompt_tokens INTEGER NOT NULL,
                        completion_tokens INTEGER NOT NULL,
                        total_tokens INTEGER NOT NULL,
                        latency_ms REAL NOT NULL,
                        estimated_cost REAL,
                        metadata TEXT,

                        -- Denormalized for billing queries
                        org_id TEXT,
                        user_id TEXT
                    )
                """)

                # Indexes for common queries
                await db.execute("""
                    CREATE INDEX IF NOT EXISTS idx_usage_timestamp
                    ON usage_events(timestamp)
                """)
                await db.execute("""
                    CREATE INDEX IF NOT EXISTS idx_usage_org_id
                    ON usage_events(org_id)
                """)
                await db.execute("""
                    CREATE INDEX IF NOT EXISTS idx_usage_agent_id
                    ON usage_events(agent_id)
                """)
                await db.execute("""
                    CREATE INDEX IF NOT EXISTS idx_usage_model
                    ON usage_events(model)
                """)

                await db.commit()

            # Start background writer BEFORE subscribing, so any event that
            # arrives immediately after subscription has a consumer.
            self._running = True
            self._writer_task = asyncio.create_task(self._writer_loop())

            # Subscribe to usage tracker
            tracker = get_usage_tracker()
            tracker.subscribe(self._on_usage_event)

            self._initialized = True
            logger.info(f"UsageStore initialized: {self._db_path}")
|
||||||
|
|
||||||
|
    def _on_usage_event(self, event: UsageEvent) -> None:
        """
        Callback for UsageTracker events.

        This runs synchronously from the tracker, so we queue
        the event for async persistence by the writer task.

        NOTE(review): the QueueFull branch only fires if the queue was
        constructed with a maxsize; with an unbounded queue it is dead code.
        NOTE(review): asyncio.Queue is not thread-safe — this assumes the
        tracker invokes subscribers on the event loop's thread; confirm.
        """
        try:
            self._queue.put_nowait(event)
        except asyncio.QueueFull:
            # Deliberate best-effort: dropping is preferred over blocking
            # the (synchronous) tracker callback.
            logger.warning("Usage event queue full, dropping event")
|
||||||
|
|
||||||
|
    async def _writer_loop(self) -> None:
        """Background task that writes queued events to database.

        Batching policy: flush whenever 1 second has elapsed since the
        first event of the batch, or 100 events have accumulated,
        whichever comes first.
        """
        batch: List[UsageEvent] = []
        batch_timeout = 1.0  # Flush every second or 100 events

        # Keep looping after shutdown is requested (_running False) until
        # the queue is drained, so close() can rely on a final flush.
        while self._running or not self._queue.empty():
            try:
                # Collect batch: block up to batch_timeout for the first
                # event, then greedily drain without blocking.
                try:
                    event = await asyncio.wait_for(
                        self._queue.get(),
                        timeout=batch_timeout
                    )
                    batch.append(event)

                    # Drain queue up to batch size
                    while len(batch) < 100:
                        try:
                            event = self._queue.get_nowait()
                            batch.append(event)
                        except asyncio.QueueEmpty:
                            break

                except asyncio.TimeoutError:
                    # No new event this interval; fall through and flush
                    # whatever is already batched.
                    pass

                # Write batch
                if batch:
                    await self._write_batch(batch)
                    batch = []

            except Exception as e:
                # On write failure the batch is intentionally NOT cleared:
                # the same events are retried on the next iteration after
                # a short backoff.
                logger.error(f"Usage writer error: {e}")
                await asyncio.sleep(1.0)
|
||||||
|
|
||||||
|
async def _write_batch(self, events: List[UsageEvent]) -> None:
|
||||||
|
"""Write a batch of events to database."""
|
||||||
|
async with aiosqlite.connect(str(self._db_path)) as db:
|
||||||
|
await db.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO usage_events (
|
||||||
|
timestamp, thread_id, agent_id, model, provider,
|
||||||
|
prompt_tokens, completion_tokens, total_tokens,
|
||||||
|
latency_ms, estimated_cost, metadata, org_id, user_id
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
[
|
||||||
|
(
|
||||||
|
e.timestamp,
|
||||||
|
e.thread_id,
|
||||||
|
e.agent_id,
|
||||||
|
e.model,
|
||||||
|
e.provider,
|
||||||
|
e.prompt_tokens,
|
||||||
|
e.completion_tokens,
|
||||||
|
e.total_tokens,
|
||||||
|
e.latency_ms,
|
||||||
|
e.estimated_cost,
|
||||||
|
json.dumps(e.metadata) if e.metadata else None,
|
||||||
|
e.metadata.get("org_id") if e.metadata else None,
|
||||||
|
e.metadata.get("user_id") if e.metadata else None,
|
||||||
|
)
|
||||||
|
for e in events
|
||||||
|
]
|
||||||
|
)
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
logger.debug(f"Persisted {len(events)} usage events")
|
||||||
|
|
||||||
|
async def query(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
start_time: Optional[str] = None,
|
||||||
|
end_time: Optional[str] = None,
|
||||||
|
org_id: Optional[str] = None,
|
||||||
|
user_id: Optional[str] = None,
|
||||||
|
agent_id: Optional[str] = None,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
limit: int = 1000,
|
||||||
|
offset: int = 0,
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Query historical usage events.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
start_time: ISO 8601 timestamp (inclusive)
|
||||||
|
end_time: ISO 8601 timestamp (inclusive)
|
||||||
|
org_id: Filter by organization
|
||||||
|
user_id: Filter by user
|
||||||
|
agent_id: Filter by agent
|
||||||
|
model: Filter by model
|
||||||
|
limit: Max results (default 1000)
|
||||||
|
offset: Pagination offset
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of usage event dicts
|
||||||
|
"""
|
||||||
|
conditions = []
|
||||||
|
params = []
|
||||||
|
|
||||||
|
if start_time:
|
||||||
|
conditions.append("timestamp >= ?")
|
||||||
|
params.append(start_time)
|
||||||
|
if end_time:
|
||||||
|
conditions.append("timestamp <= ?")
|
||||||
|
params.append(end_time)
|
||||||
|
if org_id:
|
||||||
|
conditions.append("org_id = ?")
|
||||||
|
params.append(org_id)
|
||||||
|
if user_id:
|
||||||
|
conditions.append("user_id = ?")
|
||||||
|
params.append(user_id)
|
||||||
|
if agent_id:
|
||||||
|
conditions.append("agent_id = ?")
|
||||||
|
params.append(agent_id)
|
||||||
|
if model:
|
||||||
|
conditions.append("model = ?")
|
||||||
|
params.append(model)
|
||||||
|
|
||||||
|
where_clause = " AND ".join(conditions) if conditions else "1=1"
|
||||||
|
|
||||||
|
async with aiosqlite.connect(str(self._db_path)) as db:
|
||||||
|
db.row_factory = aiosqlite.Row
|
||||||
|
cursor = await db.execute(
|
||||||
|
f"""
|
||||||
|
SELECT * FROM usage_events
|
||||||
|
WHERE {where_clause}
|
||||||
|
ORDER BY timestamp DESC
|
||||||
|
LIMIT ? OFFSET ?
|
||||||
|
""",
|
||||||
|
params + [limit, offset]
|
||||||
|
)
|
||||||
|
rows = await cursor.fetchall()
|
||||||
|
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"id": row["id"],
|
||||||
|
"timestamp": row["timestamp"],
|
||||||
|
"thread_id": row["thread_id"],
|
||||||
|
"agent_id": row["agent_id"],
|
||||||
|
"model": row["model"],
|
||||||
|
"provider": row["provider"],
|
||||||
|
"prompt_tokens": row["prompt_tokens"],
|
||||||
|
"completion_tokens": row["completion_tokens"],
|
||||||
|
"total_tokens": row["total_tokens"],
|
||||||
|
"latency_ms": row["latency_ms"],
|
||||||
|
"estimated_cost": row["estimated_cost"],
|
||||||
|
"metadata": json.loads(row["metadata"]) if row["metadata"] else {},
|
||||||
|
}
|
||||||
|
for row in rows
|
||||||
|
]
|
||||||
|
|
||||||
|
    async def get_billing_summary(
        self,
        *,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        org_id: Optional[str] = None,
    ) -> BillingSummary:
        """
        Get aggregated billing summary for a time period.

        Runs three aggregate queries over the same WHERE clause:
        overall totals, a per-model breakdown, and a per-agent breakdown
        (events with NULL agent_id are excluded from the latter only).

        Args:
            start_time: ISO 8601 timestamp (inclusive)
            end_time: ISO 8601 timestamp (inclusive)
            org_id: Filter by organization

        Returns:
            BillingSummary with totals and breakdowns
        """
        conditions = []
        params = []

        if start_time:
            conditions.append("timestamp >= ?")
            params.append(start_time)
        if end_time:
            conditions.append("timestamp <= ?")
            params.append(end_time)
        if org_id:
            conditions.append("org_id = ?")
            params.append(org_id)

        # "1=1" keeps the WHERE syntactically valid with no filters.
        where_clause = " AND ".join(conditions) if conditions else "1=1"

        async with aiosqlite.connect(str(self._db_path)) as db:
            # Overall totals (COALESCE so an empty result set yields 0s).
            cursor = await db.execute(
                f"""
                SELECT
                    COALESCE(SUM(total_tokens), 0) as total_tokens,
                    COALESCE(SUM(prompt_tokens), 0) as prompt_tokens,
                    COALESCE(SUM(completion_tokens), 0) as completion_tokens,
                    COUNT(*) as request_count,
                    COALESCE(SUM(estimated_cost), 0) as total_cost
                FROM usage_events
                WHERE {where_clause}
                """,
                params
            )
            row = await cursor.fetchone()

            totals = {
                "total_tokens": row[0],
                "prompt_tokens": row[1],
                "completion_tokens": row[2],
                "request_count": row[3],
                "total_cost": round(row[4], 4),
            }

            # By model
            cursor = await db.execute(
                f"""
                SELECT
                    model,
                    SUM(total_tokens) as total_tokens,
                    SUM(prompt_tokens) as prompt_tokens,
                    SUM(completion_tokens) as completion_tokens,
                    COUNT(*) as request_count,
                    SUM(estimated_cost) as total_cost
                FROM usage_events
                WHERE {where_clause}
                GROUP BY model
                """,
                params
            )
            by_model = {
                row[0]: {
                    "total_tokens": row[1],
                    "prompt_tokens": row[2],
                    "completion_tokens": row[3],
                    "request_count": row[4],
                    # SUM over all-NULL costs is NULL — coalesce to 0.
                    "total_cost": round(row[5] or 0, 4),
                }
                for row in await cursor.fetchall()
            }

            # By agent
            cursor = await db.execute(
                f"""
                SELECT
                    agent_id,
                    SUM(total_tokens) as total_tokens,
                    SUM(prompt_tokens) as prompt_tokens,
                    SUM(completion_tokens) as completion_tokens,
                    COUNT(*) as request_count,
                    SUM(estimated_cost) as total_cost
                FROM usage_events
                WHERE {where_clause} AND agent_id IS NOT NULL
                GROUP BY agent_id
                """,
                params
            )
            by_agent = {
                row[0]: {
                    "total_tokens": row[1],
                    "prompt_tokens": row[2],
                    "completion_tokens": row[3],
                    "request_count": row[4],
                    "total_cost": round(row[5] or 0, 4),
                }
                for row in await cursor.fetchall()
            }

            return BillingSummary(
                org_id=org_id,
                start_time=start_time or "",
                # Default the end bound to "now" for display purposes.
                end_time=end_time or datetime.now(timezone.utc).isoformat(),
                total_tokens=totals["total_tokens"],
                prompt_tokens=totals["prompt_tokens"],
                completion_tokens=totals["completion_tokens"],
                request_count=totals["request_count"],
                total_cost=totals["total_cost"],
                by_model=by_model,
                by_agent=by_agent,
            )
|
||||||
|
|
||||||
|
async def get_daily_usage(
    self,
    *,
    start_time: Optional[str] = None,
    end_time: Optional[str] = None,
    org_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Get usage aggregated by day for charting.

    Args:
        start_time: Inclusive ISO 8601 lower bound on timestamp.
        end_time: Inclusive ISO 8601 upper bound on timestamp.
        org_id: Restrict results to a single organization.

    Returns:
        List of {date, total_tokens, request_count, total_cost}
    """
    # Build the WHERE clause from whichever filters were supplied.
    clauses: List[str] = []
    args: List[Any] = []
    for clause, value in (
        ("timestamp >= ?", start_time),
        ("timestamp <= ?", end_time),
        ("org_id = ?", org_id),
    ):
        if value:
            clauses.append(clause)
            args.append(value)
    where_sql = " AND ".join(clauses) if clauses else "1=1"

    async with aiosqlite.connect(str(self._db_path)) as db:
        cursor = await db.execute(
            f"""
            SELECT
                DATE(timestamp) as date,
                SUM(total_tokens) as total_tokens,
                COUNT(*) as request_count,
                SUM(estimated_cost) as total_cost
            FROM usage_events
            WHERE {where_sql}
            GROUP BY DATE(timestamp)
            ORDER BY date
            """,
            args,
        )
        rows = await cursor.fetchall()

    # SUM(estimated_cost) is NULL when no row carries a cost; coerce to 0.
    daily: List[Dict[str, Any]] = []
    for day, tokens, requests, cost in rows:
        daily.append(
            {
                "date": day,
                "total_tokens": tokens,
                "request_count": requests,
                "total_cost": round(cost or 0, 4),
            }
        )
    return daily
|
||||||
|
|
||||||
|
async def count(
    self,
    *,
    start_time: Optional[str] = None,
    end_time: Optional[str] = None,
    org_id: Optional[str] = None,
    agent_id: Optional[str] = None,
    model: Optional[str] = None,
) -> int:
    """
    Get total count of events matching criteria.

    Args:
        start_time: Inclusive ISO 8601 lower bound on timestamp.
        end_time: Inclusive ISO 8601 upper bound on timestamp.
        org_id: Filter to a single organization.
        agent_id: Filter to a single agent (mirrors query()'s filter set).
        model: Filter to a single model (mirrors query()'s filter set).

    Returns:
        Number of persisted usage events matching all given filters.
    """
    # agent_id/model filters are accepted so callers (e.g. the
    # /usage/history endpoint) can compute a `total` consistent with
    # query()'s filtered result set; omitting them keeps the old behavior.
    conditions = []
    params: List[Any] = []

    if start_time:
        conditions.append("timestamp >= ?")
        params.append(start_time)
    if end_time:
        conditions.append("timestamp <= ?")
        params.append(end_time)
    if org_id:
        conditions.append("org_id = ?")
        params.append(org_id)
    if agent_id:
        conditions.append("agent_id = ?")
        params.append(agent_id)
    if model:
        conditions.append("model = ?")
        params.append(model)

    where_clause = " AND ".join(conditions) if conditions else "1=1"

    async with aiosqlite.connect(str(self._db_path)) as db:
        cursor = await db.execute(
            f"SELECT COUNT(*) FROM usage_events WHERE {where_clause}",
            params,
        )
        row = await cursor.fetchone()
        return row[0]
|
||||||
|
|
||||||
|
async def close(self) -> None:
    """Shutdown the store, flushing pending writes."""
    # Signal the background writer loop that it should stop.
    self._running = False

    # Stop receiving new events from the global tracker.
    get_usage_tracker().unsubscribe(self._on_usage_event)

    # Let the writer task finish before reporting the store closed.
    task = self._writer_task
    if task:
        await task
        self._writer_task = None

    logger.info("UsageStore closed")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Global Instance
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
_store: Optional[UsageStore] = None
|
||||||
|
_store_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_usage_store(db_path: Optional[str] = None) -> UsageStore:
    """
    Get the global usage store, initializing if needed.

    Args:
        db_path: Optional path to SQLite database

    Returns:
        Initialized UsageStore
    """
    global _store

    # Double-checked locking so only one thread constructs the singleton.
    if _store is None:
        with _store_lock:
            if _store is None:
                _store = UsageStore(db_path)

    # NOTE(review): two coroutines may both observe _initialized as False
    # and each await initialize(); this assumes UsageStore.initialize()
    # is idempotent — confirm.
    store = _store
    if not store._initialized:
        await store.initialize()

    return store
|
||||||
|
|
||||||
|
|
||||||
|
async def close_usage_store() -> None:
    """Close the global usage store."""
    global _store
    store = _store
    if store is None:
        return
    await store.close()
    _store = None
|
||||||
|
|
||||||
|
|
||||||
|
def reset_usage_store() -> None:
    """Reset global store (for testing)."""
    global _store
    # NOTE(review): the previous store is dropped without close(), so an
    # in-flight writer task / DB handle may linger — presumably acceptable
    # in tests, confirm intent.
    with _store_lock:
        _store = None
|
||||||
|
|
@ -19,9 +19,12 @@ from xml_pipeline.server.models import (
|
||||||
AgentInfo,
|
AgentInfo,
|
||||||
AgentListResponse,
|
AgentListResponse,
|
||||||
AgentUsageInfo,
|
AgentUsageInfo,
|
||||||
|
BillingSummaryResponse,
|
||||||
CapabilityDetail,
|
CapabilityDetail,
|
||||||
CapabilityInfo,
|
CapabilityInfo,
|
||||||
CapabilityListResponse,
|
CapabilityListResponse,
|
||||||
|
DailyUsagePoint,
|
||||||
|
DailyUsageResponse,
|
||||||
ErrorResponse,
|
ErrorResponse,
|
||||||
InjectRequest,
|
InjectRequest,
|
||||||
InjectResponse,
|
InjectResponse,
|
||||||
|
|
@ -33,6 +36,8 @@ from xml_pipeline.server.models import (
|
||||||
ThreadInfo,
|
ThreadInfo,
|
||||||
ThreadListResponse,
|
ThreadListResponse,
|
||||||
ThreadStatus,
|
ThreadStatus,
|
||||||
|
UsageEventInfo,
|
||||||
|
UsageHistoryResponse,
|
||||||
UsageOverview,
|
UsageOverview,
|
||||||
UsageResponse,
|
UsageResponse,
|
||||||
UsageTotals,
|
UsageTotals,
|
||||||
|
|
@ -425,6 +430,140 @@ def create_router(state: "ServerState") -> APIRouter:
|
||||||
reset_usage_tracker()
|
reset_usage_tracker()
|
||||||
return {"success": True, "message": "Usage tracking reset"}
|
return {"success": True, "message": "Usage tracking reset"}
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Usage History Endpoints (Persistent)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@router.get("/usage/history", response_model=UsageHistoryResponse)
async def get_usage_history(
    start_time: Optional[str] = Query(None, description="ISO 8601 start time"),
    end_time: Optional[str] = Query(None, description="ISO 8601 end time"),
    org_id: Optional[str] = Query(None, description="Filter by organization"),
    agent_id: Optional[str] = Query(None, description="Filter by agent"),
    model: Optional[str] = Query(None, description="Filter by model"),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
) -> UsageHistoryResponse:
    """
    Query historical usage events from persistent storage.

    Use for billing reconciliation, audit trails, and detailed analytics.
    Events are stored in SQLite and persist across restarts.
    """
    # Imported lazily so the persistent store is only pulled in when a
    # usage endpoint is actually hit.
    from xml_pipeline.llm.usage_store import get_usage_store

    store = await get_usage_store()

    events = await store.query(
        start_time=start_time,
        end_time=end_time,
        org_id=org_id,
        agent_id=agent_id,
        model=model,
        limit=limit,
        offset=offset,
    )

    # NOTE(review): count() is called without agent_id/model, so `total`
    # can exceed the filtered event set when those filters are given —
    # confirm whether count() should accept the same filters as query().
    total = await store.count(
        start_time=start_time,
        end_time=end_time,
        org_id=org_id,
    )

    # Map raw event dicts onto the camelCase API model; optional fields
    # use .get() since older rows may lack them.
    return UsageHistoryResponse(
        events=[
            UsageEventInfo(
                id=e["id"],
                timestamp=e["timestamp"],
                thread_id=e["thread_id"],
                agent_id=e.get("agent_id"),
                model=e["model"],
                provider=e["provider"],
                prompt_tokens=e["prompt_tokens"],
                completion_tokens=e["completion_tokens"],
                total_tokens=e["total_tokens"],
                latency_ms=e["latency_ms"],
                estimated_cost=e.get("estimated_cost"),
                metadata=e.get("metadata", {}),
            )
            for e in events
        ],
        count=len(events),
        total=total,
        offset=offset,
        limit=limit,
    )
|
||||||
|
|
||||||
|
@router.get("/usage/billing", response_model=BillingSummaryResponse)
async def get_billing_summary(
    start_time: Optional[str] = Query(None, description="ISO 8601 start time"),
    end_time: Optional[str] = Query(None, description="ISO 8601 end time"),
    org_id: Optional[str] = Query(None, description="Filter by organization"),
) -> BillingSummaryResponse:
    """
    Get aggregated billing summary for a time period.

    Returns total tokens, costs, and breakdowns by model and agent.
    Use for invoicing and cost analysis.
    """
    # Imported lazily so the persistent store is only pulled in when a
    # usage endpoint is actually hit.
    from xml_pipeline.llm.usage_store import get_usage_store

    store = await get_usage_store()
    summary = await store.get_billing_summary(
        start_time=start_time,
        end_time=end_time,
        org_id=org_id,
    )

    # Flatten the store's summary object field-by-field onto the camelCase
    # API model.
    return BillingSummaryResponse(
        org_id=summary.org_id,
        start_time=summary.start_time,
        end_time=summary.end_time,
        total_tokens=summary.total_tokens,
        prompt_tokens=summary.prompt_tokens,
        completion_tokens=summary.completion_tokens,
        request_count=summary.request_count,
        total_cost=summary.total_cost,
        by_model=summary.by_model,
        by_agent=summary.by_agent,
    )
|
||||||
|
|
||||||
|
@router.get("/usage/daily", response_model=DailyUsageResponse)
async def get_daily_usage(
    start_time: Optional[str] = Query(None, description="ISO 8601 start time"),
    end_time: Optional[str] = Query(None, description="ISO 8601 end time"),
    org_id: Optional[str] = Query(None, description="Filter by organization"),
) -> DailyUsageResponse:
    """
    Get usage aggregated by day for charting.

    Returns daily totals for tokens, requests, and costs.
    Useful for dashboards and trend analysis.
    """
    # Imported lazily so the persistent store is only pulled in when a
    # usage endpoint is actually hit.
    from xml_pipeline.llm.usage_store import get_usage_store

    store = await get_usage_store()
    daily = await store.get_daily_usage(
        start_time=start_time,
        end_time=end_time,
        org_id=org_id,
    )

    points = [
        DailyUsagePoint(
            date=entry["date"],
            total_tokens=entry["total_tokens"],
            request_count=entry["request_count"],
            total_cost=entry["total_cost"],
        )
        for entry in daily
    ]
    return DailyUsageResponse(days=points, count=len(points))
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Control Endpoints
|
# Control Endpoints
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -368,3 +368,66 @@ class ThreadBudgetListResponse(CamelModel):
|
||||||
threads: List[ThreadBudgetInfo]
|
threads: List[ThreadBudgetInfo]
|
||||||
count: int
|
count: int
|
||||||
default_max_tokens: int = Field(alias="defaultMaxTokens")
|
default_max_tokens: int = Field(alias="defaultMaxTokens")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Usage History Models (Persistent)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class UsageEventInfo(CamelModel):
    """A single usage event from history."""

    # Event id assigned by the store (presumably the SQLite row id — confirm).
    id: int
    # ISO 8601 time the event was recorded.
    timestamp: str
    thread_id: str = Field(alias="threadId")
    # None when the event was not attributed to an agent.
    agent_id: Optional[str] = Field(None, alias="agentId")
    model: str
    provider: str
    prompt_tokens: int = Field(alias="promptTokens")
    completion_tokens: int = Field(alias="completionTokens")
    total_tokens: int = Field(alias="totalTokens")
    latency_ms: float = Field(alias="latencyMs")
    # None when no pricing information was available for the model.
    estimated_cost: Optional[float] = Field(None, alias="estimatedCost")
    # Free-form extra data attached to the event; defaults to empty.
    metadata: dict = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class UsageHistoryResponse(CamelModel):
    """Response for GET /usage/history."""

    # Page of events (bounded by `limit`), newest-first per store ordering —
    # confirm ordering against UsageStore.query().
    events: List[UsageEventInfo]
    # Number of events in this page.
    count: int
    # Total matching events across all pages.
    total: int
    offset: int
    limit: int
|
||||||
|
|
||||||
|
|
||||||
|
class BillingSummaryResponse(CamelModel):
    """Response for GET /usage/billing."""

    # None when the summary spans all organizations.
    org_id: Optional[str] = Field(None, alias="orgId")
    start_time: str = Field(alias="startTime")
    end_time: str = Field(alias="endTime")
    total_tokens: int = Field(alias="totalTokens")
    prompt_tokens: int = Field(alias="promptTokens")
    completion_tokens: int = Field(alias="completionTokens")
    request_count: int = Field(alias="requestCount")
    total_cost: float = Field(alias="totalCost")
    # Per-model breakdown: model name -> totals dict.
    by_model: dict = Field(default_factory=dict, alias="byModel")
    # Per-agent breakdown: agent id -> totals dict.
    by_agent: dict = Field(default_factory=dict, alias="byAgent")
|
||||||
|
|
||||||
|
|
||||||
|
class DailyUsagePoint(CamelModel):
    """A single day's usage for charting."""

    # Calendar date (SQLite DATE() output, YYYY-MM-DD).
    date: str
    total_tokens: int = Field(alias="totalTokens")
    request_count: int = Field(alias="requestCount")
    total_cost: float = Field(alias="totalCost")
|
||||||
|
|
||||||
|
|
||||||
|
class DailyUsageResponse(CamelModel):
    """Response for GET /usage/daily."""

    # One point per day with any usage, in ascending date order.
    days: List[DailyUsagePoint]
    # Number of days returned.
    count: int
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue