"""
Integration tests for Premium Librarian query system.
These tests require:
- eXist-db running (for storage)
- LLM router configured (for synthesis)
Mark with @pytest.mark.integration to skip in CI without dependencies.
"""
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from xml_pipeline.librarian.query import (
QueryResult,
Source,
RetrievedChunk,
_build_rag_prompt,
format_sources_xml,
)
from xml_pipeline.librarian.index import LibraryIndex
class TestBuildRagPrompt:
    """Checks on the prompt text returned by ``_build_rag_prompt``."""

    @staticmethod
    def _chunk(**overrides) -> RetrievedChunk:
        """Build a ``RetrievedChunk`` from the given field values."""
        return RetrievedChunk(**overrides)

    def test_builds_prompt_with_context(self) -> None:
        """Library name, question text, paths, ``[n]`` markers and a code fence appear."""
        calculate_chunk = self._chunk(
            chunk_id="test:foo:abc123",
            file_path="src/utils.py",
            name="calculate",
            chunk_type="function",
            language="python",
            start_line=10,
            end_line=20,
            content="def calculate(x): return x * 2",
            docstring="Calculate double.",
            signature="def calculate(x) -> int",
            score=0.9,
        )
        main_chunk = self._chunk(
            chunk_id="test:bar:def456",
            file_path="src/main.py",
            name="main",
            chunk_type="function",
            language="python",
            start_line=1,
            end_line=5,
            content="def main(): print('hello')",
            docstring="",
            signature="def main()",
            score=0.7,
        )

        rendered = _build_rag_prompt(
            question="How does the calculate function work?",
            chunks=[calculate_chunk, main_chunk],
            library_name="test-lib",
        )

        # Every one of these fragments must be present in the prompt.
        expected_fragments = (
            "test-lib",
            "calculate function",
            "src/utils.py",
            "src/main.py",
            "[1]",
            "[2]",
            "```python",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_truncates_long_content(self) -> None:
        """Oversized chunk content is cut and flagged with ``(truncated)``."""
        # 3000 characters, chosen to exceed the 2000 char limit.
        oversized_body = "x" * 3000
        only_chunk = self._chunk(
            chunk_id="test:long:123",
            file_path="long.py",
            name="long_func",
            chunk_type="function",
            language="python",
            start_line=1,
            end_line=100,
            content=oversized_body,
            docstring="",
            signature="",
            score=0.5,
        )

        rendered = _build_rag_prompt("What?", [only_chunk], "lib")

        # The truncation marker is present ...
        assert "(truncated)" in rendered
        # ... and the complete body did not make it through.
        assert oversized_body not in rendered

    def test_empty_chunks_list(self) -> None:
        """With no chunks the prompt still names the library and a question section."""
        rendered = _build_rag_prompt("What?", [], "lib")

        for fragment in ("lib", "Question"):
            assert fragment in rendered
class TestFormatSourcesXml:
    """Tests for XML source formatting via ``format_sources_xml``."""

    @staticmethod
    def _assert_looks_like_markup(xml: str) -> None:
        """Assert that *xml* is non-empty and delimited by angle brackets.

        This replaces earlier ``assert "" in xml`` checks, which were always
        true and therefore verified nothing.
        NOTE(review): the element names those checks originally targeted are
        not recoverable from this file; tighten this to the exact tag names
        once confirmed against ``format_sources_xml``.
        """
        stripped = xml.strip()
        assert stripped.startswith("<")
        assert stripped.endswith(">")

    def test_formats_sources_as_xml(self) -> None:
        """Each field of a single source is present in the rendered output."""
        sources = [
            Source(
                file_path="src/app.py",
                name="process",
                chunk_type="function",
                start_line=10,
                end_line=25,
                relevance_score=0.95,
                snippet="def process(data): ...",
            ),
        ]
        xml = format_sources_xml(sources)

        self._assert_looks_like_markup(xml)
        assert "src/app.py" in xml
        assert "process" in xml
        assert "function" in xml
        assert "10-25" in xml
        assert "0.95" in xml

    def test_escapes_special_characters(self) -> None:
        """Markup-significant characters in field values are entity-escaped."""
        sources = [
            Source(
                # The path must actually contain `<`/`>` for the escaping
                # assertion below to be meaningful.
                file_path="src/<special>.py",
                name="func&name",
                chunk_type="function",
                start_line=1,
                end_line=1,
                relevance_score=0.5,
                snippet="code with & entities",
            ),
        ]
        xml = format_sources_xml(sources)

        # XML entities should be escaped
        assert "&lt;special&gt;" in xml
        assert "func&amp;name" in xml
        # ... and the raw, unescaped forms must not leak into the output.
        assert "<special>" not in xml
        assert "func&name" not in xml

    def test_empty_sources_list(self) -> None:
        """An empty source list still yields markup rather than an empty string."""
        xml = format_sources_xml([])

        self._assert_looks_like_markup(xml)
class TestQueryResultDataclass:
    """Checks on ``QueryResult`` construction and field values."""

    def test_default_values(self) -> None:
        """Only ``answer`` is supplied; every other field takes its default."""
        bare = QueryResult(answer="Test answer")

        assert bare.answer == "Test answer"

        expected_defaults = {
            "sources": [],
            "tokens_used": 0,
            "chunks_examined": 0,
            "error": "",
        }
        for field_name, default in expected_defaults.items():
            assert getattr(bare, field_name) == default

    def test_with_sources(self) -> None:
        """Explicitly supplied sources and counters are stored as given."""
        single_source = Source(
            file_path="test.py",
            name="test",
            chunk_type="function",
            start_line=1,
            end_line=10,
            relevance_score=0.9,
        )

        populated = QueryResult(
            answer="Test answer",
            sources=[single_source],
            tokens_used=100,
            chunks_examined=5,
        )

        assert len(populated.sources) == 1
        assert populated.tokens_used == 100
        assert populated.chunks_examined == 5
class TestRetrievedChunk:
    """Checks on ``RetrievedChunk`` construction."""

    def test_all_fields(self) -> None:
        """A chunk built with every field exposes the values it was given."""
        field_values = {
            "chunk_id": "lib:file:hash",
            "file_path": "src/module.py",
            "name": "my_function",
            "chunk_type": "function",
            "language": "python",
            "start_line": 10,
            "end_line": 20,
            "content": "def my_function(): pass",
            "docstring": "Does something.",
            "signature": "def my_function() -> None",
            "score": 0.85,
        }
        built = RetrievedChunk(**field_values)

        # Only this subset of attributes is verified.
        for attr in ("chunk_id", "file_path", "name", "language", "score"):
            assert getattr(built, attr) == field_values[attr]
@pytest.mark.integration
class TestQueryLibraryIntegration:
    """Integration tests requiring eXist-db and LLM.

    NOTE(review): the test methods are coroutines with no explicit asyncio
    marker; presumably the project runs pytest-asyncio in auto mode - confirm.
    """

    async def test_query_nonexistent_library(self) -> None:
        """Query should return error for non-existent library."""
        from xml_pipeline.librarian.query import query_library

        # Replace get_index in the index module with an async mock that
        # resolves to None, i.e. no index is found for the library.
        with patch(
            "xml_pipeline.librarian.index.get_index",
            new_callable=AsyncMock,
            return_value=None,
        ):
            outcome = await query_library(
                library_id="nonexistent-lib-xyz",
                question="What does this do?",
            )

        assert outcome.error
        assert "not found" in outcome.error.lower()

    async def test_query_with_no_relevant_chunks(self) -> None:
        """Query should handle case where search returns no results."""
        from xml_pipeline.librarian.query import query_library

        stub_index = LibraryIndex(
            library_id="test-lib",
            name="Test Library",
            source_url="https://example.com/repo",
            created_at="2024-01-01T00:00:00Z",
        )

        # get_index is patched in the index module (where it is defined);
        # _search_chunks is patched in the query module and yields no hits.
        with patch(
            "xml_pipeline.librarian.index.get_index",
            new_callable=AsyncMock,
            return_value=stub_index,
        ), patch(
            "xml_pipeline.librarian.query._search_chunks",
            new_callable=AsyncMock,
            return_value=[],
        ):
            outcome = await query_library(
                library_id="test-lib",
                question="What does foo do?",
            )

        assert "No relevant code found" in outcome.answer
        assert outcome.chunks_examined == 0
class TestLibraryIndex:
    """Checks on the ``total_chunks`` / ``total_files`` values of ``LibraryIndex``."""

    def test_properties(self) -> None:
        """A populated index reports the expected chunk and file totals."""
        populated = LibraryIndex(
            library_id="test-id",
            name="Test Lib",
            source_url="https://github.com/test/repo",
            created_at="2024-01-01",
            files=["a.py", "b.py", "c.py"],
            functions={"func1": "a.py", "func2": "b.py"},
            classes={"MyClass": "c.py"},
            stats={"chunks": 10, "files": 3},
        )

        observed = (populated.total_chunks, populated.total_files)
        assert observed[0] == 10
        assert observed[1] == 3

    def test_empty_stats(self) -> None:
        """An index built without files or stats reports zero for both totals."""
        minimal = LibraryIndex(
            library_id="test",
            name="Test",
            source_url="",
            created_at="",
        )

        for total in (minimal.total_chunks, minimal.total_files):
            assert total == 0