""" Integration tests for Premium Librarian query system. These tests require: - eXist-db running (for storage) - LLM router configured (for synthesis) Mark with @pytest.mark.integration to skip in CI without dependencies. """ import pytest from unittest.mock import AsyncMock, patch, MagicMock from xml_pipeline.librarian.query import ( QueryResult, Source, RetrievedChunk, _build_rag_prompt, format_sources_xml, ) from xml_pipeline.librarian.index import LibraryIndex class TestBuildRagPrompt: """Tests for RAG prompt construction.""" def test_builds_prompt_with_context(self) -> None: chunks = [ RetrievedChunk( chunk_id="test:foo:abc123", file_path="src/utils.py", name="calculate", chunk_type="function", language="python", start_line=10, end_line=20, content="def calculate(x): return x * 2", docstring="Calculate double.", signature="def calculate(x) -> int", score=0.9, ), RetrievedChunk( chunk_id="test:bar:def456", file_path="src/main.py", name="main", chunk_type="function", language="python", start_line=1, end_line=5, content="def main(): print('hello')", docstring="", signature="def main()", score=0.7, ), ] prompt = _build_rag_prompt( question="How does the calculate function work?", chunks=chunks, library_name="test-lib", ) # Verify prompt structure assert "test-lib" in prompt assert "calculate function" in prompt assert "src/utils.py" in prompt assert "src/main.py" in prompt assert "[1]" in prompt assert "[2]" in prompt assert "```python" in prompt def test_truncates_long_content(self) -> None: long_content = "x" * 3000 # Longer than 2000 char limit chunks = [ RetrievedChunk( chunk_id="test:long:123", file_path="long.py", name="long_func", chunk_type="function", language="python", start_line=1, end_line=100, content=long_content, docstring="", signature="", score=0.5, ), ] prompt = _build_rag_prompt("What?", chunks, "lib") # Content should be truncated assert "(truncated)" in prompt # Should not contain full content assert long_content not in prompt def test_empty_chunks_list(self) -> None: prompt = _build_rag_prompt("What?", [], "lib") assert "lib" in prompt assert "Question" in prompt class TestFormatSourcesXml: """Tests for XML source formatting.""" def test_formats_sources_as_xml(self) -> None: sources = [ Source( file_path="src/app.py", name="process", chunk_type="function", start_line=10, end_line=25, relevance_score=0.95, snippet="def process(data): ...", ), ] xml = format_sources_xml(sources) assert "" in xml assert "" in xml assert "" in xml assert "src/app.py" in xml assert "process" in xml assert "function" in xml assert "10-25" in xml assert "0.95" in xml def test_escapes_special_characters(self) -> None: sources = [ Source( file_path="src/.py", name="func&name", chunk_type="function", start_line=1, end_line=1, relevance_score=0.5, snippet="code with & entities", ), ] xml = format_sources_xml(sources) # XML entities should be escaped assert "<special>" in xml assert "func&name" in xml def test_empty_sources_list(self) -> None: xml = format_sources_xml([]) assert "" in xml assert "" in xml class TestQueryResultDataclass: """Tests for QueryResult dataclass.""" def test_default_values(self) -> None: result = QueryResult(answer="Test answer") assert result.answer == "Test answer" assert result.sources == [] assert result.tokens_used == 0 assert result.chunks_examined == 0 assert result.error == "" def test_with_sources(self) -> None: sources = [ Source( file_path="test.py", name="test", chunk_type="function", start_line=1, end_line=10, relevance_score=0.9, ), ] result = QueryResult( answer="Test answer", sources=sources, tokens_used=100, chunks_examined=5, ) assert len(result.sources) == 1 assert result.tokens_used == 100 assert result.chunks_examined == 5 class TestRetrievedChunk: """Tests for RetrievedChunk dataclass.""" def test_all_fields(self) -> None: chunk = RetrievedChunk( chunk_id="lib:file:hash", file_path="src/module.py", name="my_function", chunk_type="function", language="python", start_line=10, end_line=20, content="def my_function(): pass", docstring="Does something.", signature="def my_function() -> None", score=0.85, ) assert chunk.chunk_id == "lib:file:hash" assert chunk.file_path == "src/module.py" assert chunk.name == "my_function" assert chunk.language == "python" assert chunk.score == 0.85 @pytest.mark.integration class TestQueryLibraryIntegration: """Integration tests requiring eXist-db and LLM.""" async def test_query_nonexistent_library(self) -> None: """Query should return error for non-existent library.""" from xml_pipeline.librarian.query import query_library # Mock get_index to return None - patch at index module level with patch("xml_pipeline.librarian.index.get_index", new_callable=AsyncMock) as mock_get_index: mock_get_index.return_value = None result = await query_library( library_id="nonexistent-lib-xyz", question="What does this do?", ) assert result.error assert "not found" in result.error.lower() async def test_query_with_no_relevant_chunks(self) -> None: """Query should handle case where search returns no results.""" from xml_pipeline.librarian.query import query_library mock_index = LibraryIndex( library_id="test-lib", name="Test Library", source_url="https://example.com/repo", created_at="2024-01-01T00:00:00Z", ) # Patch get_index at the index module level (where it's defined) # and _search_chunks at query module level with patch("xml_pipeline.librarian.index.get_index", new_callable=AsyncMock) as mock_get_index: mock_get_index.return_value = mock_index with patch("xml_pipeline.librarian.query._search_chunks", new_callable=AsyncMock) as mock_search: mock_search.return_value = [] result = await query_library( library_id="test-lib", question="What does foo do?", ) assert "No relevant code found" in result.answer assert result.chunks_examined == 0 class TestLibraryIndex: """Tests for LibraryIndex dataclass.""" def test_properties(self) -> None: index = LibraryIndex( library_id="test-id", name="Test Lib", source_url="https://github.com/test/repo", created_at="2024-01-01", files=["a.py", "b.py", "c.py"], functions={"func1": "a.py", "func2": "b.py"}, classes={"MyClass": "c.py"}, stats={"chunks": 10, "files": 3}, ) assert index.total_chunks == 10 assert index.total_files == 3 def test_empty_stats(self) -> None: index = LibraryIndex( library_id="test", name="Test", source_url="", created_at="", ) assert index.total_chunks == 0 assert index.total_files == 0