import asyncio
import json
import logging
from collections.abc import AsyncGenerator

import httpx

from .config import settings

logger = logging.getLogger(__name__)

# Cap concurrent in-flight LLM requests so a burst of callers cannot
# overload the Ollama backend; configured via settings.
_semaphore = asyncio.Semaphore(settings.max_concurrent_llm)


async def check_ollama() -> bool:
    """Verify Ollama is reachable.

    Performs a cheap GET against ``/api/tags`` with a short timeout.

    Returns:
        True if the endpoint responds with a 2xx status, False otherwise
        (connection errors and HTTP errors are logged, never raised —
        this is a health probe, not a hard dependency check).
    """
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(f"{settings.ollama_url}/api/tags", timeout=5)
            resp.raise_for_status()
            return True
    except Exception as e:
        # Broad catch is deliberate at this boundary: any failure mode
        # (DNS, refused connection, timeout, non-2xx) means "not reachable".
        logger.error("Ollama not reachable at %s: %s", settings.ollama_url, e)
        return False


async def generate_response(
    transcript: str,
    system_prompt: str | None = None,
) -> AsyncGenerator[str, None]:
    """Stream text tokens from Ollama for the given user transcript.

    Args:
        transcript: The user's message content.
        system_prompt: Optional system prompt; falls back to
            ``settings.system_prompt`` when not provided.

    Yields:
        Non-empty content tokens as they arrive from the streaming
        ``/api/chat`` endpoint.

    Raises:
        httpx.HTTPStatusError: If the request fails at the HTTP level.
        RuntimeError: If Ollama returns an in-stream error payload.
    """
    prompt = system_prompt or settings.system_prompt
    payload = {
        "model": settings.llm_model,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": transcript},
        ],
        "stream": True,
    }
    async with _semaphore:
        # Generous read timeout: token generation can be slow, but we still
        # want to fail fast if the initial connection cannot be established.
        async with httpx.AsyncClient(
            timeout=httpx.Timeout(120.0, connect=10.0)
        ) as client:
            async with client.stream(
                "POST",
                f"{settings.ollama_url}/api/chat",
                json=payload,
            ) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    # Guard each NDJSON line individually: one truncated or
                    # malformed line must not abort the whole stream.
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        logger.warning("Skipping malformed stream line: %r", line[:200])
                        continue
                    # Ollama reports in-stream failures as {"error": "..."};
                    # previously these were silently dropped and the stream
                    # just ended with no output. Surface them explicitly.
                    if "error" in data:
                        logger.error("Ollama stream error: %s", data["error"])
                        raise RuntimeError(f"Ollama error: {data['error']}")
                    token = data.get("message", {}).get("content", "")
                    if token:
                        yield token
                    if data.get("done"):
                        break