"""Speech-to-text helpers built around one shared faster-whisper model.

``load_model()`` must be called once before ``transcribe()`` is awaited.
"""

import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor
from functools import partial

import numpy as np
from faster_whisper import WhisperModel

from .config import settings

logger = logging.getLogger(__name__)

# Sample rate (Hz) the audio is converted to before it is given to the model.
_TARGET_SAMPLE_RATE = 16000
# Beam width passed to ``WhisperModel.transcribe``.
_BEAM_SIZE = 5
# Size in bytes of one input sample (16-bit PCM).
_BYTES_PER_SAMPLE = 2

# Populated by load_model(); stays None until then.
_model: WhisperModel | None = None
# Worker threads that run the blocking transcription calls.
_executor = ThreadPoolExecutor(max_workers=settings.max_concurrent_transcriptions)
# Sized like the thread pool, so surplus callers wait on the semaphore
# instead of piling up in the executor's queue.
_semaphore = asyncio.Semaphore(settings.max_concurrent_transcriptions)


def load_model() -> None:
    """Instantiate the Whisper model described by ``settings``.

    The model is stored in the module-level ``_model`` and reused by every
    subsequent transcription.
    """
    global _model
    logger.info(
        "Loading Whisper model %s on %s (%s)...",
        settings.whisper_model,
        settings.whisper_device,
        settings.whisper_compute_type,
    )
    _model = WhisperModel(
        settings.whisper_model,
        device=settings.whisper_device,
        compute_type=settings.whisper_compute_type,
    )
    logger.info("Whisper model loaded.")


def _pcm16_to_float32(audio_bytes: bytes) -> np.ndarray:
    """Decode raw 16-bit PCM bytes into float32 samples scaled by 1/32768."""
    usable = len(audio_bytes) - len(audio_bytes) % _BYTES_PER_SAMPLE
    if usable != len(audio_bytes):
        # np.frombuffer rejects buffers that are not a whole number of
        # samples, so drop the dangling byte rather than fail the request.
        logger.warning(
            "Ignoring trailing byte of incomplete 16-bit PCM sample "
            "(%d bytes received)",
            len(audio_bytes),
        )
    samples = np.frombuffer(memoryview(audio_bytes)[:usable], dtype=np.int16)
    return samples.astype(np.float32) / 32768.0


def _resample_linear(
    audio: np.ndarray, source_rate: int, target_rate: int
) -> np.ndarray:
    """Resample ``audio`` from ``source_rate`` to ``target_rate`` Hz.

    Uses plain linear interpolation; no filtering is applied before
    downsampling.
    """
    if audio.size == 0:
        # np.interp raises ValueError when given no sample points.
        return audio
    duration = len(audio) / source_rate
    target_len = int(duration * target_rate)
    positions = np.linspace(0, len(audio) - 1, target_len)
    return np.interp(positions, np.arange(len(audio)), audio).astype(np.float32)


def _transcribe_sync(audio_bytes: bytes, sample_rate: int) -> str:
    """Transcribe raw PCM 16-bit mono audio and return the recognised text.

    Blocking; intended to run on a worker thread.

    Args:
        audio_bytes: Raw 16-bit PCM mono samples.
        sample_rate: Sample rate of ``audio_bytes`` in Hz.

    Returns:
        The stripped segment texts joined by single spaces, or ``""`` when
        there are no samples to transcribe.

    Raises:
        RuntimeError: If ``load_model()`` has not been called yet.
        ValueError: If ``sample_rate`` is not positive.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if _model is None:
        raise RuntimeError("Whisper model not loaded — call load_model() first")
    if sample_rate <= 0:
        raise ValueError(f"sample_rate must be positive, got {sample_rate}")

    audio = _pcm16_to_float32(audio_bytes)
    if sample_rate != _TARGET_SAMPLE_RATE:
        # faster-whisper expects 16kHz input.
        audio = _resample_linear(audio, sample_rate, _TARGET_SAMPLE_RATE)
    if audio.size == 0:
        return ""

    segments, info = _model.transcribe(audio, beam_size=_BEAM_SIZE)
    # NOTE(review): per the faster-whisper docs ``segments`` is a lazy
    # generator, so the decoding work happens while this join consumes it.
    text = " ".join(seg.text.strip() for seg in segments)
    logger.info("Transcribed %.1fs audio → %d chars", info.duration, len(text))
    return text


async def transcribe(audio_bytes: bytes, sample_rate: int) -> str:
    """Transcribe audio without blocking the running event loop.

    Waits for a slot on the module semaphore, then runs
    ``_transcribe_sync`` on the module thread pool.

    Args:
        audio_bytes: Raw 16-bit PCM mono samples.
        sample_rate: Sample rate of ``audio_bytes`` in Hz.

    Returns:
        The transcribed text.
    """
    async with _semaphore:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            _executor,
            partial(_transcribe_sync, audio_bytes, sample_rate),
        )