diff --git a/openrouter_client.py b/openrouter_client.py
index 14e671b..fbf30b3 100644
--- a/openrouter_client.py
+++ b/openrouter_client.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
+import asyncio
 import base64
 import json
 import os
+import tempfile
 from pathlib import Path
 from typing import Any
 
@@ -44,31 +46,89 @@ def _build_transcription_payload(audio_path: str) -> dict[str, Any]:
             "data": encoded,
             "format": _audio_format(audio_path),
         },
+        "language": "en",
     }
 
 
+async def _normalize_audio_for_transcription(audio_path: str) -> str:
+    """Re-encode *audio_path* as 16 kHz mono 16-bit PCM WAV using ffmpeg.
+
+    Returns the path of a newly created temporary ``.wav`` file. The caller
+    owns that file and is responsible for deleting it. Raises ``RuntimeError``
+    when ffmpeg exits with a non-zero status.
+    """
+    source = Path(audio_path)
+    fd, normalized_path = tempfile.mkstemp(prefix="meeting-normalized-", suffix=".wav")
+    # Only the reserved path is needed; ffmpeg (-y) overwrites the empty file.
+    os.close(fd)
+
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(source),
+            "-ac",
+            "1",
+            "-ar",
+            "16000",
+            "-c:a",
+            "pcm_s16le",
+            normalized_path,
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        _, stderr = await proc.communicate()
+        if proc.returncode != 0:
+            raise RuntimeError(
+                "Audio normalization failed: "
+                + (stderr.decode("utf-8", errors="replace").strip() or f"ffmpeg exited {proc.returncode}")
+            )
+    except BaseException:
+        # Remove the temp file on every failure path, including ffmpeg being
+        # absent (spawn raises) and task cancellation, not just a bad exit code.
+        try:
+            os.remove(normalized_path)
+        except OSError:
+            pass
+        raise
+
+    return normalized_path
+
+
 async def transcribe(audio_path: str) -> str:
     """Send audio to OpenRouter's whisper model and return transcript text."""
     headers = _auth_headers()
     headers["Content-Type"] = "application/json"
 
-    async with httpx.AsyncClient(timeout=300) as client:
-        resp = await client.post(
-            f"{OPENROUTER_BASE}/audio/transcriptions",
-            headers=headers,
-            content=json.dumps(_build_transcription_payload(audio_path)),
-        )
-    try:
-        resp.raise_for_status()
-    except httpx.HTTPStatusError as exc:
-        detail = summarize_error(_safe_json(resp), fallback=resp.text)
-        raise RuntimeError(f"OpenRouter transcription failed: {detail}") from exc
+    normalized_path = await _normalize_audio_for_transcription(audio_path)
+    try:
+        async with httpx.AsyncClient(timeout=300) as client:
+            resp = await client.post(
+                f"{OPENROUTER_BASE}/audio/transcriptions",
+                headers=headers,
+                content=json.dumps(_build_transcription_payload(normalized_path)),
+            )
+        try:
+            resp.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            detail = summarize_error(_safe_json(resp), fallback=resp.text)
+            raise RuntimeError(
+                f"OpenRouter transcription failed ({resp.status_code}): {detail}"
+            ) from exc
 
-    data = resp.json()
-    text = data.get("text", "")
-    if not text.strip():
-        raise RuntimeError("OpenRouter transcription returned empty text")
-    return text.strip()
+        data = resp.json()
+        # A missing or null "text" is treated the same as an empty transcript.
+        text = data.get("text") or ""
+        if not text.strip():
+            raise RuntimeError("OpenRouter transcription returned empty text")
+        return text.strip()
+    finally:
+        # The normalized copy belongs to this call; remove it on every exit path.
+        try:
+            os.remove(normalized_path)
+        except OSError:
+            pass
 
 
 async def summarize(transcript: str) -> str:
diff --git a/tests/test_openrouter_client.py b/tests/test_openrouter_client.py
index fc79e56..b0f3206 100644
--- a/tests/test_openrouter_client.py
+++ b/tests/test_openrouter_client.py
@@ -21,5 +21,6 @@ def test_build_transcription_payload_uses_base64_json_shape(tmp_path: Path):
 
     payload = _build_transcription_payload(str(path))
     assert payload["model"] == "openai/whisper-large-v3"
+    assert payload["language"] == "en"
     assert payload["input_audio"]["format"] == "wav"
     assert payload["input_audio"]["data"] == base64.b64encode(b"RIFFdemo").decode("ascii")