fix: normalize meeting audio before transcription

This commit is contained in:
2026-06-08 06:37:59 +00:00
parent a191bcd1eb
commit f629794a50
2 changed files with 62 additions and 16 deletions
+61 -16
View File
@@ -1,8 +1,10 @@
from __future__ import annotations
import asyncio
import base64
import json
import os
import tempfile
from pathlib import Path
from typing import Any
@@ -44,31 +46,74 @@ def _build_transcription_payload(audio_path: str) -> dict[str, Any]:
"data": encoded,
"format": _audio_format(audio_path),
},
"language": "en",
}
async def _normalize_audio_for_transcription(audio_path: str) -> str:
source = Path(audio_path)
fd, normalized_path = tempfile.mkstemp(prefix="meeting-normalized-", suffix=".wav")
os.close(fd)
proc = await asyncio.create_subprocess_exec(
"ffmpeg",
"-y",
"-i",
str(source),
"-ac",
"1",
"-ar",
"16000",
"-c:a",
"pcm_s16le",
normalized_path,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.PIPE,
)
_, stderr = await proc.communicate()
if proc.returncode != 0:
try:
os.remove(normalized_path)
except OSError:
pass
raise RuntimeError(
"Audio normalization failed: " + (stderr.decode("utf-8", errors="replace").strip() or f"ffmpeg exited {proc.returncode}")
)
return normalized_path
async def transcribe(audio_path: str) -> str:
    """Send audio to OpenRouter's whisper model and return transcript text.

    The audio is first normalized to a temporary WAV file via
    ``_normalize_audio_for_transcription``; that normalized file (not the
    original) is what gets encoded into the request payload. The temporary
    file is removed afterwards whether or not the request succeeds.

    Args:
        audio_path: Path of the recorded audio file to transcribe.

    Returns:
        The transcript text with surrounding whitespace stripped.

    Raises:
        RuntimeError: Normalization failed, the API returned an HTTP error
            status, the response body was not valid JSON, or the response
            contained no transcript text.
    """
    headers = _auth_headers()
    headers["Content-Type"] = "application/json"

    normalized_path = await _normalize_audio_for_transcription(audio_path)
    try:
        async with httpx.AsyncClient(timeout=300) as client:
            resp = await client.post(
                f"{OPENROUTER_BASE}/audio/transcriptions",
                headers=headers,
                content=json.dumps(_build_transcription_payload(normalized_path)),
            )

        try:
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            detail = summarize_error(_safe_json(resp), fallback=resp.text)
            raise RuntimeError(
                f"OpenRouter transcription failed ({resp.status_code}): {detail}"
            ) from exc

        try:
            data = resp.json()
        except ValueError as exc:
            # A 2xx response with a non-JSON body should surface as the same
            # error type as every other transcription failure.
            raise RuntimeError("OpenRouter transcription returned invalid JSON") from exc

        # "text" may be absent, null, or (defensively) not a string; all of
        # those count as an empty transcript rather than an AttributeError.
        text = data.get("text") if isinstance(data, dict) else None
        if not isinstance(text, str) or not text.strip():
            raise RuntimeError("OpenRouter transcription returned empty text")
        return text.strip()
    finally:
        # Best-effort removal of the normalized temp file.
        try:
            os.remove(normalized_path)
        except OSError:
            pass
async def summarize(transcript: str) -> str: