fix: normalize meeting audio before transcription
This commit is contained in:
+61
-16
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -44,31 +46,74 @@ def _build_transcription_payload(audio_path: str) -> dict[str, Any]:
|
||||
"data": encoded,
|
||||
"format": _audio_format(audio_path),
|
||||
},
|
||||
"language": "en",
|
||||
}
|
||||
|
||||
|
||||
async def _normalize_audio_for_transcription(audio_path: str) -> str:
    """Convert *audio_path* to a mono, 16 kHz, 16-bit PCM WAV file using ffmpeg.

    The converted audio is written to a fresh temporary file and that file's
    path is returned. The caller owns the returned file and is responsible
    for deleting it.

    Raises:
        RuntimeError: ffmpeg exited with a non-zero status. The message
            carries ffmpeg's stderr, or the exit code when stderr is empty.
        OSError: ffmpeg could not be started (for example, it is not
            installed). The temporary file is removed before re-raising.
    """
    source = Path(audio_path)
    fd, normalized_path = tempfile.mkstemp(prefix="meeting-normalized-", suffix=".wav")
    # Only the reserved path is needed; ffmpeg opens the file itself and
    # "-y" lets it overwrite the empty placeholder without prompting.
    os.close(fd)

    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "ffmpeg",
            "-y",
            "-i",
            str(source),
            "-ac",
            "1",
            "-ar",
            "16000",
            "-c:a",
            "pcm_s16le",
            normalized_path,
            # Keep ffmpeg from reading interactive commands off our stdin.
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.PIPE,
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            detail = stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                "Audio normalization failed: " + (detail or f"ffmpeg exited {proc.returncode}")
            )
    except BaseException:
        # Single cleanup path for every failure mode: ffmpeg missing, ffmpeg
        # failing, or the task being cancelled while ffmpeg is still running.
        try:
            if proc is not None and proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    # The process exited between the check and the kill.
                    pass
                await proc.wait()
        finally:
            try:
                os.remove(normalized_path)
            except OSError:
                # Best effort: the file may already be gone.
                pass
        raise

    return normalized_path
|
||||
|
||||
|
||||
async def transcribe(audio_path: str) -> str:
    """Send audio to OpenRouter's whisper model and return transcript text.

    The audio is first normalized into a temporary WAV file by
    ``_normalize_audio_for_transcription``; that file is what gets uploaded,
    and it is always removed afterwards, whether the request succeeds or not.

    Raises:
        RuntimeError: audio normalization failed, the API answered with an
            HTTP error status, or the response contained no transcript text.
    """
    headers = _auth_headers()
    headers["Content-Type"] = "application/json"

    normalized_path = await _normalize_audio_for_transcription(audio_path)
    try:
        async with httpx.AsyncClient(timeout=300) as client:
            resp = await client.post(
                f"{OPENROUTER_BASE}/audio/transcriptions",
                headers=headers,
                content=json.dumps(_build_transcription_payload(normalized_path)),
            )
            try:
                resp.raise_for_status()
            except httpx.HTTPStatusError as exc:
                detail = summarize_error(_safe_json(resp), fallback=resp.text)
                raise RuntimeError(
                    f"OpenRouter transcription failed ({resp.status_code}): {detail}"
                ) from exc

        data = resp.json()
        # Treat a missing or null "text" field the same as an empty transcript.
        text = (data.get("text") or "").strip()
        if not text:
            raise RuntimeError("OpenRouter transcription returned empty text")
        return text
    finally:
        # The normalized copy is only needed for the upload; never leave it behind.
        try:
            os.remove(normalized_path)
        except OSError:
            pass
|
||||
|
||||
|
||||
async def summarize(transcript: str) -> str:
|
||||
|
||||
Reference in New Issue
Block a user