fix: decode opus manually and use json audio transcription

2026-06-08 06:29:29 +00:00
parent e6aac63aca
commit a191bcd1eb
4 changed files with 114 additions and 37 deletions
@@ -1,6 +1,9 @@
 from __future__ import annotations

+import base64
+import json
 import os
+from pathlib import Path
 from typing import Any

 import httpx
@@ -26,19 +29,35 @@ def _auth_headers() -> dict[str, str]:
    }


+def _audio_format(audio_path: str) -> str:  # Derive the payload "format" string from the file extension.
+    suffix = Path(audio_path).suffix.lower().lstrip(".")  # last extension only, lower-cased, dot removed: "clip.OGG" -> "ogg"
+    return suffix or "wav"  # a path with no extension falls back to "wav"
+
+
+def _build_transcription_payload(audio_path: str) -> dict[str, Any]:  # Build the request body dict for one audio file.
+    with open(audio_path, "rb") as audio_file:  # binary mode: base64 needs the raw bytes; raises OSError if unreadable
+        encoded = base64.b64encode(audio_file.read()).decode("ascii")  # whole file is read into memory; base64 output is ASCII-only
+
+    return {
+        "model": "openai/whisper-large-v3",
+        "input_audio": {
+            "data": encoded,
+            "format": _audio_format(audio_path),  # extension-derived; "wav" when the path has no suffix
+        },
+    }
+
+
 async def transcribe(audio_path: str) -> str:
-    """Send a WAV file to OpenRouter's whisper model and return transcript text."""
+    """Send audio to OpenRouter's whisper model and return transcript text."""
+    headers = _auth_headers()
+    headers["Content-Type"] = "application/json"
+
    async with httpx.AsyncClient(timeout=300) as client:
-        with open(audio_path, "rb") as audio_file:
-            files = {
-                "file": (os.path.basename(audio_path), audio_file, "audio/wav"),
-                "model": (None, "openai/whisper-large-v3"),
-            }
-            resp = await client.post(
-                f"{OPENROUTER_BASE}/audio/transcriptions",
-                headers=_auth_headers(),
-                files=files,
-            )
+        resp = await client.post(
+            f"{OPENROUTER_BASE}/audio/transcriptions",
+            headers=headers,
+            content=json.dumps(_build_transcription_payload(audio_path)),
+        )
        try:
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc: