fix: decode opus manually and use json audio transcription
This commit is contained in:
+30
-11
@@ -1,6 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
@@ -26,19 +29,35 @@ def _auth_headers() -> dict[str, str]:
|
||||
}
|
||||
|
||||
|
||||
def _audio_format(audio_path: str) -> str:
|
||||
suffix = Path(audio_path).suffix.lower().lstrip(".")
|
||||
return suffix or "wav"
|
||||
|
||||
|
||||
def _build_transcription_payload(audio_path: str) -> dict[str, Any]:
    """Assemble the JSON request body for a transcription call.

    The file at ``audio_path`` is read in full in binary mode and embedded
    as base64 text under ``input_audio.data``; ``input_audio.format`` is
    whatever ``_audio_format`` reports for the same path.
    """
    with open(audio_path, "rb") as source:
        raw_bytes = source.read()

    # JSON cannot carry raw bytes, so the audio travels as ASCII base64 text.
    input_audio = {
        "data": base64.b64encode(raw_bytes).decode("ascii"),
        "format": _audio_format(audio_path),
    }
    return {
        "model": "openai/whisper-large-v3",
        "input_audio": input_audio,
    }
|
||||
|
||||
|
||||
async def transcribe(audio_path: str) -> str:
|
||||
"""Send a WAV file to OpenRouter's whisper model and return transcript text."""
|
||||
"""Send audio to OpenRouter's whisper model and return transcript text."""
|
||||
headers = _auth_headers()
|
||||
headers["Content-Type"] = "application/json"
|
||||
|
||||
async with httpx.AsyncClient(timeout=300) as client:
|
||||
with open(audio_path, "rb") as audio_file:
|
||||
files = {
|
||||
"file": (os.path.basename(audio_path), audio_file, "audio/wav"),
|
||||
"model": (None, "openai/whisper-large-v3"),
|
||||
}
|
||||
resp = await client.post(
|
||||
f"{OPENROUTER_BASE}/audio/transcriptions",
|
||||
headers=_auth_headers(),
|
||||
files=files,
|
||||
)
|
||||
resp = await client.post(
|
||||
f"{OPENROUTER_BASE}/audio/transcriptions",
|
||||
headers=headers,
|
||||
content=json.dumps(_build_transcription_payload(audio_path)),
|
||||
)
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as exc:
|
||||
|
||||
Reference in New Issue
Block a user