Fall back to _transcribe_audio when Whisper transcription fails

2025-02-11 17:30:39 -08:00 · 2025-02-11 17:30:39 -08:00 · b8927e5e65
commit b8927e5e65
parent 8301427ab5
1 changed file with 15 additions and 7 deletions
--- a/packages/markitdown/src/markitdown/converters/_wav_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_wav_converter.py
@ -1,7 +1,10 @@
+import logging
 from typing import Union
 from ._base import DocumentConverter, DocumentConverterResult
 from ._media_converter import MediaConverter

+logger = logging.getLogger(__name__)
+
 # Optional Transcription support
 IS_AUDIO_TRANSCRIPTION_CAPABLE = False
 IS_WHISPER_CAPABLE = False
@ -82,13 +85,18 @@ class WavConverter(MediaConverter):
        )

    def _transcribe_with_whisper(self, local_path: str, client) -> str:
-        """Transcribe audio using OpenAI's Whisper model."""
-        with open(local_path, "rb") as audio_file:
-            transcription = client.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio_file
-            )
-            return transcription.text.strip()
+        """Transcribe audio using OpenAI's Whisper model, falling back to speech_recognition if it fails."""
+        try:
+            with open(local_path, "rb") as audio_file:
+                transcription = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_file
+                )
+                return transcription.text.strip()
+        except Exception as e:
+            logger.warning(f"Whisper transcription attempt failed: {str(e)}")
+            logger.info("Falling back to speech_recognition...")
+            return self._transcribe_audio(local_path)

    def _transcribe_audio(self, local_path) -> str:
        recognizer = sr.Recognizer()