From b8927e5e6545901f42c7959eb0589da3c7d1e248 Mon Sep 17 00:00:00 2001
From: Ji Zhang <jizhang.work@gmail.com>
Date: Tue, 11 Feb 2025 17:30:39 -0800
Subject: [PATCH] Fall back to _transcribe_audio when Whisper transcription fails

---
 .../markitdown/converters/_wav_converter.py   | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/packages/markitdown/src/markitdown/converters/_wav_converter.py b/packages/markitdown/src/markitdown/converters/_wav_converter.py
index 722ea48..0ca2b57 100644
--- a/packages/markitdown/src/markitdown/converters/_wav_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_wav_converter.py
@@ -1,7 +1,10 @@
+import logging
 from typing import Union
 from ._base import DocumentConverter, DocumentConverterResult
 from ._media_converter import MediaConverter
 
+logger = logging.getLogger(__name__)
+
 # Optional Transcription support
 IS_AUDIO_TRANSCRIPTION_CAPABLE = False
 IS_WHISPER_CAPABLE = False
@@ -82,13 +85,18 @@ class WavConverter(MediaConverter):
         )
 
     def _transcribe_with_whisper(self, local_path: str, client) -> str:
-        """Transcribe audio using OpenAI's Whisper model."""
-        with open(local_path, "rb") as audio_file:
-            transcription = client.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio_file
-            )
-            return transcription.text.strip()
+        """Transcribe audio with OpenAI's Whisper model; on any error, fall back to `_transcribe_audio`."""
+        try:
+            with open(local_path, "rb") as audio_file:
+                transcription = client.audio.transcriptions.create(
+                    model="whisper-1", file=audio_file
+                )
+            return transcription.text.strip()
+        except Exception as e:
+            # Deliberately broad: any failure in the Whisper path should trigger the fallback.
+            logger.warning("Whisper transcription attempt failed: %s", e)
+            logger.info("Falling back to speech_recognition...")
+            return self._transcribe_audio(local_path)
 
     def _transcribe_audio(self, local_path) -> str:
         recognizer = sr.Recognizer()