diff --git a/packages/markitdown/src/markitdown/converters/_mp3_converter.py b/packages/markitdown/src/markitdown/converters/_mp3_converter.py
index 91fd270..cbbdab0 100644
--- a/packages/markitdown/src/markitdown/converters/_mp3_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_mp3_converter.py
@@ -1,7 +1,8 @@
+import os
 import tempfile
 from typing import Union
 from ._base import DocumentConverter, DocumentConverterResult
-from ._wav_converter import WavConverter
+from ._wav_converter import WavConverter, IS_WHISPER_CAPABLE
 from warnings import resetwarnings, catch_warnings
 
 # Optional Transcription support
@@ -25,7 +26,8 @@ finally:
 
 class Mp3Converter(WavConverter):
     """
-    Converts MP3 files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` AND `pydub` are installed).
+    Converts MP3 files to markdown via extraction of metadata (if `exiftool` is installed), 
+    and speech transcription (if `speech_recognition` AND `pydub` are installed, or OpenAI Whisper is configured).
     """
 
     def __init__(
@@ -59,18 +61,27 @@ class Mp3Converter(WavConverter):
                 if f in metadata:
                     md_content += f"{f}: {metadata[f]}\n"
 
-        # Transcribe
-        if IS_AUDIO_TRANSCRIPTION_CAPABLE:
+        # Try transcribing with Whisper first if OpenAI client is available
+        llm_client = kwargs.get("llm_client")
+        if IS_WHISPER_CAPABLE and llm_client is not None:
+            try:
+                transcript = self._transcribe_with_whisper(local_path, llm_client)
+                if transcript:
+                    md_content += "\n\n### Audio Transcript (Whisper):\n" + transcript
+            except Exception as e:
+                md_content += f"\n\n### Audio Transcript:\nError transcribing with Whisper: {str(e)}"
+        # Use speech_recognition only when the Whisper branch above was not taken (a Whisper failure is handled inside _transcribe_with_whisper)
+        elif IS_AUDIO_TRANSCRIPTION_CAPABLE:
             handle, temp_path = tempfile.mkstemp(suffix=".wav")
             os.close(handle)
             try:
                 sound = pydub.AudioSegment.from_mp3(local_path)
                 sound.export(temp_path, format="wav")
-
+                
                 _args = dict()
                 _args.update(kwargs)
                 _args["file_extension"] = ".wav"
-
+                
                 try:
                     transcript = super()._transcribe_audio(temp_path).strip()
                     md_content += "\n\n### Audio Transcript:\n" + (
@@ -78,11 +89,9 @@ class Mp3Converter(WavConverter):
                     )
                 except Exception:
                     md_content += "\n\n### Audio Transcript:\nError. Could not transcribe this audio."
-
             finally:
                 os.unlink(temp_path)
 
-        # Return the result
         return DocumentConverterResult(
             title=None,
             text_content=md_content.strip(),
diff --git a/packages/markitdown/src/markitdown/converters/_wav_converter.py b/packages/markitdown/src/markitdown/converters/_wav_converter.py
index 3c8d842..0ca2b57 100644
--- a/packages/markitdown/src/markitdown/converters/_wav_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_wav_converter.py
@@ -1,20 +1,30 @@
+import logging
 from typing import Union
 from ._base import DocumentConverter, DocumentConverterResult
 from ._media_converter import MediaConverter
 
+logger = logging.getLogger(__name__)
+
 # Optional Transcription support
 IS_AUDIO_TRANSCRIPTION_CAPABLE = False
+IS_WHISPER_CAPABLE = False
 try:
     import speech_recognition as sr
-
     IS_AUDIO_TRANSCRIPTION_CAPABLE = True
 except ModuleNotFoundError:
     pass
 
+try:
+    from openai import OpenAI
+    IS_WHISPER_CAPABLE = True
+except ModuleNotFoundError:
+    pass
+
 
 class WavConverter(MediaConverter):
     """
-    Converts WAV files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` is installed).
+    Converts WAV files to markdown via extraction of metadata (if `exiftool` is installed), 
+    and speech transcription (if `speech_recognition` is installed or OpenAI Whisper is configured).
     """
 
     def __init__(
@@ -48,8 +58,17 @@ class WavConverter(MediaConverter):
                 if f in metadata:
                     md_content += f"{f}: {metadata[f]}\n"
 
-        # Transcribe
-        if IS_AUDIO_TRANSCRIPTION_CAPABLE:
+        # Try transcribing with Whisper first if OpenAI client is available
+        llm_client = kwargs.get("llm_client")
+        if IS_WHISPER_CAPABLE and llm_client is not None :
+            try:
+                transcript = self._transcribe_with_whisper(local_path, llm_client)
+                if transcript:
+                    md_content += "\n\n### Audio Transcript (Whisper):\n" + transcript
+            except Exception as e:
+                md_content += f"\n\n### Audio Transcript:\nError transcribing with Whisper: {str(e)}"
+        # Use speech_recognition only when the Whisper branch above was not taken (a Whisper failure is handled inside _transcribe_with_whisper)
+        elif IS_AUDIO_TRANSCRIPTION_CAPABLE:
             try:
                 transcript = self._transcribe_audio(local_path)
                 md_content += "\n\n### Audio Transcript:\n" + (
@@ -65,6 +84,20 @@ class WavConverter(MediaConverter):
             text_content=md_content.strip(),
         )
 
+    def _transcribe_with_whisper(self, local_path: str, client) -> str:
+        """Transcribe via OpenAI Whisper; on failure use speech_recognition if installed, else re-raise."""
+        try:
+            with open(local_path, "rb") as audio_file:
+                return client.audio.transcriptions.create(
+                    model="whisper-1", file=audio_file
+                ).text.strip()
+        except Exception as e:
+            logger.warning("Whisper transcription attempt failed: %s", e)
+            if not IS_AUDIO_TRANSCRIPTION_CAPABLE:
+                raise  # `sr` was never imported, so there is no local recognizer to fall back to
+            logger.info("Falling back to speech_recognition...")
+            return self._transcribe_audio(local_path)
+
     def _transcribe_audio(self, local_path) -> str:
         recognizer = sr.Recognizer()
         with sr.AudioFile(local_path) as source:
diff --git a/packages/markitdown/tests/test_files/test.wav b/packages/markitdown/tests/test_files/test.wav
new file mode 100644
index 0000000..bc78141
Binary files /dev/null and b/packages/markitdown/tests/test_files/test.wav differ
diff --git a/packages/markitdown/tests/test_markitdown.py b/packages/markitdown/tests/test_markitdown.py
index 0a3b56e..59a6c5e 100644
--- a/packages/markitdown/tests/test_markitdown.py
+++ b/packages/markitdown/tests/test_markitdown.py
@@ -150,6 +150,10 @@ JSON_TEST_STRINGS = [
     "9700dc99-6685-40b4-9a3a-5e406dcb37f3",
 ]
 
+AUDIO_TEST_STRINGS = [
+    "small step",  # substring the audio test expects (case-insensitively) in the converted text of test.wav
+]
+
 
 # --- Helper Functions ---
 def validate_strings(result, expected_strings, exclude_strings=None):
@@ -340,6 +344,22 @@ def test_markitdown_llm() -> None:
         assert test_string in result.text_content.lower()
 
 
+@pytest.mark.skipif(
+    skip_llm,
+    reason="do not run llm tests without a key",
+)
+def test_markitdown_audio_transcription() -> None:
+    """Convert test.wav with an OpenAI client attached and check the expected transcript substrings appear."""
+    client = openai.OpenAI()
+    markitdown = MarkItDown(llm_client=client)
+
+    # Test WAV transcription with Whisper
+    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.wav"))
+    # Both sides are lowercased, so the check is a case-insensitive substring match.
+    for test_string in AUDIO_TEST_STRINGS:
+        assert test_string.lower() in result.text_content.lower()
+
+
 if __name__ == "__main__":
     """Runs this file's tests from the command line."""
     test_markitdown_remote()
@@ -347,4 +367,5 @@ if __name__ == "__main__":
     test_exceptions()
     test_markitdown_exiftool()
     # test_markitdown_llm()
+    # test_markitdown_audio_transcription()
     print("All tests passed!")