From 1f3d3ef524716aa9b89ae574e985018a02f0d294 Mon Sep 17 00:00:00 2001
From: Ji Zhang
Date: Tue, 11 Feb 2025 17:31:11 -0800
Subject: [PATCH] add test for audio. commented out by default.

Add an audio transcription test to the markitdown test suite.

* AUDIO_TEST_STRINGS: new module-level list of substrings expected in
  the converted output (currently only "small step").
* test_markitdown_audio_transcription: builds an openai.OpenAI() client,
  passes it to MarkItDown(llm_client=...), converts "test.wav" from
  TEST_FILES_DIR, and asserts that every entry of AUDIO_TEST_STRINGS
  occurs in result.text_content (both sides lowercased). The test is
  decorated with pytest.mark.skipif(skip_llm, ...), so it is skipped
  whenever skip_llm is truthy.
* The __main__ runner gains a call to the new test, left commented out
  in the same way as the existing test_markitdown_llm() call.

NOTE(review): this patch touches only test_markitdown.py; the test.wav
fixture is assumed to already exist in TEST_FILES_DIR -- confirm.
NOTE(review): the definition of skip_llm is outside this diff;
presumably it reflects whether an API key is configured -- verify.
---
 packages/markitdown/tests/test_markitdown.py | 21 ++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/packages/markitdown/tests/test_markitdown.py b/packages/markitdown/tests/test_markitdown.py
index efd45ac..c7ce726 100644
--- a/packages/markitdown/tests/test_markitdown.py
+++ b/packages/markitdown/tests/test_markitdown.py
@@ -150,6 +150,10 @@ JSON_TEST_STRINGS = [
     "9700dc99-6685-40b4-9a3a-5e406dcb37f3",
 ]
 
+AUDIO_TEST_STRINGS = [
+    "small step",
+]
+
 
 # --- Helper Functions ---
 def validate_strings(result, expected_strings, exclude_strings=None):
@@ -325,10 +329,27 @@ def test_markitdown_llm() -> None:
         assert test_string in result.text_content.lower()
 
 
+@pytest.mark.skipif(
+    skip_llm,
+    reason="do not run llm tests without a key",
+)
+def test_markitdown_audio_transcription() -> None:
+    """Test audio transcription capabilities."""
+    client = openai.OpenAI()
+    markitdown = MarkItDown(llm_client=client)
+
+    # Test WAV transcription with Whisper
+    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.wav"))
+
+    for test_string in AUDIO_TEST_STRINGS:
+        assert test_string.lower() in result.text_content.lower()
+
+
 if __name__ == "__main__":
     """Runs this file's tests from the command line."""
     test_markitdown_remote()
     test_markitdown_local()
     test_markitdown_exiftool()
     # test_markitdown_llm()
+    # test_markitdown_audio_transcription()
     print("All tests passed!")