# Add new test cases for MarkItDown to cover LLM, remote, and local file
# conversions. Implement tests for handling deprecation warnings, external URL
# queries, and EXIF data processing. Ensure tests are skipped when necessary
# environment conditions are not met, improving test reliability and
# maintainability.
#
# 34 lines, 952 B, Python
import os
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from markitdown import MarkItDown
|
|
|
|
# Sample inputs live in "test_files", a sibling of the directory that
# contains this module.
TEST_FILES_DIR = Path(__file__).parent.parent.joinpath("test_files")
|
|
|
|
# Skip exiftool tests if not installed: shutil.which() returns None when no
# "exiftool" executable can be located, which makes this flag True.
skip_exiftool = shutil.which("exiftool") is None
|
|
|
|
# Metadata expected in the converted output of the sample JPG. Each entry is
# checked as a "<key>: <value>" line by the exiftool test.
JPG_TEST_EXIFTOOL = {
    "Author": "AutoGen Authors",
    "Title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    "Description": "AutoGen enables diverse LLM-based applications",
    "ImageSize": "1615x1967",
    "DateTimeOriginal": "2024:03:14 22:10:00",
}
|
|
|
|
|
|
@pytest.mark.skipif(
    skip_exiftool,
    reason="do not run if exiftool is not installed",
)
def test_markitdown_exiftool() -> None:
    """Check that JPG conversion output contains the expected metadata.

    Converts ``test.jpg`` from ``TEST_FILES_DIR`` with a default
    ``MarkItDown`` instance and asserts that every ``"<key>: <value>"`` pair
    from ``JPG_TEST_EXIFTOOL`` appears in the result's ``text_content``.
    The test is skipped when ``skip_exiftool`` is true.
    """
    markitdown = MarkItDown()

    # Test JPG metadata processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
    for key, value in JPG_TEST_EXIFTOOL.items():
        target = f"{key}: {value}"
        # Name the missing entry so a failure is readable without digging
        # through the full converted text.
        assert target in result.text_content, f"missing metadata entry: {target!r}"
|