add test, adjust docstring

This commit is contained in:
makermotion 2024-12-22 12:30:53 +03:00
parent 997f244d8f
commit 3207e3ab38
3 changed files with 14 additions and 2 deletions

View file

@@ -1083,7 +1083,6 @@ class OutlookMsgConverter(DocumentConverter):
Uses the olefile package to parse the .msg file structure and extract: Uses the olefile package to parse the .msg file structure and extract:
- Email headers (From, To, Subject, Date) - Email headers (From, To, Subject, Date)
- Email body content - Email body content
- Attachments (listed but not converted)
""" """
def convert( def convert(
@@ -1101,7 +1100,7 @@ class OutlookMsgConverter(DocumentConverter):
# Get headers # Get headers
headers = { headers = {
"From": self._get_stream_data(msg, "__substg1.0_0C1A001F"), "From": self._get_stream_data(msg, "__substg1.0_0C1F001F"),
"To": self._get_stream_data(msg, "__substg1.0_0E04001F"), "To": self._get_stream_data(msg, "__substg1.0_0E04001F"),
"Subject": self._get_stream_data(msg, "__substg1.0_0037001F"), "Subject": self._get_stream_data(msg, "__substg1.0_0037001F"),
} }

BIN
tests/test_files/test_outlook_msg.msg vendored Normal file

Binary file not shown.

View file

@@ -63,6 +63,15 @@ DOCX_TEST_STRINGS = [
"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation", "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
] ]
MSG_TEST_STRINGS = [
"# Email Message",
"**From:** test.sender@example.com",
"**To:** test.recipient@example.com",
"**Subject:** Test Email Message",
"## Content",
"This is the body of the test email message",
]
DOCX_COMMENT_TEST_STRINGS = [ DOCX_COMMENT_TEST_STRINGS = [
"314b0a30-5b04-470b-b9f7-eed2c2bec74a", "314b0a30-5b04-470b-b9f7-eed2c2bec74a",
"49e168b7-d2ae-407f-a055-2167576f39a1", "49e168b7-d2ae-407f-a055-2167576f39a1",
@@ -232,6 +241,10 @@ def test_markitdown_local() -> None:
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv")) result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv"))
validate_strings(result, CSV_CP932_TEST_STRINGS) validate_strings(result, CSV_CP932_TEST_STRINGS)
# Test MSG (Outlook email) processing
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"))
validate_strings(result, MSG_TEST_STRINGS)
@pytest.mark.skipif( @pytest.mark.skipif(
skip_exiftool, skip_exiftool,