add test, adjust docstring
This commit is contained in:
parent
997f244d8f
commit
3207e3ab38
3 changed files with 14 additions and 2 deletions
|
|
@@ -1083,7 +1083,6 @@ class OutlookMsgConverter(DocumentConverter):
|
|||
Uses the olefile package to parse the .msg file structure and extract:
|
||||
- Email headers (From, To, Subject, Date)
|
||||
- Email body content
|
||||
- Attachments (listed but not converted)
|
||||
"""
|
||||
|
||||
def convert(
|
||||
|
|
@@ -1101,7 +1100,7 @@ class OutlookMsgConverter(DocumentConverter):
|
|||
|
||||
# Get headers
|
||||
headers = {
|
||||
- "From": self._get_stream_data(msg, "__substg1.0_0C1A001F"),
|
||||
+ "From": self._get_stream_data(msg, "__substg1.0_0C1F001F"),
|
||||
"To": self._get_stream_data(msg, "__substg1.0_0E04001F"),
|
||||
"Subject": self._get_stream_data(msg, "__substg1.0_0037001F"),
|
||||
}
|
||||
|
|
|
|||
BIN
tests/test_files/test_outlook_msg.msg
vendored
Normal file
BIN
tests/test_files/test_outlook_msg.msg
vendored
Normal file
Binary file not shown.
|
|
@@ -63,6 +63,15 @@ DOCX_TEST_STRINGS = [
|
|||
"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
|
||||
]
|
||||
|
||||
MSG_TEST_STRINGS = [
|
||||
"# Email Message",
|
||||
"**From:** test.sender@example.com",
|
||||
"**To:** test.recipient@example.com",
|
||||
"**Subject:** Test Email Message",
|
||||
"## Content",
|
||||
"This is the body of the test email message",
|
||||
]
|
||||
|
||||
DOCX_COMMENT_TEST_STRINGS = [
|
||||
"314b0a30-5b04-470b-b9f7-eed2c2bec74a",
|
||||
"49e168b7-d2ae-407f-a055-2167576f39a1",
|
||||
|
|
@@ -232,6 +241,10 @@ def test_markitdown_local() -> None:
|
|||
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv"))
|
||||
validate_strings(result, CSV_CP932_TEST_STRINGS)
|
||||
|
||||
# Test MSG (Outlook email) processing
|
||||
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"))
|
||||
validate_strings(result, MSG_TEST_STRINGS)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
skip_exiftool,
|
||||
|
|
|
|||
Loading…
Reference in a new issue