add test, adjust docstring
This commit is contained in:
parent
997f244d8f
commit
3207e3ab38
3 changed files with 14 additions and 2 deletions
|
|
@ -1083,7 +1083,6 @@ class OutlookMsgConverter(DocumentConverter):
|
||||||
Uses the olefile package to parse the .msg file structure and extract:
|
Uses the olefile package to parse the .msg file structure and extract:
|
||||||
- Email headers (From, To, Subject, Date)
|
- Email headers (From, To, Subject, Date)
|
||||||
- Email body content
|
- Email body content
|
||||||
- Attachments (listed but not converted)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def convert(
|
def convert(
|
||||||
|
|
@ -1101,7 +1100,7 @@ class OutlookMsgConverter(DocumentConverter):
|
||||||
|
|
||||||
# Get headers
|
# Get headers
|
||||||
headers = {
|
headers = {
|
||||||
"From": self._get_stream_data(msg, "__substg1.0_0C1A001F"),
|
"From": self._get_stream_data(msg, "__substg1.0_0C1F001F"),
|
||||||
"To": self._get_stream_data(msg, "__substg1.0_0E04001F"),
|
"To": self._get_stream_data(msg, "__substg1.0_0E04001F"),
|
||||||
"Subject": self._get_stream_data(msg, "__substg1.0_0037001F"),
|
"Subject": self._get_stream_data(msg, "__substg1.0_0037001F"),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
BIN
tests/test_files/test_outlook_msg.msg
vendored
Normal file
BIN
tests/test_files/test_outlook_msg.msg
vendored
Normal file
Binary file not shown.
|
|
@ -63,6 +63,15 @@ DOCX_TEST_STRINGS = [
|
||||||
"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
|
"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
MSG_TEST_STRINGS = [
|
||||||
|
"# Email Message",
|
||||||
|
"**From:** test.sender@example.com",
|
||||||
|
"**To:** test.recipient@example.com",
|
||||||
|
"**Subject:** Test Email Message",
|
||||||
|
"## Content",
|
||||||
|
"This is the body of the test email message",
|
||||||
|
]
|
||||||
|
|
||||||
DOCX_COMMENT_TEST_STRINGS = [
|
DOCX_COMMENT_TEST_STRINGS = [
|
||||||
"314b0a30-5b04-470b-b9f7-eed2c2bec74a",
|
"314b0a30-5b04-470b-b9f7-eed2c2bec74a",
|
||||||
"49e168b7-d2ae-407f-a055-2167576f39a1",
|
"49e168b7-d2ae-407f-a055-2167576f39a1",
|
||||||
|
|
@ -232,6 +241,10 @@ def test_markitdown_local() -> None:
|
||||||
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv"))
|
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv"))
|
||||||
validate_strings(result, CSV_CP932_TEST_STRINGS)
|
validate_strings(result, CSV_CP932_TEST_STRINGS)
|
||||||
|
|
||||||
|
# Test MSG (Outlook email) processing
|
||||||
|
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"))
|
||||||
|
validate_strings(result, MSG_TEST_STRINGS)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
skip_exiftool,
|
skip_exiftool,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue