Merge 39d5a088b8 into 73ba69d8cd
This commit is contained in:
commit
bc5a57ec6e
4 changed files with 84 additions and 0 deletions
|
|
@ -46,6 +46,13 @@ dependencies = [
|
||||||
"azure-identity"
|
"azure-identity"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
dev-dependencies = [
|
||||||
|
"pytest>=7.0",
|
||||||
|
"pytest-asyncio>=0.23.0",
|
||||||
|
"black>=23.7.0",
|
||||||
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Documentation = "https://github.com/microsoft/markitdown#readme"
|
Documentation = "https://github.com/microsoft/markitdown#readme"
|
||||||
Issues = "https://github.com/microsoft/markitdown/issues"
|
Issues = "https://github.com/microsoft/markitdown/issues"
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,11 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
|
from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
|
||||||
|
from ._async_wrapper import AsyncMarkItDown
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"MarkItDown",
|
"MarkItDown",
|
||||||
|
"AsyncMarkItDown",
|
||||||
"FileConversionException",
|
"FileConversionException",
|
||||||
"UnsupportedFormatException",
|
"UnsupportedFormatException",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
46
src/markitdown/_async_wrapper.py
Normal file
46
src/markitdown/_async_wrapper.py
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
"""Async wrapper for MarkItDown."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from functools import partial
|
||||||
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
from ._markitdown import MarkItDown, DocumentConverterResult
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncMarkItDown:
    """Async wrapper for MarkItDown that runs operations in a thread pool.

    The wrapper holds a synchronous ``MarkItDown`` instance and exposes an
    awaitable ``convert``. It can also be used as an async context manager;
    entering returns the wrapper itself and exiting does nothing.
    """

    def __init__(self, markitdown: Optional["MarkItDown"] = None):
        """Initialize the async wrapper.

        Args:
            markitdown: Optional MarkItDown instance to wrap. If not provided,
                a new instance will be created.
        """
        # Explicit None check: only a missing argument triggers construction
        # of a default instance, never a falsy-but-valid object.
        self._markitdown = markitdown if markitdown is not None else MarkItDown()
        # No event loop is captured here. The loop is looked up at call time
        # in convert(), so the wrapper may be created before any loop is
        # running and reused across separate asyncio.run() invocations.

    async def __aenter__(self) -> "AsyncMarkItDown":
        """Async context manager entry; returns this wrapper."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Async context manager exit; nothing to release.

        Returns None, so an exception raised inside the ``async with`` body
        is not suppressed.
        """
        return None

    async def convert(self, file_path: str, **kwargs) -> "DocumentConverterResult":
        """Convert a file to markdown asynchronously.

        This runs the synchronous convert operation in a thread pool to avoid
        blocking the event loop.

        Args:
            file_path: Path to the file to convert
            **kwargs: Additional arguments to pass to the converter

        Returns:
            DocumentConverterResult containing the converted markdown
        """
        # Bind the arguments up front: run_in_executor only forwards
        # positional arguments, so keyword arguments must travel in a partial.
        func = partial(self._markitdown.convert, file_path, **kwargs)
        # Use the loop that is actually running this coroutine, and its
        # default executor (None).
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func)
|
||||||
29
tests/test_async_markitdown.py
Normal file
29
tests/test_async_markitdown.py
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
#!/usr/bin/env python3 -m pytest
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from markitdown import AsyncMarkItDown
|
||||||
|
|
||||||
|
# Directory named "test_files" located next to this test module.
TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files")
|
||||||
|
|
||||||
|
# Substrings that the test below asserts are present in the converted text of
# "test.docx" (after backslashes have been removed from that text).
DOCX_TEST_STRINGS = [
|
||||||
|
"314b0a30-5b04-470b-b9f7-eed2c2bec74a",
|
||||||
|
"49e168b7-d2ae-407f-a055-2167576f39a1",
|
||||||
|
"## d666f1f7-46cb-42bd-9a39-9a39cf2a509f",
|
||||||
|
"# Abstract",
|
||||||
|
"# Introduction",
|
||||||
|
]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_async_markitdown_basic():
    """Test basic async functionality with a local file.

    Converts ``test.docx`` from the test-files directory through the async
    wrapper and checks that every expected DOCX string appears in the output.
    """
    docx_path = os.path.join(TEST_FILES_DIR, "test.docx")
    async with AsyncMarkItDown() as markitdown:
        result = await markitdown.convert(docx_path)

    # Strip backslashes once, outside the loop: the cleaned text is the same
    # for every expected string. (Presumably the backslashes are Markdown
    # escape characters -- confirm against the converter output.)
    text_content = result.text_content.replace("\\", "")

    # Verify the conversion worked as expected
    for test_string in DOCX_TEST_STRINGS:
        assert (
            test_string in text_content
        ), f"expected string not found in converted output: {test_string!r}"
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # pytest.main() returns the exit code of the test run; hand it to the
    # interpreter so running this file as a script fails when a test fails.
    raise SystemExit(pytest.main([__file__]))
|
||||||
Loading…
Reference in a new issue