Merge 39d5a088b8 into 73ba69d8cd
This commit is contained in:
commit
bc5a57ec6e
4 changed files with 84 additions and 0 deletions
|
|
@ -46,6 +46,13 @@ dependencies = [
|
|||
"azure-identity"
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
dev-dependencies = [
|
||||
"pytest>=7.0",
|
||||
"pytest-asyncio>=0.23.0",
|
||||
"black>=23.7.0",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Documentation = "https://github.com/microsoft/markitdown#readme"
|
||||
Issues = "https://github.com/microsoft/markitdown/issues"
|
||||
|
|
|
|||
|
|
@ -3,9 +3,11 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
|
||||
from ._async_wrapper import AsyncMarkItDown
|
||||
|
||||
__all__ = [
|
||||
"MarkItDown",
|
||||
"AsyncMarkItDown",
|
||||
"FileConversionException",
|
||||
"UnsupportedFormatException",
|
||||
]
|
||||
|
|
|
|||
46
src/markitdown/_async_wrapper.py
Normal file
46
src/markitdown/_async_wrapper.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
"""Async wrapper for MarkItDown."""
|
||||
|
||||
import asyncio
|
||||
from functools import partial
|
||||
from typing import Optional, Union
|
||||
|
||||
from ._markitdown import MarkItDown, DocumentConverterResult
|
||||
|
||||
|
||||
class AsyncMarkItDown:
    """Async wrapper for MarkItDown that runs operations in a thread pool.

    The wrapped ``MarkItDown`` instance performs the (synchronous) conversion;
    ``convert`` submits each call to the default executor of the event loop
    that is running at call time, so the loop is not blocked meanwhile.
    """

    def __init__(self, markitdown: Optional["MarkItDown"] = None):
        """Initialize the async wrapper.

        No event loop is looked up here, so the wrapper can be constructed
        outside of a coroutine and used later from any running loop.

        Args:
            markitdown: Optional MarkItDown instance to wrap. If not provided,
                a new instance will be created.
        """
        # Compare against None explicitly so a caller-supplied instance is
        # never replaced just because it happens to evaluate as falsy.
        if markitdown is None:
            markitdown = MarkItDown()
        self._markitdown = markitdown

    async def __aenter__(self):
        """Async context manager entry; returns the wrapper itself."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit.

        Nothing is released here, and returning ``None`` means an exception
        raised inside the ``async with`` body propagates to the caller.
        """
        return None

    async def convert(self, file_path: str, **kwargs) -> "DocumentConverterResult":
        """Convert a file to markdown asynchronously.

        This runs the synchronous convert operation in a thread pool to avoid
        blocking the event loop.

        Args:
            file_path: Path to the file to convert
            **kwargs: Additional arguments to pass to the converter

        Returns:
            DocumentConverterResult containing the converted markdown

        Raises:
            RuntimeError: If called without a running event loop.
        """
        # Resolve the loop at call time: a loop captured in __init__ may not
        # be the one this coroutine is running on (e.g. under asyncio.run()).
        loop = asyncio.get_running_loop()
        func = partial(self._markitdown.convert, file_path, **kwargs)
        # Passing None selects the loop's default executor.
        return await loop.run_in_executor(None, func)
|
||||
29
tests/test_async_markitdown.py
Normal file
29
tests/test_async_markitdown.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env python3 -m pytest
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from markitdown import AsyncMarkItDown
|
||||
|
||||
# Directory of sample input files, located next to this test module.
TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files")

# Strings that the async test looks for in the converted test.docx output.
DOCX_TEST_STRINGS = [
    "314b0a30-5b04-470b-b9f7-eed2c2bec74a",
    "49e168b7-d2ae-407f-a055-2167576f39a1",
    "## d666f1f7-46cb-42bd-9a39-9a39cf2a509f",
    "# Abstract",
    "# Introduction",
]
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_markitdown_basic():
    """Convert the sample DOCX through the async wrapper and check its text."""
    docx_path = os.path.join(TEST_FILES_DIR, "test.docx")

    async with AsyncMarkItDown() as converter:
        result = await converter.convert(docx_path)

        # Backslashes are stripped from the output before matching, so the
        # expected strings are compared against the unescaped text.
        unescaped = result.text_content.replace("\\", "")
        for expected in DOCX_TEST_STRINGS:
            assert expected in unescaped
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    pytest.main([__file__])
|
||||
Loading…
Reference in a new issue