Merge 1ffb875bf6 into 81e3f24acd
This commit is contained in:
commit
5cbd3ceb6e
3 changed files with 18 additions and 2 deletions
|
|
@ -2,10 +2,16 @@
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
|
from ._markitdown import (
|
||||||
|
MarkItDown,
|
||||||
|
FileConversionException,
|
||||||
|
UnsupportedFormatException,
|
||||||
|
FileDoesNotExistException,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"MarkItDown",
|
"MarkItDown",
|
||||||
"FileConversionException",
|
"FileConversionException",
|
||||||
|
"FileDoesNotExistException",
|
||||||
"UnsupportedFormatException",
|
"UnsupportedFormatException",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -845,6 +845,10 @@ class UnsupportedFormatException(BaseException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class FileDoesNotExistException(BaseException):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class MarkItDown:
|
class MarkItDown:
|
||||||
"""(In preview) An extremely simple text-based document reader, suitable for LLM use.
|
"""(In preview) An extremely simple text-based document reader, suitable for LLM use.
|
||||||
This reader will convert common file-types or webpages to Markdown."""
|
This reader will convert common file-types or webpages to Markdown."""
|
||||||
|
|
@ -911,6 +915,9 @@ class MarkItDown:
|
||||||
ext = kwargs.get("file_extension")
|
ext = kwargs.get("file_extension")
|
||||||
extensions = [ext] if ext is not None else []
|
extensions = [ext] if ext is not None else []
|
||||||
|
|
||||||
|
if not os.path.exists(path):
|
||||||
|
raise FileDoesNotExistException(f"File {path} does not exist")
|
||||||
|
|
||||||
# Get extension alternatives from the path and puremagic
|
# Get extension alternatives from the path and puremagic
|
||||||
base, ext = os.path.splitext(path)
|
base, ext = os.path.splitext(path)
|
||||||
self._append_ext(extensions, ext)
|
self._append_ext(extensions, ext)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import shutil
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from markitdown import MarkItDown
|
from markitdown import MarkItDown, FileDoesNotExistException
|
||||||
|
|
||||||
skip_remote = (
|
skip_remote = (
|
||||||
True if os.environ.get("GITHUB_ACTIONS") else False
|
True if os.environ.get("GITHUB_ACTIONS") else False
|
||||||
|
|
@ -144,6 +144,9 @@ def test_markitdown_local() -> None:
|
||||||
text_content = result.text_content.replace("\\", "")
|
text_content = result.text_content.replace("\\", "")
|
||||||
assert test_string in text_content
|
assert test_string in text_content
|
||||||
|
|
||||||
|
with pytest.raises(FileDoesNotExistException):
|
||||||
|
markitdown.convert(os.path.join(TEST_FILES_DIR, "missing_file.pdf"))
|
||||||
|
|
||||||
# Test Wikipedia processing
|
# Test Wikipedia processing
|
||||||
result = markitdown.convert(
|
result = markitdown.convert(
|
||||||
os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), url=WIKIPEDIA_TEST_URL
|
os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), url=WIKIPEDIA_TEST_URL
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue