Merge 1ffb875bf6 into 81e3f24acd
This commit is contained in:
commit
5cbd3ceb6e
3 changed files with 18 additions and 2 deletions
|
|
@@ -2,10 +2,16 @@
|
|||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
|
||||
from ._markitdown import (
|
||||
MarkItDown,
|
||||
FileConversionException,
|
||||
UnsupportedFormatException,
|
||||
FileDoesNotExistException,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"MarkItDown",
|
||||
"FileConversionException",
|
||||
"FileDoesNotExistException",
|
||||
"UnsupportedFormatException",
|
||||
]
|
||||
|
|
|
|||
|
|
@@ -845,6 +845,10 @@ class UnsupportedFormatException(BaseException):
|
|||
pass
|
||||
|
||||
|
||||
class FileDoesNotExistException(Exception):
    """Raised when a local path given for conversion does not exist.

    The conversion code raises this after ``os.path.exists(path)`` returns
    false, with a message of the form ``"File <path> does not exist"``.

    It derives from ``Exception`` rather than ``BaseException``: a missing
    input file is an ordinary, recoverable error, so generic
    ``except Exception`` handlers in calling code should be able to see it.
    ``BaseException`` is meant for interpreter-level exits such as
    ``KeyboardInterrupt`` and ``SystemExit``.  Handlers that catch this class
    by name, or that catch ``BaseException``, behave exactly as before.
    """
|
||||
|
||||
|
||||
class MarkItDown:
|
||||
"""(In preview) An extremely simple text-based document reader, suitable for LLM use.
|
||||
This reader will convert common file-types or webpages to Markdown."""
|
||||
|
|
@@ -911,6 +915,9 @@ class MarkItDown:
|
|||
ext = kwargs.get("file_extension")
|
||||
extensions = [ext] if ext is not None else []
|
||||
|
||||
if not os.path.exists(path):
|
||||
raise FileDoesNotExistException(f"File {path} does not exist")
|
||||
|
||||
# Get extension alternatives from the path and puremagic
|
||||
base, ext = os.path.splitext(path)
|
||||
self._append_ext(extensions, ext)
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ import shutil
|
|||
import pytest
|
||||
import requests
|
||||
|
||||
from markitdown import MarkItDown
|
||||
from markitdown import MarkItDown, FileDoesNotExistException
|
||||
|
||||
skip_remote = (
|
||||
True if os.environ.get("GITHUB_ACTIONS") else False
|
||||
|
|
@@ -144,6 +144,9 @@ def test_markitdown_local() -> None:
|
|||
text_content = result.text_content.replace("\\", "")
|
||||
assert test_string in text_content
|
||||
|
||||
with pytest.raises(FileDoesNotExistException):
|
||||
markitdown.convert(os.path.join(TEST_FILES_DIR, "missing_file.pdf"))
|
||||
|
||||
# Test Wikipedia processing
|
||||
result = markitdown.convert(
|
||||
os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), url=WIKIPEDIA_TEST_URL
|
||||
|
|
|
|||
Loading…
Reference in a new issue