Add FileDoesNotExistException and raise it for missing local paths.

Exports the new exception from the package, raises it from the local-file
conversion path when os.path.exists(path) is false (before the extension
guessing runs), and adds a test that converting "missing_file.pdf" raises it.

NOTE(review): the new class derives from BaseException, matching the
UnsupportedFormatException sibling shown in the hunk context, so a plain
"except Exception" will not catch it. Confirm this is intended.
NOTE(review): os.path.exists is also true for directories, so a directory
path passes this check. Confirm the later code handles that case.

diff --git a/src/markitdown/__init__.py b/src/markitdown/__init__.py
index 482f428..737ca06 100644
--- a/src/markitdown/__init__.py
+++ b/src/markitdown/__init__.py
@@ -2,10 +2,16 @@
 #
 # SPDX-License-Identifier: MIT
 
-from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
+from ._markitdown import (
+    MarkItDown,
+    FileConversionException,
+    UnsupportedFormatException,
+    FileDoesNotExistException,
+)
 
 __all__ = [
     "MarkItDown",
     "FileConversionException",
+    "FileDoesNotExistException",
     "UnsupportedFormatException",
 ]
diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index 96997cf..0b0b991 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -845,6 +845,10 @@ class UnsupportedFormatException(BaseException):
     pass
 
 
+class FileDoesNotExistException(BaseException):
+    pass
+
+
 class MarkItDown:
     """(In preview) An extremely simple text-based document reader, suitable for LLM use.
     This reader will convert common file-types or webpages to Markdown."""
@@ -911,6 +915,9 @@ class MarkItDown:
         ext = kwargs.get("file_extension")
         extensions = [ext] if ext is not None else []
 
+        if not os.path.exists(path):
+            raise FileDoesNotExistException(f"File {path} does not exist")
+
         # Get extension alternatives from the path and puremagic
         base, ext = os.path.splitext(path)
         self._append_ext(extensions, ext)
diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py
index 94fd886..b2fb3ef 100644
--- a/tests/test_markitdown.py
+++ b/tests/test_markitdown.py
@@ -6,7 +6,7 @@ import shutil
 import pytest
 import requests
 
-from markitdown import MarkItDown
+from markitdown import MarkItDown, FileDoesNotExistException
 
 skip_remote = (
     True if os.environ.get("GITHUB_ACTIONS") else False
@@ -144,6 +144,9 @@ def test_markitdown_local() -> None:
         text_content = result.text_content.replace("\\", "")
         assert test_string in text_content
 
+    with pytest.raises(FileDoesNotExistException):
+        markitdown.convert(os.path.join(TEST_FILES_DIR, "missing_file.pdf"))
+
     # Test Wikipedia processing
     result = markitdown.convert(
         os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), url=WIKIPEDIA_TEST_URL