Proper handling when trying to read a non-existing file

This commit is contained in:
Alex 2024-12-16 01:54:43 +02:00
parent 81e3f24acd
commit 1ffb875bf6
3 changed files with 18 additions and 2 deletions

View file

@ -2,10 +2,16 @@
#
# SPDX-License-Identifier: MIT
from ._markitdown import MarkItDown, FileConversionException, UnsupportedFormatException
from ._markitdown import (
MarkItDown,
FileConversionException,
UnsupportedFormatException,
FileDoesNotExistException,
)
__all__ = [
"MarkItDown",
"FileConversionException",
"FileDoesNotExistException",
"UnsupportedFormatException",
]

View file

@ -845,6 +845,10 @@ class UnsupportedFormatException(BaseException):
pass
class FileDoesNotExistException(Exception):
    """Raised when a local path handed to the converter does not exist.

    The error message names the missing path.

    This derives from ``Exception`` rather than ``BaseException``.
    ``BaseException`` is meant for interpreter-exit signals such as
    ``KeyboardInterrupt`` and ``SystemExit``; an application error rooted
    there bypasses ordinary ``except Exception:`` handlers. Handlers that
    catch ``FileDoesNotExistException`` or ``BaseException`` still work.
    """
class MarkItDown:
"""(In preview) An extremely simple text-based document reader, suitable for LLM use.
This reader will convert common file-types or webpages to Markdown."""
@ -911,6 +915,9 @@ class MarkItDown:
ext = kwargs.get("file_extension")
extensions = [ext] if ext is not None else []
if not os.path.exists(path):
raise FileDoesNotExistException(f"File {path} does not exist")
# Get extension alternatives from the path and puremagic
base, ext = os.path.splitext(path)
self._append_ext(extensions, ext)

View file

@ -6,7 +6,7 @@ import shutil
import pytest
import requests
from markitdown import MarkItDown
from markitdown import MarkItDown, FileDoesNotExistException
skip_remote = (
True if os.environ.get("GITHUB_ACTIONS") else False
@ -144,6 +144,9 @@ def test_markitdown_local() -> None:
text_content = result.text_content.replace("\\", "")
assert test_string in text_content
with pytest.raises(FileDoesNotExistException):
markitdown.convert(os.path.join(TEST_FILES_DIR, "missing_file.pdf"))
# Test Wikipedia processing
result = markitdown.convert(
os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), url=WIKIPEDIA_TEST_URL