diff --git a/packages/markitdown/pyproject.toml b/packages/markitdown/pyproject.toml index c053c7b..d0f515e 100644 --- a/packages/markitdown/pyproject.toml +++ b/packages/markitdown/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ dependencies = [ "beautifulsoup4", "requests", - "markdownify~=0.14.1", + "markdownify", "puremagic", "pathvalidate", "charset-normalizer", @@ -78,11 +78,14 @@ extra-dependencies = [ ] [tool.hatch.envs.types] +features = ["all"] extra-dependencies = [ + "openai", "mypy>=1.0.0", ] + [tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/markitdown tests}" +check = "mypy --install-types --non-interactive --ignore-missing-imports {args:src/markitdown tests}" [tool.coverage.run] source_pkgs = ["markitdown", "tests"] diff --git a/packages/markitdown/src/markitdown/_exceptions.py b/packages/markitdown/src/markitdown/_exceptions.py index abfebc6..93f8f0e 100644 --- a/packages/markitdown/src/markitdown/_exceptions.py +++ b/packages/markitdown/src/markitdown/_exceptions.py @@ -68,6 +68,9 @@ class FileConversionException(MarkItDownException): else: message = f"File conversion failed after {len(attempts)} attempts:\n" for attempt in attempts: - message += f" - {type(attempt.converter).__name__} threw {attempt.exc_info[0].__name__} with message: {attempt.exc_info[1]}\n" + if attempt.exc_info is None: + message += f" - {type(attempt.converter).__name__} provided no execution info.\n" 
+ else: + message += f" - {type(attempt.converter).__name__} threw {attempt.exc_info[0].__name__} with message: {attempt.exc_info[1]}\n" super().__init__(message) diff --git a/packages/markitdown/src/markitdown/_stream_info.py b/packages/markitdown/src/markitdown/_stream_info.py index 8c3cb90..32a51ef 100644 --- a/packages/markitdown/src/markitdown/_stream_info.py +++ b/packages/markitdown/src/markitdown/_stream_info.py @@ -62,7 +62,8 @@ def _guess_stream_info_from_stream( # Add a guess purely based on the filename hint if filename_hint: try: - mimetype, _ = mimetypes.guess_file_type(filename_hint) + # Requires Python 3.13+ + mimetype, _ = mimetypes.guess_file_type(filename_hint) # type: ignore except AttributeError: mimetype, _ = mimetypes.guess_type(filename_hint) diff --git a/packages/markitdown/src/markitdown/converters/_markdownify.py b/packages/markitdown/src/markitdown/converters/_markdownify.py index e15f607..ae99c0b 100644 --- a/packages/markitdown/src/markitdown/converters/_markdownify.py +++ b/packages/markitdown/src/markitdown/converters/_markdownify.py @@ -1,7 +1,7 @@ import re import markdownify -from typing import Any +from typing import Any, Optional from urllib.parse import quote, unquote, urlparse, urlunparse @@ -20,7 +20,14 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): # Explicitly cast options to the expected type if necessary super().__init__(**options) - def convert_hn(self, n: int, el: Any, text: str, convert_as_inline: bool) -> str: + def convert_hn( + self, + n: int, + el: Any, + text: str, + convert_as_inline: Optional[bool] = False, + **kwargs, + ) -> str: """Same as usual, but be sure to start with a new line""" if not convert_as_inline: if not re.search(r"^\n", text): @@ -28,7 +35,13 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): return super().convert_hn(n, el, text, convert_as_inline) # type: ignore - def convert_a(self, el: Any, text: str, convert_as_inline: bool): + def convert_a( + self, + el: Any, + 
text: str, + convert_as_inline: Optional[bool] = False, + **kwargs, + ): """Same as usual converter, but removes Javascript links and escapes URIs.""" prefix, suffix, text = markdownify.chomp(text) # type: ignore if not text: @@ -68,7 +81,13 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): else text ) - def convert_img(self, el: Any, text: str, convert_as_inline: bool) -> str: + def convert_img( + self, + el: Any, + text: str, + convert_as_inline: Optional[bool] = False, + **kwargs, + ) -> str: """Same as usual converter, but removes data URIs""" alt = el.attrs.get("alt", None) or ""