Updated markdownify dependency.
This commit is contained in:
parent
cc38144752
commit
4d097aa379
4 changed files with 34 additions and 8 deletions
|
|
@ -26,7 +26,7 @@ classifiers = [
|
|||
dependencies = [
|
||||
"beautifulsoup4",
|
||||
"requests",
|
||||
"markdownify~=0.14.1",
|
||||
"markdownify",
|
||||
"puremagic",
|
||||
"pathvalidate",
|
||||
"charset-normalizer",
|
||||
|
|
@ -78,11 +78,14 @@ extra-dependencies = [
|
|||
]
|
||||
|
||||
[tool.hatch.envs.types]
|
||||
features = ["all"]
|
||||
extra-dependencies = [
|
||||
"openai",
|
||||
"mypy>=1.0.0",
|
||||
]
|
||||
|
||||
[tool.hatch.envs.types.scripts]
|
||||
check = "mypy --install-types --non-interactive {args:src/markitdown tests}"
|
||||
check = "mypy --install-types --non-interactive --ignore-missing-imports {args:src/markitdown tests}"
|
||||
|
||||
[tool.coverage.run]
|
||||
source_pkgs = ["markitdown", "tests"]
|
||||
|
|
|
|||
|
|
@ -68,6 +68,9 @@ class FileConversionException(MarkItDownException):
|
|||
else:
|
||||
message = f"File conversion failed after {len(attempts)} attempts:\n"
|
||||
for attempt in attempts:
|
||||
message += f" - {type(attempt.converter).__name__} threw {attempt.exc_info[0].__name__} with message: {attempt.exc_info[1]}\n"
|
||||
if attempt.exc_info is None:
|
||||
message += f" - {type(attempt.converter).__name__} provided no execution info.\n"
|
||||
else:
|
||||
message += f" - {type(attempt.converter).__name__} threw {attempt.exc_info[0].__name__} with message: {attempt.exc_info[1]}\n"
|
||||
|
||||
super().__init__(message)
|
||||
|
|
|
|||
|
|
@ -62,7 +62,8 @@ def _guess_stream_info_from_stream(
|
|||
# Add a guess purely based on the filename hint
|
||||
if filename_hint:
|
||||
try:
|
||||
mimetype, _ = mimetypes.guess_file_type(filename_hint)
|
||||
# Requires Python 3.13+
|
||||
mimetype, _ = mimetypes.guess_file_type(filename_hint) # type: ignore
|
||||
except AttributeError:
|
||||
mimetype, _ = mimetypes.guess_type(filename_hint)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import re
|
||||
import markdownify
|
||||
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import quote, unquote, urlparse, urlunparse
|
||||
|
||||
|
||||
|
|
@ -20,7 +20,14 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
|
|||
# Explicitly cast options to the expected type if necessary
|
||||
super().__init__(**options)
|
||||
|
||||
def convert_hn(self, n: int, el: Any, text: str, convert_as_inline: bool) -> str:
|
||||
def convert_hn(
|
||||
self,
|
||||
n: int,
|
||||
el: Any,
|
||||
text: str,
|
||||
convert_as_inline: Optional[bool] = False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
"""Same as usual, but be sure to start with a new line"""
|
||||
if not convert_as_inline:
|
||||
if not re.search(r"^\n", text):
|
||||
|
|
@ -28,7 +35,13 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
|
|||
|
||||
return super().convert_hn(n, el, text, convert_as_inline) # type: ignore
|
||||
|
||||
def convert_a(self, el: Any, text: str, convert_as_inline: bool):
|
||||
def convert_a(
|
||||
self,
|
||||
el: Any,
|
||||
text: str,
|
||||
convert_as_inline: Optional[bool] = False,
|
||||
**kwargs,
|
||||
):
|
||||
"""Same as usual converter, but removes Javascript links and escapes URIs."""
|
||||
prefix, suffix, text = markdownify.chomp(text) # type: ignore
|
||||
if not text:
|
||||
|
|
@ -68,7 +81,13 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
|
|||
else text
|
||||
)
|
||||
|
||||
def convert_img(self, el: Any, text: str, convert_as_inline: bool) -> str:
|
||||
def convert_img(
|
||||
self,
|
||||
el: Any,
|
||||
text: str,
|
||||
convert_as_inline: Optional[bool] = False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
"""Same as usual converter, but removes data URIs"""
|
||||
|
||||
alt = el.attrs.get("alt", None) or ""
|
||||
|
|
|
|||
Loading…
Reference in a new issue