Updated markdownify dependency.
This commit is contained in:
parent
cc38144752
commit
4d097aa379
4 changed files with 34 additions and 8 deletions
|
|
@ -26,7 +26,7 @@ classifiers = [
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"beautifulsoup4",
|
"beautifulsoup4",
|
||||||
"requests",
|
"requests",
|
||||||
"markdownify~=0.14.1",
|
"markdownify",
|
||||||
"puremagic",
|
"puremagic",
|
||||||
"pathvalidate",
|
"pathvalidate",
|
||||||
"charset-normalizer",
|
"charset-normalizer",
|
||||||
|
|
@ -78,11 +78,14 @@ extra-dependencies = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.hatch.envs.types]
|
[tool.hatch.envs.types]
|
||||||
|
features = ["all"]
|
||||||
extra-dependencies = [
|
extra-dependencies = [
|
||||||
|
"openai",
|
||||||
"mypy>=1.0.0",
|
"mypy>=1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.hatch.envs.types.scripts]
|
[tool.hatch.envs.types.scripts]
|
||||||
check = "mypy --install-types --non-interactive {args:src/markitdown tests}"
|
check = "mypy --install-types --non-interactive --ignore-missing-imports {args:src/markitdown tests}"
|
||||||
|
|
||||||
[tool.coverage.run]
|
[tool.coverage.run]
|
||||||
source_pkgs = ["markitdown", "tests"]
|
source_pkgs = ["markitdown", "tests"]
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,9 @@ class FileConversionException(MarkItDownException):
|
||||||
else:
|
else:
|
||||||
message = f"File conversion failed after {len(attempts)} attempts:\n"
|
message = f"File conversion failed after {len(attempts)} attempts:\n"
|
||||||
for attempt in attempts:
|
for attempt in attempts:
|
||||||
message += f" - {type(attempt.converter).__name__} threw {attempt.exc_info[0].__name__} with message: {attempt.exc_info[1]}\n"
|
if attempt.exc_info is None:
|
||||||
|
message += f" - {type(attempt.converter).__name__} provided no execution info.\n"
|
||||||
|
else:
|
||||||
|
message += f" - {type(attempt.converter).__name__} threw {attempt.exc_info[0].__name__} with message: {attempt.exc_info[1]}\n"
|
||||||
|
|
||||||
super().__init__(message)
|
super().__init__(message)
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,8 @@ def _guess_stream_info_from_stream(
|
||||||
# Add a guess purely based on the filename hint
|
# Add a guess purely based on the filename hint
|
||||||
if filename_hint:
|
if filename_hint:
|
||||||
try:
|
try:
|
||||||
mimetype, _ = mimetypes.guess_file_type(filename_hint)
|
# Requires Python 3.13+
|
||||||
|
mimetype, _ = mimetypes.guess_file_type(filename_hint) # type: ignore
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
mimetype, _ = mimetypes.guess_type(filename_hint)
|
mimetype, _ = mimetypes.guess_type(filename_hint)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import re
|
import re
|
||||||
import markdownify
|
import markdownify
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any, Optional
|
||||||
from urllib.parse import quote, unquote, urlparse, urlunparse
|
from urllib.parse import quote, unquote, urlparse, urlunparse
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -20,7 +20,14 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
|
||||||
# Explicitly cast options to the expected type if necessary
|
# Explicitly cast options to the expected type if necessary
|
||||||
super().__init__(**options)
|
super().__init__(**options)
|
||||||
|
|
||||||
def convert_hn(self, n: int, el: Any, text: str, convert_as_inline: bool) -> str:
|
def convert_hn(
|
||||||
|
self,
|
||||||
|
n: int,
|
||||||
|
el: Any,
|
||||||
|
text: str,
|
||||||
|
convert_as_inline: Optional[bool] = False,
|
||||||
|
**kwargs,
|
||||||
|
) -> str:
|
||||||
"""Same as usual, but be sure to start with a new line"""
|
"""Same as usual, but be sure to start with a new line"""
|
||||||
if not convert_as_inline:
|
if not convert_as_inline:
|
||||||
if not re.search(r"^\n", text):
|
if not re.search(r"^\n", text):
|
||||||
|
|
@ -28,7 +35,13 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
|
||||||
|
|
||||||
return super().convert_hn(n, el, text, convert_as_inline) # type: ignore
|
return super().convert_hn(n, el, text, convert_as_inline) # type: ignore
|
||||||
|
|
||||||
def convert_a(self, el: Any, text: str, convert_as_inline: bool):
|
def convert_a(
|
||||||
|
self,
|
||||||
|
el: Any,
|
||||||
|
text: str,
|
||||||
|
convert_as_inline: Optional[bool] = False,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
"""Same as usual converter, but removes Javascript links and escapes URIs."""
|
"""Same as usual converter, but removes Javascript links and escapes URIs."""
|
||||||
prefix, suffix, text = markdownify.chomp(text) # type: ignore
|
prefix, suffix, text = markdownify.chomp(text) # type: ignore
|
||||||
if not text:
|
if not text:
|
||||||
|
|
@ -68,7 +81,13 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
|
||||||
else text
|
else text
|
||||||
)
|
)
|
||||||
|
|
||||||
def convert_img(self, el: Any, text: str, convert_as_inline: bool) -> str:
|
def convert_img(
|
||||||
|
self,
|
||||||
|
el: Any,
|
||||||
|
text: str,
|
||||||
|
convert_as_inline: Optional[bool] = False,
|
||||||
|
**kwargs,
|
||||||
|
) -> str:
|
||||||
"""Same as usual converter, but removes data URIs"""
|
"""Same as usual converter, but removes data URIs"""
|
||||||
|
|
||||||
alt = el.attrs.get("alt", None) or ""
|
alt = el.attrs.get("alt", None) or ""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue