enhance website

This commit is contained in:
朱昊天 2025-04-30 13:55:38 +12:00
parent dea6b7d19f
commit 4eb0107e2f

View file

@@ -104,7 +104,8 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
Supports categorized storage in subfolders by document name Supports categorized storage in subfolders by document name
""" """
alt = el.attrs.get("alt", None) or "" alt = el.attrs.get("alt", None) or ""
src = el.attrs.get("src", None) or "" # src = el.attrs.get("src", None) or ""
src = el.attrs.get("src", None) or el.attrs.get("data-src", None) or ""
title = el.attrs.get("title", None) or "" title = el.attrs.get("title", None) or ""
title_part = ' "%s"' % title.replace('"', r"\"") if title else "" title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
@@ -166,8 +167,14 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
print(f"[ERROR] {error_msg}", file=sys.stderr) print(f"[ERROR] {error_msg}", file=sys.stderr)
import traceback import traceback
traceback.print_exc(file=sys.stderr) traceback.print_exc(file=sys.stderr)
# If extraction fails, revert to original truncating behavior
src = src.split(",")[0] + "..."
return f"![{alt}](image_error.png) <!-- {error_msg} -->" return f"![{alt}](image_error.png) <!-- {error_msg} -->"
# Process other data URIs that are not images (truncate them)
elif src.startswith("data:") and not self.options.get("keep_data_uris", False):
src = src.split(",")[0] + "..."
# Return Markdown format image reference # Return Markdown format image reference
return f"![{alt}]({src}{title_part})" return f"![{alt}]({src}{title_part})"