From 663437fe16a94acec9915d5dc232c4a7ec519399 Mon Sep 17 00:00:00 2001 From: Yuzhong Zhang <141388234+BetterAndBetterII@users.noreply.github.com> Date: Tue, 1 Apr 2025 01:05:27 +0800 Subject: [PATCH] Fix DOCX parse error caused by newlines (\n) in image alt text --- packages/markitdown/src/markitdown/converters/_markdownify.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/markitdown/src/markitdown/converters/_markdownify.py b/packages/markitdown/src/markitdown/converters/_markdownify.py index d98bdfb..058e2ec 100644 --- a/packages/markitdown/src/markitdown/converters/_markdownify.py +++ b/packages/markitdown/src/markitdown/converters/_markdownify.py @@ -95,6 +95,8 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): src = el.attrs.get("src", None) or "" title = el.attrs.get("title", None) or "" title_part = ' "%s"' % title.replace('"', r"\"") if title else "" + # Remove all line breaks from alt + alt = alt.replace("\n", " ") if ( convert_as_inline and el.parent.name not in self.options["keep_inline_images_in"]