From dbf09026bcfc0b2b53f87c34764a6583041255d9 Mon Sep 17 00:00:00 2001
From: Mauro Druwel
Date: Sat, 4 Jan 2025 13:26:27 +0100
Subject: [PATCH] Remove newlines in image alt_text

Replace newline characters in the image alt text with spaces before the
text is used to derive the image file name, and return the cleaned text
as the "alt" entry next to "src" when the image is saved successfully.

The alt text may be missing (the file-name fallback on the following
line already allows for that), so fall back to an empty string before
calling str.replace() rather than raising AttributeError on None.

---
 src/markitdown/_markitdown.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index bf1900c..fa46bf9 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -748,6 +748,7 @@ class DocxConverter(HtmlConverter):
         """Handles image extraction and saving with collision avoidance and length limits."""
         os.makedirs(output_dir, exist_ok=True)
 
+        image.alt_text = (image.alt_text or "").replace("\n", " ")
         raw_name = image.alt_text or f"image_{hash(image)}"
         sanitized_name = self.sanitize_filename(raw_name)
         truncated_name = self.truncate_filename(sanitized_name, 251, ".png")
@@ -760,7 +761,7 @@ class DocxConverter(HtmlConverter):
             with image.open() as image_bytes:
                 with open(image_path, "wb") as img_file:
                     img_file.write(image_bytes.read())
-            return {"src": image_path}
+            return {"src": image_path, "alt": image.alt_text}
         except Exception:
             # Return an empty src if saving fails
             return {"src": ""}