Remove newlines in image alt_text
This commit is contained in:
parent
afda281a67
commit
dbf09026bc
1 changed file with 2 additions and 1 deletion
|
|
@@ -748,6 +748,7 @@ class DocxConverter(HtmlConverter):
|
||||||
"""Handles image extraction and saving with collision avoidance and length limits."""
|
"""Handles image extraction and saving with collision avoidance and length limits."""
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
|
image.alt_text = image.alt_text.replace("\n", " ")
|
||||||
raw_name = image.alt_text or f"image_{hash(image)}"
|
raw_name = image.alt_text or f"image_{hash(image)}"
|
||||||
sanitized_name = self.sanitize_filename(raw_name)
|
sanitized_name = self.sanitize_filename(raw_name)
|
||||||
truncated_name = self.truncate_filename(sanitized_name, 251, ".png")
|
truncated_name = self.truncate_filename(sanitized_name, 251, ".png")
|
||||||
|
|
@@ -760,7 +761,7 @@ class DocxConverter(HtmlConverter):
|
||||||
with image.open() as image_bytes:
|
with image.open() as image_bytes:
|
||||||
with open(image_path, "wb") as img_file:
|
with open(image_path, "wb") as img_file:
|
||||||
img_file.write(image_bytes.read())
|
img_file.write(image_bytes.read())
|
||||||
return {"src": image_path}
|
return {"src": image_path, "alt": image.alt_text}
|
||||||
except Exception:
|
except Exception:
|
||||||
# Return an empty src if saving fails
|
# Return an empty src if saving fails
|
||||||
return {"src": ""}
|
return {"src": ""}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue