Strip leading and trailing whitespace (including \n) from HtmlConverter text output

This commit is contained in:
ZeyuTeng96 2025-01-06 09:59:46 +08:00 committed by GitHub
parent 05b78e7ce1
commit 08a45fa4bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -223,6 +223,9 @@ class HtmlConverter(DocumentConverter):
assert isinstance(webpage_text, str)
# Strip leading and trailing whitespace (newlines included) from the converted text.
webpage_text = webpage_text.strip()
return DocumentConverterResult(
title=None if soup.title is None else soup.title.string,
text_content=webpage_text,