Add keep_data_uris parameter support to the other converters (Bing SERP, DOCX, RSS, Wikipedia)
This commit is contained in:
parent
9f1bcf3b83
commit
41cd9b5e2a
4 changed files with 10 additions and 6 deletions
|
|
@ -79,7 +79,7 @@ class BingSerpConverter(DocumentConverter):
|
|||
slug.extract()
|
||||
|
||||
# Parse the algorithmic results
|
||||
_markdownify = _CustomMarkdownify()
|
||||
_markdownify = _CustomMarkdownify(keep_data_uris=kwargs.get("keep_data_uris", False))
|
||||
results = list()
|
||||
for result in soup.find_all(class_="b_algo"):
|
||||
if not hasattr(result, "find_all"):
|
||||
|
|
|
|||
|
|
@ -72,8 +72,7 @@ class DocxConverter(HtmlConverter):
|
|||
)
|
||||
|
||||
style_map = kwargs.get("style_map", None)
|
||||
keep_data_uris = kwargs.get("keep_data_uris", False)
|
||||
return self._html_converter.convert_string(
|
||||
mammoth.convert_to_html(file_stream, style_map=style_map).value,
|
||||
keep_data_uris=keep_data_uris
|
||||
keep_data_uris=kwargs.get("keep_data_uris", False)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@ CANDIDATE_FILE_EXTENSIONS = [
|
|||
class RssConverter(DocumentConverter):
|
||||
"""Convert RSS / Atom type to markdown"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._kwargs = {}
|
||||
|
||||
def accepts(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
|
|
@ -82,6 +86,7 @@ class RssConverter(DocumentConverter):
|
|||
stream_info: StreamInfo,
|
||||
**kwargs: Any, # Options to pass to the converter
|
||||
) -> DocumentConverterResult:
|
||||
self._kwargs = kwargs
|
||||
doc = minidom.parse(file_stream)
|
||||
feed_type = self._feed_type(doc)
|
||||
|
||||
|
|
@ -166,7 +171,7 @@ class RssConverter(DocumentConverter):
|
|||
try:
|
||||
# using bs4 because many RSS feeds have HTML-styled content
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
return _CustomMarkdownify().convert_soup(soup)
|
||||
return _CustomMarkdownify(keep_data_uris=self._kwargs.get("keep_data_uris", False)).convert_soup(soup)
|
||||
except BaseException as _:
|
||||
return content
|
||||
|
||||
|
|
|
|||
|
|
@ -76,11 +76,11 @@ class WikipediaConverter(DocumentConverter):
|
|||
main_title = title_elm.string
|
||||
|
||||
# Convert the page
|
||||
webpage_text = f"# {main_title}\n\n" + _CustomMarkdownify().convert_soup(
|
||||
webpage_text = f"# {main_title}\n\n" + _CustomMarkdownify(keep_data_uris=kwargs.get("keep_data_uris", False)).convert_soup(
|
||||
body_elm
|
||||
)
|
||||
else:
|
||||
webpage_text = _CustomMarkdownify().convert_soup(soup)
|
||||
webpage_text = _CustomMarkdownify(keep_data_uris=kwargs.get("keep_data_uris", False)).convert_soup(soup)
|
||||
|
||||
return DocumentConverterResult(
|
||||
markdown=webpage_text,
|
||||
|
|
|
|||
Loading…
Reference in a new issue