From 489914831074d81db9c0db5de7d787c1ac107123 Mon Sep 17 00:00:00 2001 From: Yuzhong Zhang <141388234+BetterAndBetterII@users.noreply.github.com> Date: Tue, 18 Mar 2025 20:30:44 +0800 Subject: [PATCH] fix linter --- .../src/markitdown/converters/_bing_serp_converter.py | 4 +++- .../src/markitdown/converters/_docx_converter.py | 2 +- .../src/markitdown/converters/_html_converter.py | 8 ++++++-- .../src/markitdown/converters/_rss_converter.py | 4 +++- .../src/markitdown/converters/_wikipedia_converter.py | 10 ++++++---- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py b/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py index 284170e..4108990 100644 --- a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py +++ b/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py @@ -79,7 +79,9 @@ class BingSerpConverter(DocumentConverter): slug.extract() # Parse the algorithmic results - _markdownify = _CustomMarkdownify(keep_data_uris=kwargs.get("keep_data_uris", False)) + _markdownify = _CustomMarkdownify( + keep_data_uris=kwargs.get("keep_data_uris", False) + ) results = list() for result in soup.find_all(class_="b_algo"): if not hasattr(result, "find_all"): diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py index 07ca62a..44ff85a 100644 --- a/packages/markitdown/src/markitdown/converters/_docx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py @@ -74,5 +74,5 @@ class DocxConverter(HtmlConverter): style_map = kwargs.get("style_map", None) return self._html_converter.convert_string( mammoth.convert_to_html(file_stream, style_map=style_map).value, - keep_data_uris=kwargs.get("keep_data_uris", False) + keep_data_uris=kwargs.get("keep_data_uris", False), ) diff --git a/packages/markitdown/src/markitdown/converters/_html_converter.py b/packages/markitdown/src/markitdown/converters/_html_converter.py index 7195772..91595de 100644 --- a/packages/markitdown/src/markitdown/converters/_html_converter.py +++ b/packages/markitdown/src/markitdown/converters/_html_converter.py @@ -57,9 +57,13 @@ class HtmlConverter(DocumentConverter): webpage_text = "" keep_data_uris = kwargs.get("keep_data_uris", False) if body_elm: - webpage_text = _CustomMarkdownify(keep_data_uris=keep_data_uris).convert_soup(body_elm) + webpage_text = _CustomMarkdownify( + keep_data_uris=keep_data_uris + ).convert_soup(body_elm) else: - webpage_text = _CustomMarkdownify(keep_data_uris=keep_data_uris).convert_soup(soup) + webpage_text = _CustomMarkdownify( + keep_data_uris=keep_data_uris + ).convert_soup(soup) assert isinstance(webpage_text, str) diff --git a/packages/markitdown/src/markitdown/converters/_rss_converter.py b/packages/markitdown/src/markitdown/converters/_rss_converter.py index c77e84b..9a4e881 100644 --- a/packages/markitdown/src/markitdown/converters/_rss_converter.py +++ b/packages/markitdown/src/markitdown/converters/_rss_converter.py @@ -171,7 +171,9 @@ class RssConverter(DocumentConverter): try: # using bs4 because many RSS feeds have HTML-styled content soup = BeautifulSoup(content, "html.parser") - return _CustomMarkdownify(keep_data_uris=self._kwargs.get("keep_data_uris", False)).convert_soup(soup) + return _CustomMarkdownify( + keep_data_uris=self._kwargs.get("keep_data_uris", False) + ).convert_soup(soup) except BaseException as _: return content diff --git a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py index fa1dd37..fb49bad 100644 --- a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py +++ b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py @@ -76,11 +76,13 @@ class WikipediaConverter(DocumentConverter): main_title = title_elm.string # Convert the page - webpage_text = f"# {main_title}\n\n" + _CustomMarkdownify(keep_data_uris=kwargs.get("keep_data_uris", False)).convert_soup( - body_elm - ) + webpage_text = f"# {main_title}\n\n" + _CustomMarkdownify( + keep_data_uris=kwargs.get("keep_data_uris", False) + ).convert_soup(body_elm) else: - webpage_text = _CustomMarkdownify(keep_data_uris=kwargs.get("keep_data_uris", False)).convert_soup(soup) + webpage_text = _CustomMarkdownify( + keep_data_uris=kwargs.get("keep_data_uris", False) + ).convert_soup(soup) return DocumentConverterResult( markdown=webpage_text,