From 4129f30c23f4ea97b7c614ebe5e610667a2fb070 Mon Sep 17 00:00:00 2001
From: Adam Fourney <adamfo@microsoft.com>
Date: Tue, 4 Mar 2025 00:52:57 -0800
Subject: [PATCH] More progress.

---
 .../markitdown/src/markitdown/__init__.py     |   4 +-
 .../src/markitdown/_base_converter.py         |  67 ++++++-
 .../markitdown/src/markitdown/_markitdown.py  | 168 +++++++-----------
 .../src/markitdown/converters/__init__.py     |   2 -
 .../src/markitdown/converters/_base.py        |  56 ------
 .../converters/_bing_serp_converter.py        |   3 +-
 .../converters/_doc_intel_converter.py        |   3 +-
 .../markitdown/converters/_docx_converter.py  |   3 +-
 .../markitdown/converters/_html_converter.py  |   3 +-
 .../markitdown/converters/_image_converter.py |   3 +-
 .../markitdown/converters/_ipynb_converter.py |   3 +-
 .../markitdown/converters/_media_converter.py |   2 +-
 .../markitdown/converters/_mp3_converter.py   |   3 +-
 .../converters/_outlook_msg_converter.py      |   3 +-
 .../markitdown/converters/_pdf_converter.py   |   3 +-
 .../converters/_plain_text_converter.py       |   3 +-
 .../markitdown/converters/_pptx_converter.py  |   3 +-
 .../markitdown/converters/_rss_converter.py   |   3 +-
 .../markitdown/converters/_wav_converter.py   |   3 +-
 .../converters/_wikipedia_converter.py        |   3 +-
 .../markitdown/converters/_xlsx_converter.py  |   3 +-
 .../converters/_youtube_converter.py          |   3 +-
 .../markitdown/converters/_zip_converter.py   |   3 +-
 23 files changed, 149 insertions(+), 201 deletions(-)
 delete mode 100644 packages/markitdown/src/markitdown/converters/_base.py

diff --git a/packages/markitdown/src/markitdown/__init__.py b/packages/markitdown/src/markitdown/__init__.py
index fb14feb..bb6fcdb 100644
--- a/packages/markitdown/src/markitdown/__init__.py
+++ b/packages/markitdown/src/markitdown/__init__.py
@@ -4,7 +4,7 @@
 
 from .__about__ import __version__
 from ._markitdown import MarkItDown
-from ._base_converter import DocumentConverterResult, BaseDocumentConverter
+from ._base_converter import DocumentConverterResult, DocumentConverter
 from ._stream_info import StreamInfo
 from ._exceptions import (
     MarkItDownException,
@@ -13,13 +13,11 @@ from ._exceptions import (
     FileConversionException,
     UnsupportedFormatException,
 )
-from .converters import DocumentConverter
 
 __all__ = [
     "__version__",
     "MarkItDown",
     "DocumentConverter",
-    "BaseDocumentConverter",
     "DocumentConverterResult",
     "MarkItDownException",
     "MissingDependencyException",
diff --git a/packages/markitdown/src/markitdown/_base_converter.py b/packages/markitdown/src/markitdown/_base_converter.py
index 7cd945f..42e5da7 100644
--- a/packages/markitdown/src/markitdown/_base_converter.py
+++ b/packages/markitdown/src/markitdown/_base_converter.py
@@ -1,5 +1,11 @@
+import os
+import tempfile
+from warnings import warn
+from typing import Any, Union, BinaryIO, Optional, List
 from ._stream_info import StreamInfo
-from typing import Any, Union, BinaryIO, Optional
+
+# Avoid printing the same warning multiple times
+_WARNED: List[str] = []
 
 
 class DocumentConverterResult:
@@ -39,7 +45,7 @@ class DocumentConverterResult:
         return self.markdown
 
 
-class BaseDocumentConverter:
+class DocumentConverter:
     """Abstract superclass of all DocumentConverters."""
 
     # Lower priority values are tried first.
@@ -74,7 +80,7 @@ class BaseDocumentConverter:
         """
         self._priority = priority
 
-    def convert(
+    def convert_stream(
         self,
         file_stream: BinaryIO,
         stream_info: StreamInfo,
@@ -106,6 +112,61 @@ class BaseDocumentConverter:
         - FileConversionException: If the mimetype is recognized, but the conversion fails for some other reason.
         - MissingDependencyException: If the converter requires a dependency that is not installed.
         """
+
+        # Default implementation ensures backward compatibility with the legacy convert() method, and
+        # should absolutely be overridden in subclasses. This behavior is deprecated and will be removed
+        # in the future.
+        result = None
+        used_legacy = False
+
+        if stream_info.local_path is not None and os.path.exists(
+            stream_info.local_path
+        ):
+            # If the stream is backed by a local file, pass it to the legacy convert() method
+            try:
+                result = self.convert(stream_info.local_path, **kwargs)
+                used_legacy = True
+            except (
+                NotImplementedError
+            ):  # Neither method is implemented: re-raise from here, hiding the inner convert() trace
+                raise NotImplementedError(
+                    "Subclasses must implement the convert_stream method."
+                ) from None
+        else:
+            # Otherwise, copy the rest of the stream (from its current position) into a temporary
+            # file. There is potential for thrashing if there are many converters or attempts.
+            cur_pos = file_stream.tell()
+            temp_fd, temp_path = tempfile.mkstemp()
+            try:
+                with os.fdopen(temp_fd, "wb") as temp_file:
+                    temp_file.write(file_stream.read())
+                try:
+                    result = self.convert(temp_path, **kwargs)
+                    used_legacy = True
+                except NotImplementedError:
+                    raise NotImplementedError(
+                        "Subclasses must implement the convert_stream method."
+                    ) from None
+            finally:
+                os.remove(temp_path)
+                file_stream.seek(cur_pos)  # Restore the position the caller handed us
+
+        if used_legacy:
+            message = f"{type(self).__name__} uses the legacy convert() method, which is deprecated."
+            if message not in _WARNED:
+                warn(message, DeprecationWarning)
+                _WARNED.append(message)
+
+        return result
+
+    def convert(
+        self, local_path: str, **kwargs: Any
+    ) -> Union[None, DocumentConverterResult]:
+        """
+        Legacy, and deprecated method to convert a document to Markdown text.
+        This method reads from the file at `local_path` and returns the converted Markdown text.
+        This method is deprecated in favor of `convert_stream`, which uses a file-like object.
+        """
         raise NotImplementedError("Subclasses must implement this method")
 
     @property
diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py
index 2738535..d5cd0aa 100644
--- a/packages/markitdown/src/markitdown/_markitdown.py
+++ b/packages/markitdown/src/markitdown/_markitdown.py
@@ -20,7 +20,6 @@ import requests
 from ._stream_info import StreamInfo
 
 from .converters import (
-    DocumentConverter,
     PlainTextConverter,
     HtmlConverter,
     RssConverter,
@@ -41,7 +40,7 @@ from .converters import (
     DocumentIntelligenceConverter,
 )
 
-from ._base_converter import DocumentConverterResult
+from ._base_converter import DocumentConverter, DocumentConverterResult
 
 from ._exceptions import (
     FileConversionException,
@@ -102,7 +101,7 @@ class MarkItDown:
         self._style_map = None
 
         # Register the converters
-        self._page_converters: List[DocumentConverter] = []
+        self._converters: List[DocumentConverter] = []
 
         if (
             enable_builtins is None or enable_builtins
@@ -405,108 +404,73 @@ class MarkItDown:
     def _convert(
         self, *, file_stream: BinaryIO, stream_info_guesses: List[StreamInfo], **kwargs
     ) -> DocumentConverterResult:
-        # Lazily create a temporary file, if needed, for backward compatibility
-        # This is to support a deprecated feature, and will be removed in the future
-        temp_file = None
+        res: Union[None, DocumentConverterResult] = None
 
-        def get_temp_file():
-            nonlocal temp_file
+        # Keep track of which converters throw exceptions
+        failed_attempts: List[FailedConversionAttempt] = []
 
-            if temp_file is not None:
-                return temp_file
-            else:
+        # Create a copy of the converters list, sorted by priority.
+        # We do this with each call to _convert because the priority of converters may change between calls.
+        # The sort is guaranteed to be stable, so converters with the same priority will remain in the same order.
+        sorted_converters = sorted(self._converters, key=lambda x: x.priority)
+
+        for stream_info in stream_info_guesses + [StreamInfo()]:
+            for converter in sorted_converters:
+                _kwargs = copy.deepcopy(kwargs)
+
+                # Copy any additional global options
+                if "llm_client" not in _kwargs and self._llm_client is not None:
+                    _kwargs["llm_client"] = self._llm_client
+
+                if "llm_model" not in _kwargs and self._llm_model is not None:
+                    _kwargs["llm_model"] = self._llm_model
+
+                if "style_map" not in _kwargs and self._style_map is not None:
+                    _kwargs["style_map"] = self._style_map
+
+                if "exiftool_path" not in _kwargs and self._exiftool_path is not None:
+                    _kwargs["exiftool_path"] = self._exiftool_path
+
+                # Add the list of converters for nested processing
+                _kwargs["_parent_converters"] = self._converters
+
+                # Add legacy kwargs
+                if stream_info is not None:
+                    if stream_info.extension is not None:
+                        _kwargs["file_extension"] = stream_info.extension
+
+                    if stream_info.url is not None:
+                        _kwargs["url"] = stream_info.url
+
+                # Attempt the conversion
                 cur_pos = file_stream.tell()
-                handle, temp_file = tempfile.mkstemp()
-                fh = os.fdopen(handle, "wb")
-                file_stream.seek(0)
-                fh.write(file_stream.read())
-                file_stream.seek(cur_pos)
-                fh.close()
-            return temp_file
-
-        try:
-            res: Union[None, DocumentConverterResult] = None
-
-            # Keep track of which converters throw exceptions
-            failed_attempts: List[FailedConversionAttempt] = []
-
-            # Create a copy of the page_converters list, sorted by priority.
-            # We do this with each call to _convert because the priority of converters may change between calls.
-            # The sort is guaranteed to be stable, so converters with the same priority will remain in the same order.
-            sorted_converters = sorted(self._page_converters, key=lambda x: x.priority)
-
-            for file_info in stream_info_guesses + [None]:
-                for converter in sorted_converters:
-                    _kwargs = copy.deepcopy(kwargs)
-
-                    # Copy any additional global options
-                    if "llm_client" not in _kwargs and self._llm_client is not None:
-                        _kwargs["llm_client"] = self._llm_client
-
-                    if "llm_model" not in _kwargs and self._llm_model is not None:
-                        _kwargs["llm_model"] = self._llm_model
-
-                    if "style_map" not in _kwargs and self._style_map is not None:
-                        _kwargs["style_map"] = self._style_map
-
-                    if (
-                        "exiftool_path" not in _kwargs
-                        and self._exiftool_path is not None
-                    ):
-                        _kwargs["exiftool_path"] = self._exiftool_path
-
-                    # Add the list of converters for nested processing
-                    _kwargs["_parent_converters"] = self._page_converters
-
-                    # Add backwards compatibility
-                    if isinstance(converter, DocumentConverter):
-                        if file_info is not None:
-                            # Legacy converters need a file_extension
-                            if file_info.extension is not None:
-                                _kwargs["file_extension"] = file_info.extension
-
-                            # And benefit from urls, when available
-                            if file_info.url is not None:
-                                _kwargs["url"] = file_info.url
-
-                        try:
-                            res = converter.convert(get_temp_file(), **_kwargs)
-                        except Exception:
-                            failed_attempts.append(
-                                FailedConversionAttempt(
-                                    converter=converter, exc_info=sys.exc_info()
-                                )
-                            )
-                    else:
-                        raise NotImplementedError("TODO")
-
-                    if res is not None:
-                        # Normalize the content
-                        res.text_content = "\n".join(
-                            [
-                                line.rstrip()
-                                for line in re.split(r"\r?\n", res.text_content)
-                            ]
-                        )
-                        res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)
-                        return res
-
-            # If we got this far without success, report any exceptions
-            if len(failed_attempts) > 0:
-                raise FileConversionException(attempts=failed_attempts)
-
-            # Nothing can handle it!
-            raise UnsupportedFormatException(
-                f"Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported."
-            )
-
-        finally:
-            # Clean up the temporary file
-            if temp_file is not None:
                 try:
-                    os.unlink(temp_file)
+                    res = converter.convert_stream(file_stream, stream_info, **_kwargs)
                 except Exception:
-                    pass
+                    failed_attempts.append(
+                        FailedConversionAttempt(
+                            converter=converter, exc_info=sys.exc_info()
+                        )
+                    )
+                finally:
+                    file_stream.seek(cur_pos)
+
+                if res is not None:
+                    # Normalize the content
+                    res.text_content = "\n".join(
+                        [line.rstrip() for line in re.split(r"\r?\n", res.text_content)]
+                    )
+                    res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)
+                    return res
+
+        # If we got this far without success, report any exceptions
+        if len(failed_attempts) > 0:
+            raise FileConversionException(attempts=failed_attempts)
+
+        # Nothing can handle it!
+        raise UnsupportedFormatException(
+            f"Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported."
+        )
 
     def register_page_converter(self, converter: DocumentConverter) -> None:
         """DEPRECATED: User register_converter instead."""
@@ -516,6 +480,6 @@ class MarkItDown:
         )
         self.register_converter(converter)
 
-    def register_converter(self, converter: DocumentConverter) -> None:
+    def register_converter(self, converter: Union[DocumentConverter]) -> None:
         """Register a page text converter."""
-        self._page_converters.insert(0, converter)
+        self._converters.insert(0, converter)
diff --git a/packages/markitdown/src/markitdown/converters/__init__.py b/packages/markitdown/src/markitdown/converters/__init__.py
index 996b78b..038038d 100644
--- a/packages/markitdown/src/markitdown/converters/__init__.py
+++ b/packages/markitdown/src/markitdown/converters/__init__.py
@@ -2,7 +2,6 @@
 #
 # SPDX-License-Identifier: MIT
 
-from ._base import DocumentConverter
 from ._plain_text_converter import PlainTextConverter
 from ._html_converter import HtmlConverter
 from ._rss_converter import RssConverter
@@ -22,7 +21,6 @@ from ._zip_converter import ZipConverter
 from ._doc_intel_converter import DocumentIntelligenceConverter
 
 __all__ = [
-    "DocumentConverter",
     "PlainTextConverter",
     "HtmlConverter",
     "RssConverter",
diff --git a/packages/markitdown/src/markitdown/converters/_base.py b/packages/markitdown/src/markitdown/converters/_base.py
deleted file mode 100644
index e1a544a..0000000
--- a/packages/markitdown/src/markitdown/converters/_base.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from typing import Any, Union
-from .._base_converter import DocumentConverterResult
-
-
-class DocumentConverter:
-    """Abstract superclass of all DocumentConverters."""
-
-    # Lower priority values are tried first.
-    PRIORITY_SPECIFIC_FILE_FORMAT = (
-        0.0  # e.g., .docx, .pdf, .xlsx, Or specific pages, e.g., wikipedia
-    )
-    PRIORITY_GENERIC_FILE_FORMAT = (
-        10.0  # Near catch-all converters for mimetypes like text/*, etc.
-    )
-
-    def __init__(self, priority: float = PRIORITY_SPECIFIC_FILE_FORMAT):
-        """
-        Initialize the DocumentConverter with a given priority.
-
-        Priorities work as follows: By default, most converters get priority
-        DocumentConverter.PRIORITY_SPECIFIC_FILE_FORMAT (== 0). The exception
-        is the PlainTextConverter, which gets priority PRIORITY_SPECIFIC_FILE_FORMAT (== 10),
-        with lower values being tried first (i.e., higher priority).
-
-        Just prior to conversion, the converters are sorted by priority, using
-        a stable sort. This means that converters with the same priority will
-        remain in the same order, with the most recently registered converters
-        appearing first.
-
-        We have tight control over the order of built-in converters, but
-        plugins can register converters in any order. A converter's priority
-        field reasserts some control over the order of converters.
-
-        Plugins can register converters with any priority, to appear before or
-        after the built-ins. For example, a plugin with priority 9 will run
-        before the PlainTextConverter, but after the built-in converters.
-        """
-        self._priority = priority
-
-    def convert(
-        self, local_path: str, **kwargs: Any
-    ) -> Union[None, DocumentConverterResult]:
-        raise NotImplementedError("Subclasses must implement this method")
-
-    @property
-    def priority(self) -> float:
-        """Priority of the converter in markitdown's converter list. Higher priority values are tried first."""
-        return self._priority
-
-    @priority.setter
-    def priority(self, value: float):
-        self._priority = value
-
-    @priority.deleter
-    def priority(self):
-        raise AttributeError("Cannot delete the priority attribute")
diff --git a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py b/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py
index bdb15bf..2ac8e7e 100644
--- a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py
@@ -6,8 +6,7 @@ from typing import Union
 from urllib.parse import parse_qs, urlparse
 from bs4 import BeautifulSoup
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._markdownify import _CustomMarkdownify
 
 
diff --git a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py
index 1ad8981..3129409 100644
--- a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py
@@ -2,8 +2,7 @@ from typing import Any, Union
 import re
 import sys
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from .._exceptions import MissingDependencyException
 
 # Try loading optional (but in this case, required) dependencies
diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py
index ea2550b..8f298ab 100644
--- a/packages/markitdown/src/markitdown/converters/_docx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py
@@ -2,8 +2,7 @@ import sys
 
 from typing import Union
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._html_converter import HtmlConverter
 from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 
diff --git a/packages/markitdown/src/markitdown/converters/_html_converter.py b/packages/markitdown/src/markitdown/converters/_html_converter.py
index 64efb9a..172875e 100644
--- a/packages/markitdown/src/markitdown/converters/_html_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_html_converter.py
@@ -1,8 +1,7 @@
 from typing import Any, Union
 from bs4 import BeautifulSoup
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._markdownify import _CustomMarkdownify
 
 
diff --git a/packages/markitdown/src/markitdown/converters/_image_converter.py b/packages/markitdown/src/markitdown/converters/_image_converter.py
index 5923103..72f70e2 100644
--- a/packages/markitdown/src/markitdown/converters/_image_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_image_converter.py
@@ -1,6 +1,5 @@
 from typing import Union
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._media_converter import MediaConverter
 import base64
 import mimetypes
diff --git a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py
index cc40d4e..2c5cb3f 100644
--- a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py
@@ -1,8 +1,7 @@
 import json
 from typing import Any, Union
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 
 from .._exceptions import FileConversionException
 
diff --git a/packages/markitdown/src/markitdown/converters/_media_converter.py b/packages/markitdown/src/markitdown/converters/_media_converter.py
index 5c7d82b..0a5cebf 100644
--- a/packages/markitdown/src/markitdown/converters/_media_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_media_converter.py
@@ -3,7 +3,7 @@ import shutil
 import json
 from warnings import warn
 
-from ._base import DocumentConverter
+from .._base_converter import DocumentConverter
 
 
 class MediaConverter(DocumentConverter):
diff --git a/packages/markitdown/src/markitdown/converters/_mp3_converter.py b/packages/markitdown/src/markitdown/converters/_mp3_converter.py
index a2276b6..6ba2202 100644
--- a/packages/markitdown/src/markitdown/converters/_mp3_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_mp3_converter.py
@@ -1,7 +1,6 @@
 import tempfile
 from typing import Union
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._wav_converter import WavConverter
 from warnings import resetwarnings, catch_warnings
 
diff --git a/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py b/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py
index 4abc860..84d8c47 100644
--- a/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py
@@ -1,7 +1,6 @@
 import sys
 from typing import Any, Union
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 
 # Try loading optional (but in this case, required) dependencies
diff --git a/packages/markitdown/src/markitdown/converters/_pdf_converter.py b/packages/markitdown/src/markitdown/converters/_pdf_converter.py
index 2767954..00228b5 100644
--- a/packages/markitdown/src/markitdown/converters/_pdf_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_pdf_converter.py
@@ -1,7 +1,6 @@
 import sys
 from typing import Union
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 
 # Try loading optional (but in this case, required) dependencies
diff --git a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py
index 5905851..a9f1902 100644
--- a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py
@@ -3,8 +3,7 @@ import mimetypes
 from charset_normalizer import from_path
 from typing import Any, Union
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 
 
 # Mimetypes to ignore (commonly confused extensions)
diff --git a/packages/markitdown/src/markitdown/converters/_pptx_converter.py b/packages/markitdown/src/markitdown/converters/_pptx_converter.py
index 99e4337..d77d3bc 100644
--- a/packages/markitdown/src/markitdown/converters/_pptx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_pptx_converter.py
@@ -5,8 +5,7 @@ import sys
 
 from typing import Union
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._html_converter import HtmlConverter
 from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 
diff --git a/packages/markitdown/src/markitdown/converters/_rss_converter.py b/packages/markitdown/src/markitdown/converters/_rss_converter.py
index 2471799..021d09d 100644
--- a/packages/markitdown/src/markitdown/converters/_rss_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_rss_converter.py
@@ -3,8 +3,7 @@ from typing import Union
 from bs4 import BeautifulSoup
 
 from ._markdownify import _CustomMarkdownify
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 
 
 class RssConverter(DocumentConverter):
diff --git a/packages/markitdown/src/markitdown/converters/_wav_converter.py b/packages/markitdown/src/markitdown/converters/_wav_converter.py
index 4278f6f..c14a9a3 100644
--- a/packages/markitdown/src/markitdown/converters/_wav_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_wav_converter.py
@@ -1,6 +1,5 @@
 from typing import Union
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._media_converter import MediaConverter
 
 # Optional Transcription support
diff --git a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py
index b4665c0..2be066d 100644
--- a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py
@@ -3,8 +3,7 @@ import re
 from typing import Any, Union
 from bs4 import BeautifulSoup
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._markdownify import _CustomMarkdownify
 
 
diff --git a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py b/packages/markitdown/src/markitdown/converters/_xlsx_converter.py
index 7257768..37535ca 100644
--- a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_xlsx_converter.py
@@ -2,8 +2,7 @@ import sys
 
 from typing import Union
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 from ._html_converter import HtmlConverter
 from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 
diff --git a/packages/markitdown/src/markitdown/converters/_youtube_converter.py b/packages/markitdown/src/markitdown/converters/_youtube_converter.py
index 485b095..975d668 100644
--- a/packages/markitdown/src/markitdown/converters/_youtube_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_youtube_converter.py
@@ -7,8 +7,7 @@ from typing import Any, Union, Dict, List
 from urllib.parse import parse_qs, urlparse
 from bs4 import BeautifulSoup
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 
 
 # Optional YouTube transcription support
diff --git a/packages/markitdown/src/markitdown/converters/_zip_converter.py b/packages/markitdown/src/markitdown/converters/_zip_converter.py
index d8f2951..7c8c6db 100644
--- a/packages/markitdown/src/markitdown/converters/_zip_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_zip_converter.py
@@ -3,8 +3,7 @@ import zipfile
 import shutil
 from typing import Any, Union
 
-from ._base import DocumentConverter
-from .._base_converter import DocumentConverterResult
+from .._base_converter import DocumentConverter, DocumentConverterResult
 
 
 class ZipConverter(DocumentConverter):