diff --git a/packages/markitdown/src/markitdown/__init__.py b/packages/markitdown/src/markitdown/__init__.py index f82f024..43f007f 100644 --- a/packages/markitdown/src/markitdown/__init__.py +++ b/packages/markitdown/src/markitdown/__init__.py @@ -6,7 +6,7 @@ from .__about__ import __version__ from ._markitdown import MarkItDown from ._exceptions import ( MarkItDownException, - MissingOptionalDependencyException, + MissingDependencyException, FileConversionException, UnsupportedFormatException, ) @@ -18,7 +18,7 @@ __all__ = [ "DocumentConverter", "DocumentConverterResult", "MarkItDownException", - "MissingOptionalDependencyException", + "MissingDependencyException", "FileConversionException", "UnsupportedFormatException", ] diff --git a/packages/markitdown/src/markitdown/_exceptions.py b/packages/markitdown/src/markitdown/_exceptions.py index 10b8cec..6cf1073 100644 --- a/packages/markitdown/src/markitdown/_exceptions.py +++ b/packages/markitdown/src/markitdown/_exceptions.py @@ -6,7 +6,7 @@ class MarkItDownException(BaseException): pass -class MissingOptionalDependencyException(MarkItDownException): +class MissingDependencyException(MarkItDownException): """ Converters shipped with MarkItDown may depend on optional dependencies. This exception is thrown when a converter's diff --git a/packages/markitdown/src/markitdown/converters/_pptx_converter.py b/packages/markitdown/src/markitdown/converters/_pptx_converter.py index 76c481a..34d4fbd 100644 --- a/packages/markitdown/src/markitdown/converters/_pptx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_pptx_converter.py @@ -1,12 +1,22 @@ import base64 -import pptx import re import html +import sys from typing import Union from ._base import DocumentConverterResult, DocumentConverter from ._html_converter import HtmlConverter +from .._exceptions import MissingDependencyException + +# Try loading optional (but in this case, required) dependencies +# Save reporting of any exceptions for later +_dependency_exc_info = None +try: + import pptx +except ImportError: + # Preserve the error and stack trace for later + _dependency_exc_info = sys.exc_info() class PptxConverter(HtmlConverter): @@ -54,9 +64,21 @@ class PptxConverter(HtmlConverter): if extension.lower() != ".pptx": return None - md_content = "" + # Load the dependencies + if _dependency_exc_info is not None: + raise MissingDependencyException( + f"""{type(self).__name__} recognized the input as a .pptx file, but the dependencies needed to read .pptx files have not been installed. To resolve this error, include the optional dependency [pptx] or [all] when installing MarkItDown. For example: + +* pip install markitdown[pptx] +* pip install markitdown[all] +* pip install markitdown[pptx, docx, ...] +* etc.""" + ) from _dependency_exc_info[1].with_traceback( + _dependency_exc_info[2] + ) # Restore the original traceback presentation = pptx.Presentation(local_path) + md_content = "" slide_num = 0 for slide in presentation.slides: slide_num += 1