Exploring ways to enable optional dependencies. Starting with pptx.
This commit is contained in:
parent
0f63a7e28f
commit
7d2e0bd9d4
3 changed files with 27 additions and 5 deletions
|
|
@ -6,7 +6,7 @@ from .__about__ import __version__
|
|||
from ._markitdown import MarkItDown
|
||||
from ._exceptions import (
|
||||
MarkItDownException,
|
||||
MissingOptionalDependencyException,
|
||||
MissingDependencyException,
|
||||
FileConversionException,
|
||||
UnsupportedFormatException,
|
||||
)
|
||||
|
|
@ -18,7 +18,7 @@ __all__ = [
|
|||
"DocumentConverter",
|
||||
"DocumentConverterResult",
|
||||
"MarkItDownException",
|
||||
"MissingOptionalDependencyException",
|
||||
"MissingDependencyException",
|
||||
"FileConversionException",
|
||||
"UnsupportedFormatException",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ class MarkItDownException(BaseException):
|
|||
pass
|
||||
|
||||
|
||||
class MissingOptionalDependencyException(MarkItDownException):
|
||||
class MissingDependencyException(MarkItDownException):
|
||||
"""
|
||||
Converters shipped with MarkItDown may depend on optional
|
||||
dependencies. This exception is thrown when a converter's
|
||||
|
|
|
|||
|
|
@ -1,12 +1,22 @@
|
|||
import base64
|
||||
import pptx
|
||||
import re
|
||||
import html
|
||||
import sys
|
||||
|
||||
from typing import Union
|
||||
|
||||
from ._base import DocumentConverterResult, DocumentConverter
|
||||
from ._html_converter import HtmlConverter
|
||||
from .._exceptions import MissingDependencyException
|
||||
|
||||
# Try loading optional (but in this case, required) dependencies
|
||||
# Defer reporting of any import errors until a conversion is attempted
|
||||
_dependency_exc_info = None
|
||||
try:
|
||||
import pptx
|
||||
except ImportError:
|
||||
# Preserve the error and stack trace for later
|
||||
_dependency_exc_info = sys.exc_info()
|
||||
|
||||
|
||||
class PptxConverter(HtmlConverter):
|
||||
|
|
@ -54,9 +64,21 @@ class PptxConverter(HtmlConverter):
|
|||
if extension.lower() != ".pptx":
|
||||
return None
|
||||
|
||||
md_content = ""
|
||||
# Check that the optional dependencies were imported successfully
|
||||
if _dependency_exc_info is not None:
|
||||
raise MissingDependencyException(
|
||||
f"""{type(self).__name__} recognized the input as a .pptx file, but the dependencies needed to read .pptx files have not been installed. To resolve this error, include the optional dependency [pptx] or [all] when installing MarkItDown. For example:
|
||||
|
||||
* pip install markitdown[pptx]
|
||||
* pip install markitdown[all]
|
||||
* pip install markitdown[pptx, docx, ...]
|
||||
* etc."""
|
||||
) from _dependency_exc_info[1].with_traceback(
|
||||
_dependency_exc_info[2]
|
||||
) # Restore the original traceback
|
||||
|
||||
presentation = pptx.Presentation(local_path)
|
||||
md_content = ""
|
||||
slide_num = 0
|
||||
for slide in presentation.slides:
|
||||
slide_num += 1
|
||||
|
|
|
|||
Loading…
Reference in a new issue