Exploring ways to enable optional dependencies. Starting with pptx.
This commit is contained in:
parent
0f63a7e28f
commit
7d2e0bd9d4
3 changed files with 27 additions and 5 deletions
|
|
@ -6,7 +6,7 @@ from .__about__ import __version__
|
||||||
from ._markitdown import MarkItDown
|
from ._markitdown import MarkItDown
|
||||||
from ._exceptions import (
|
from ._exceptions import (
|
||||||
MarkItDownException,
|
MarkItDownException,
|
||||||
MissingOptionalDependencyException,
|
MissingDependencyException,
|
||||||
FileConversionException,
|
FileConversionException,
|
||||||
UnsupportedFormatException,
|
UnsupportedFormatException,
|
||||||
)
|
)
|
||||||
|
|
@ -18,7 +18,7 @@ __all__ = [
|
||||||
"DocumentConverter",
|
"DocumentConverter",
|
||||||
"DocumentConverterResult",
|
"DocumentConverterResult",
|
||||||
"MarkItDownException",
|
"MarkItDownException",
|
||||||
"MissingOptionalDependencyException",
|
"MissingDependencyException",
|
||||||
"FileConversionException",
|
"FileConversionException",
|
||||||
"UnsupportedFormatException",
|
"UnsupportedFormatException",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ class MarkItDownException(BaseException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class MissingOptionalDependencyException(MarkItDownException):
|
class MissingDependencyException(MarkItDownException):
|
||||||
"""
|
"""
|
||||||
Converters shipped with MarkItDown may depend on optional
|
Converters shipped with MarkItDown may depend on optional
|
||||||
dependencies. This exception is thrown when a converter's
|
dependencies. This exception is thrown when a converter's
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,22 @@
|
||||||
import base64
|
import base64
|
||||||
import pptx
|
|
||||||
import re
|
import re
|
||||||
import html
|
import html
|
||||||
|
import sys
|
||||||
|
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from ._base import DocumentConverterResult, DocumentConverter
|
from ._base import DocumentConverterResult, DocumentConverter
|
||||||
from ._html_converter import HtmlConverter
|
from ._html_converter import HtmlConverter
|
||||||
|
from .._exceptions import MissingDependencyException
|
||||||
|
|
||||||
|
# Try loading optional (but in this case, required) dependencies
|
||||||
|
# Save reporting of any exceptions for later
|
||||||
|
_dependency_exc_info = None
|
||||||
|
try:
|
||||||
|
import pptx
|
||||||
|
except ImportError:
|
||||||
|
# Preserve the error and stack trace for later
|
||||||
|
_dependency_exc_info = sys.exc_info()
|
||||||
|
|
||||||
|
|
||||||
class PptxConverter(HtmlConverter):
|
class PptxConverter(HtmlConverter):
|
||||||
|
|
@ -54,9 +64,21 @@ class PptxConverter(HtmlConverter):
|
||||||
if extension.lower() != ".pptx":
|
if extension.lower() != ".pptx":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
md_content = ""
|
# Load the dependencies
|
||||||
|
if _dependency_exc_info is not None:
|
||||||
|
raise MissingDependencyException(
|
||||||
|
f"""{type(self).__name__} recognized the input as a .pptx file, but the dependencies needed to read .pptx files have not been installed. To resolve this error, include the optional dependency [pptx] or [all] when installing MarkItDown. For example:
|
||||||
|
|
||||||
|
* pip install markitdown[pptx]
|
||||||
|
* pip install markitdown[all]
|
||||||
|
* pip install markitdown[pptx, docx, ...]
|
||||||
|
* etc."""
|
||||||
|
) from _dependency_exc_info[1].with_traceback(
|
||||||
|
_dependency_exc_info[2]
|
||||||
|
) # Restore the original traceback
|
||||||
|
|
||||||
presentation = pptx.Presentation(local_path)
|
presentation = pptx.Presentation(local_path)
|
||||||
|
md_content = ""
|
||||||
slide_num = 0
|
slide_num = 0
|
||||||
for slide in presentation.slides:
|
for slide in presentation.slides:
|
||||||
slide_num += 1
|
slide_num += 1
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue