Added .docx to optional dependencies
This commit is contained in:
parent
b9487b6b6d
commit
98698a64ce
3 changed files with 31 additions and 5 deletions
|
|
@ -26,7 +26,6 @@ classifiers = [
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"beautifulsoup4",
|
"beautifulsoup4",
|
||||||
"requests",
|
"requests",
|
||||||
"mammoth",
|
|
||||||
"markdownify~=0.14.1",
|
"markdownify~=0.14.1",
|
||||||
"numpy",
|
"numpy",
|
||||||
"pandas",
|
"pandas",
|
||||||
|
|
@ -46,8 +45,12 @@ dependencies = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
all = ["python-pptx"]
|
all = [
|
||||||
|
"python-pptx",
|
||||||
|
"mammoth"
|
||||||
|
]
|
||||||
pptx = ["python-pptx"]
|
pptx = ["python-pptx"]
|
||||||
|
docx = ["mammoth"]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Documentation = "https://github.com/microsoft/markitdown#readme"
|
Documentation = "https://github.com/microsoft/markitdown#readme"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Union
|
import sys
|
||||||
|
|
||||||
import mammoth
|
from typing import Union
|
||||||
|
|
||||||
from ._base import (
|
from ._base import (
|
||||||
DocumentConverterResult,
|
DocumentConverterResult,
|
||||||
|
|
@ -8,6 +8,16 @@ from ._base import (
|
||||||
|
|
||||||
from ._base import DocumentConverter
|
from ._base import DocumentConverter
|
||||||
from ._html_converter import HtmlConverter
|
from ._html_converter import HtmlConverter
|
||||||
|
from .._exceptions import MissingDependencyException
|
||||||
|
|
||||||
|
# Try loading optional (but in this case, required) dependencies
|
||||||
|
# Save reporting of any exceptions for later
|
||||||
|
_dependency_exc_info = None
|
||||||
|
try:
|
||||||
|
import mammoth
|
||||||
|
except ImportError:
|
||||||
|
# Preserve the error and stack trace for later
|
||||||
|
_dependency_exc_info = sys.exc_info()
|
||||||
|
|
||||||
|
|
||||||
class DocxConverter(HtmlConverter):
|
class DocxConverter(HtmlConverter):
|
||||||
|
|
@ -26,6 +36,19 @@ class DocxConverter(HtmlConverter):
|
||||||
if extension.lower() != ".docx":
|
if extension.lower() != ".docx":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Load the dependencies
|
||||||
|
if _dependency_exc_info is not None:
|
||||||
|
raise MissingDependencyException(
|
||||||
|
f"""{type(self).__name__} recognized the input as a potential .docx file, but the dependencies needed to read .docx files have not been installed. To resolve this error, include the optional dependency [docx] or [all] when installing MarkItDown. For example:
|
||||||
|
|
||||||
|
* pip install markitdown[docx]
|
||||||
|
* pip install markitdown[all]
|
||||||
|
* pip install markitdown[pptx, docx, ...]
|
||||||
|
* etc."""
|
||||||
|
) from _dependency_exc_info[1].with_traceback(
|
||||||
|
_dependency_exc_info[2]
|
||||||
|
) # Restore the original traceback
|
||||||
|
|
||||||
result = None
|
result = None
|
||||||
with open(local_path, "rb") as docx_file:
|
with open(local_path, "rb") as docx_file:
|
||||||
style_map = kwargs.get("style_map", None)
|
style_map = kwargs.get("style_map", None)
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@ class PptxConverter(HtmlConverter):
|
||||||
# Load the dependencies
|
# Load the dependencies
|
||||||
if _dependency_exc_info is not None:
|
if _dependency_exc_info is not None:
|
||||||
raise MissingDependencyException(
|
raise MissingDependencyException(
|
||||||
f"""{type(self).__name__} recognized the input as a .pptx file, but the dependencies needed to read .pptx files have not been installed. To resolve this error, include the optional dependency [pptx] or [all] when installing MarkItDown. For example:
|
f"""{type(self).__name__} recognized the input as a potential .pptx file, but the dependencies needed to read .pptx files have not been installed. To resolve this error, include the optional dependency [pptx] or [all] when installing MarkItDown. For example:
|
||||||
|
|
||||||
* pip install markitdown[pptx]
|
* pip install markitdown[pptx]
|
||||||
* pip install markitdown[all]
|
* pip install markitdown[all]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue