supports pptx
This commit is contained in:
parent
615975f918
commit
555a849a66
10 changed files with 277 additions and 169 deletions
|
|
@ -28,10 +28,7 @@ dependencies = [
|
|||
"markdownify",
|
||||
"magika~=0.6.1",
|
||||
"charset-normalizer",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
all = [
|
||||
"python-magic>=0.4.27",
|
||||
"python-pptx",
|
||||
"mammoth",
|
||||
"pandas",
|
||||
|
|
@ -46,12 +43,6 @@ all = [
|
|||
"azure-ai-documentintelligence",
|
||||
"azure-identity"
|
||||
]
|
||||
pptx = ["python-pptx"]
|
||||
docx = ["mammoth", "lxml"]
|
||||
xlsx = ["pandas", "openpyxl"]
|
||||
xls = ["pandas", "xlrd"]
|
||||
pdf = ["pdfminer.six"]
|
||||
|
||||
|
||||
[tool.hatch.version]
|
||||
path = "src/markitup/__about__.py"
|
||||
|
|
@ -60,16 +51,14 @@ path = "src/markitup/__about__.py"
|
|||
markitup = "markitup.__main__:main"
|
||||
|
||||
[tool.hatch.envs.default]
|
||||
features = ["all"]
|
||||
# No features needed since everything is installed by default
|
||||
|
||||
[tool.hatch.envs.hatch-test]
|
||||
features = ["all"]
|
||||
extra-dependencies = [
|
||||
"openai",
|
||||
]
|
||||
|
||||
[tool.hatch.envs.types]
|
||||
features = ["all"]
|
||||
extra-dependencies = [
|
||||
"openai",
|
||||
"mypy>=1.0.0",
|
||||
|
|
|
|||
|
|
@ -25,22 +25,12 @@ from ._uri_utils import parse_data_uri, file_uri_to_path
|
|||
from .converters import (
|
||||
PlainTextConverter,
|
||||
HtmlConverter,
|
||||
RssConverter,
|
||||
WikipediaConverter,
|
||||
YouTubeConverter,
|
||||
IpynbConverter,
|
||||
BingSerpConverter,
|
||||
PdfConverter,
|
||||
DocxConverter,
|
||||
XlsxConverter,
|
||||
XlsConverter,
|
||||
PptxConverter,
|
||||
ImageConverter,
|
||||
AudioConverter,
|
||||
OutlookMsgConverter,
|
||||
ZipConverter,
|
||||
EpubConverter,
|
||||
DocumentIntelligenceConverter,
|
||||
# AudioConverter,
|
||||
CsvConverter,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,31 +2,7 @@ from dataclasses import dataclass, asdict
|
|||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass(kw_only=True, frozen=True)
|
||||
@dataclass
|
||||
class StreamInfo:
|
||||
"""The StreamInfo class is used to store information about a file stream.
|
||||
All fields can be None, and will depend on how the stream was opened.
|
||||
"""
|
||||
|
||||
mimetype: Optional[str] = None
|
||||
extension: Optional[str] = None
|
||||
charset: Optional[str] = None
|
||||
filename: Optional[
|
||||
str
|
||||
] = None # From local path, url, or Content-Disposition header
|
||||
local_path: Optional[str] = None # If read from disk
|
||||
url: Optional[str] = None # If read from url
|
||||
|
||||
def copy_and_update(self, *args, **kwargs):
|
||||
"""Copy the StreamInfo object and update it with the given StreamInfo
|
||||
instance and/or other keyword arguments."""
|
||||
new_info = asdict(self)
|
||||
|
||||
for si in args:
|
||||
assert isinstance(si, StreamInfo)
|
||||
new_info.update({k: v for k, v in asdict(si).items() if v is not None})
|
||||
|
||||
if len(kwargs) > 0:
|
||||
new_info.update(kwargs)
|
||||
|
||||
return StreamInfo(**new_info)
|
||||
magic_type: Optional[str] = None
|
||||
category: Optional[str] = None
|
||||
102
packages/markitup/src/markitup/converter_utils/utils.py
Normal file
102
packages/markitup/src/markitup/converter_utils/utils.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import os
|
||||
from io import BytesIO
|
||||
from markitup._stream_info import StreamInfo
|
||||
import magic
|
||||
|
||||
|
||||
def read_files_to_bytestreams(folder_path="packages/markitup/tests/test_files"):
    """
    Load every regular file found directly inside a folder into memory.

    Args:
        folder_path (str): Path to the folder whose files should be read.

    Returns:
        dict: Maps each filename to a BytesIO holding that file's bytes,
            positioned at offset 0. Keys are inserted in sorted filename order.

    Raises:
        FileNotFoundError: If folder_path does not exist.
    """
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Folder '{folder_path}' not found")

    streams = {}
    for name in sorted(os.listdir(folder_path)):
        full_path = os.path.join(folder_path, name)

        # Only plain files are loaded; subdirectories are ignored.
        if not os.path.isfile(full_path):
            continue

        with open(full_path, "rb") as handle:
            payload = handle.read()

        buffer = BytesIO(payload)
        buffer.seek(0)  # hand the stream back rewound to the start
        streams[name] = buffer

    return streams
|
||||
|
||||
|
||||
def detect_file_types(file_dict):
    """
    Classify in-memory files by inspecting their content with python-magic.

    Filenames are used only as result keys; the detection itself looks at
    the bytes of each stream.

    Args:
        file_dict (dict): Dictionary with filenames as keys and BytesIO
            objects as values.

    Returns:
        dict: Dictionary with filenames as keys and StreamInfo objects as
            values, each carrying the detected ``magic_type`` (a MIME string)
            and a coarse ``category``.
    """
    # Each rule is (category, accepted MIME prefixes, accepted exact MIME values).
    # The rules are mutually exclusive, so their order does not affect the result.
    rules = (
        ("image", ("image/",), ()),
        ("audio", ("audio/",), ()),
        ("video", ("video/",), ()),
        (
            "xls",
            ("application/vnd.ms-excel",),
            ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",),
        ),
        (
            "ppt",
            ("application/vnd.ms-powerpoint",),
            ("application/vnd.openxmlformats-officedocument.presentationml.presentation",),
        ),
        (
            "doc",
            ("application/msword",),
            ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",),
        ),
        ("pdf", (), ("application/pdf",)),
        ("text", ("text/",), ()),
    )

    detected = {}
    for filename, byte_stream in file_dict.items():
        # Remember where the caller left the stream so it can be put back.
        saved_offset = byte_stream.tell()

        byte_stream.seek(0)
        payload = byte_stream.read()

        # Content-based detection; mime=True requests a MIME string.
        magic_type = magic.from_buffer(payload, mime=True)

        category = "other"
        for label, prefixes, exact_values in rules:
            if magic_type.startswith(prefixes) or magic_type in exact_values:
                category = label
                break

        detected[filename] = StreamInfo(magic_type=magic_type, category=category)

        byte_stream.seek(saved_offset)

    return detected
|
||||
|
|
@ -8,13 +8,15 @@ from ._pdf_converter import PdfConverter
|
|||
from ._docx_converter import DocxConverter
|
||||
from ._xlsx_converter import XlsxConverter, XlsConverter
|
||||
from ._pptx_converter import PptxConverter
|
||||
from ._audio_converter import AudioConverter
|
||||
# from ._audio_converter import AudioConverter
|
||||
from ._csv_converter import CsvConverter
|
||||
from ._markdownify import _CustomMarkdownify
|
||||
|
||||
__all__ = [
|
||||
"PlainTextConverter",
|
||||
"HtmlConverter",
|
||||
"RssConverter",
|
||||
"_CustomMarkdownify",
|
||||
"WikipediaConverter",
|
||||
"YouTubeConverter",
|
||||
"IpynbConverter",
|
||||
|
|
@ -25,7 +27,7 @@ __all__ = [
|
|||
"XlsConverter",
|
||||
"PptxConverter",
|
||||
"ImageConverter",
|
||||
"AudioConverter",
|
||||
# "AudioConverter",
|
||||
"OutlookMsgConverter",
|
||||
"ZipConverter",
|
||||
"DocumentIntelligenceConverter",
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import io
|
|||
from typing import Any, BinaryIO, Optional
|
||||
|
||||
from ._exiftool import exiftool_metadata
|
||||
from ._transcribe_audio import transcribe_audio
|
||||
from .._base_converter import DocumentConverter, DocumentConverterResult
|
||||
from .._stream_info import StreamInfo
|
||||
from .._exceptions import MissingDependencyException
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@ from .._base_converter import DocumentConverter, DocumentConverterResult
|
|||
from .._stream_info import StreamInfo
|
||||
from ._markdownify import _CustomMarkdownify
|
||||
|
||||
ACCEPTED_MIME_TYPE_PREFIXES = [
|
||||
ACCEPTED_MAGIC_TYPE_PREFIXES = [
|
||||
"text/html",
|
||||
"application/xhtml",
|
||||
]
|
||||
|
||||
ACCEPTED_FILE_EXTENSIONS = [
|
||||
ACCEPTED_FILE_CATEGORY = [
|
||||
".html",
|
||||
".htm",
|
||||
]
|
||||
|
|
@ -26,14 +26,14 @@ class HtmlConverter(DocumentConverter):
|
|||
stream_info: StreamInfo,
|
||||
**kwargs: Any, # Options to pass to the converter
|
||||
) -> bool:
|
||||
mimetype = (stream_info.mimetype or "").lower()
|
||||
extension = (stream_info.extension or "").lower()
|
||||
magic_type = (stream_info.magic_type or "").lower()
|
||||
category = (stream_info.category or "").lower()
|
||||
|
||||
if extension in ACCEPTED_FILE_EXTENSIONS:
|
||||
if category in ACCEPTED_FILE_CATEGORY:
|
||||
return True
|
||||
|
||||
for prefix in ACCEPTED_MIME_TYPE_PREFIXES:
|
||||
if mimetype.startswith(prefix):
|
||||
for prefix in ACCEPTED_MAGIC_TYPE_PREFIXES:
|
||||
if magic_type.startswith(prefix):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
|
@ -45,7 +45,7 @@ class HtmlConverter(DocumentConverter):
|
|||
**kwargs: Any, # Options to pass to the converter
|
||||
) -> DocumentConverterResult:
|
||||
# Parse the stream
|
||||
encoding = "utf-8" if stream_info.charset is None else stream_info.charset
|
||||
encoding = "utf-8"
|
||||
soup = BeautifulSoup(file_stream, "html.parser", from_encoding=encoding)
|
||||
|
||||
# Remove javascript and style blocks
|
||||
|
|
@ -81,10 +81,8 @@ class HtmlConverter(DocumentConverter):
|
|||
return self.convert(
|
||||
file_stream=io.BytesIO(html_content.encode("utf-8")),
|
||||
stream_info=StreamInfo(
|
||||
mimetype="text/html",
|
||||
extension=".html",
|
||||
charset="utf-8",
|
||||
url=url,
|
||||
magic_type="text/html",
|
||||
category="text",
|
||||
),
|
||||
**kwargs,
|
||||
)
|
||||
|
|
|
|||
111
packages/markitup/src/markitup/converters/_markdownify.py
Normal file
111
packages/markitup/src/markitup/converters/_markdownify.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
import re
|
||||
import markdownify
|
||||
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import quote, unquote, urlparse, urlunparse
|
||||
|
||||
|
||||
class _CustomMarkdownify(markdownify.MarkdownConverter):
    """
    A custom version of markdownify's MarkdownConverter. Changes include:

    - Altering the default heading style to use '#', '##', etc.
    - Removing javascript hyperlinks.
    - Truncating images with large data:uri sources.
    - Ensuring URIs are properly escaped, and do not conflict with Markdown syntax
    """

    def __init__(self, **options: Any):
        """Create the converter.

        Args:
            **options: markdownify options. ``heading_style`` defaults to ATX
                and ``keep_data_uris`` defaults to False when not supplied;
                everything else is forwarded unchanged.
        """
        options["heading_style"] = options.get("heading_style", markdownify.ATX)
        options["keep_data_uris"] = options.get("keep_data_uris", False)
        # Explicitly cast options to the expected type if necessary
        super().__init__(**options)

    def convert_hn(
        self,
        n: int,
        el: Any,
        text: str,
        convert_as_inline: Optional[bool] = False,
        **kwargs,
    ) -> str:
        """Same as usual, but be sure to start with a new line.

        Args:
            n: Heading level.
            el: The heading element being converted.
            text: Already-converted inner text of the heading.
            convert_as_inline: When truthy, no leading newline is added.
            **kwargs: Accepted and ignored.

        Returns:
            The heading produced by the base converter, prefixed with a
            newline when rendered as a block and ``text`` does not already
            start with one.
        """
        if not convert_as_inline:
            if not re.search(r"^\n", text):
                return "\n" + super().convert_hn(n, el, text, convert_as_inline)  # type: ignore

        return super().convert_hn(n, el, text, convert_as_inline)  # type: ignore

    def convert_a(
        self,
        el: Any,
        text: str,
        convert_as_inline: Optional[bool] = False,
        **kwargs,
    ):
        """Same as usual converter, but removes Javascript links and escapes URIs.

        Args:
            el: The anchor element being converted.
            text: Already-converted inner text of the anchor.
            convert_as_inline: Accepted for signature compatibility; not used here.
            **kwargs: Accepted and ignored.

        Returns:
            A Markdown link, an autolink (``<url>``), or just the link text
            when the href is missing, unparseable, or uses a scheme other than
            http, https or file.
        """
        prefix, suffix, text = markdownify.chomp(text)  # type: ignore
        if not text:
            return ""

        # Links inside <pre> blocks are left as plain text.
        if el.find_parent("pre") is not None:
            return text

        href = el.get("href")
        title = el.get("title")

        # Escape URIs and skip non-http or file schemes
        if href:
            try:
                parsed_url = urlparse(href)  # type: ignore
                if parsed_url.scheme and parsed_url.scheme.lower() not in ["http", "https", "file"]:  # type: ignore
                    return "%s%s%s" % (prefix, text, suffix)
                # unquote-then-quote normalises the path without double-escaping
                href = urlunparse(parsed_url._replace(path=quote(unquote(parsed_url.path))))  # type: ignore
            except ValueError:  # It's not clear if this ever gets thrown
                return "%s%s%s" % (prefix, text, suffix)

        # For the replacement see #29: text nodes underscores are escaped
        if (
            self.options["autolinks"]
            and text.replace(r"\_", "_") == href
            and not title
            and not self.options["default_title"]
        ):
            # Shortcut syntax
            return "<%s>" % href
        if self.options["default_title"] and not title:
            title = href
        title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
        return (
            "%s[%s](%s%s)%s" % (prefix, text, href, title_part, suffix)
            if href
            else text
        )

    def convert_img(
        self,
        el: Any,
        text: str,
        convert_as_inline: Optional[bool] = False,
        **kwargs,
    ) -> str:
        """Same as usual converter, but removes data URIs.

        Args:
            el: The image element being converted.
            text: Unused; images have no inner text.
            convert_as_inline: When truthy and the parent tag is not listed in
                the ``keep_inline_images_in`` option, only the alt text is
                returned.
            **kwargs: Accepted and ignored.

        Returns:
            Markdown image syntax ``![alt](src "title")``, or the alt text
            alone for inline images that are not kept.
        """
        alt = el.attrs.get("alt", None) or ""
        src = el.attrs.get("src", None) or ""
        title = el.attrs.get("title", None) or ""
        title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
        if (
            convert_as_inline
            and el.parent.name not in self.options["keep_inline_images_in"]
        ):
            return alt

        # Remove dataURIs: keep only the "data:<mime>;base64" header so the
        # output is not flooded with the encoded payload.
        if src.startswith("data:") and not self.options["keep_data_uris"]:
            src = src.split(",")[0] + "..."

        # The template must contain three placeholders; an empty template
        # raises TypeError ("not all arguments converted") for every image.
        return "![%s](%s%s)" % (alt, src, title_part)

    def convert_soup(self, soup: Any) -> str:
        """Convert a parsed BeautifulSoup document by delegating to the base class."""
        return super().convert_soup(soup)  # type: ignore
|
||||
|
|
@ -11,23 +11,14 @@ from operator import attrgetter
|
|||
from ._html_converter import HtmlConverter
|
||||
from .._base_converter import DocumentConverter, DocumentConverterResult
|
||||
from .._stream_info import StreamInfo
|
||||
from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
|
||||
|
||||
# Try loading optional (but in this case, required) dependencies
|
||||
# Save reporting of any exceptions for later
|
||||
_dependency_exc_info = None
|
||||
try:
|
||||
import pptx
|
||||
except ImportError:
|
||||
# Preserve the error and stack trace for later
|
||||
_dependency_exc_info = sys.exc_info()
|
||||
import pptx
|
||||
|
||||
|
||||
ACCEPTED_MIME_TYPE_PREFIXES = [
|
||||
ACCEPTED_MAGIC_TYPE_PREFIXES = [
|
||||
"application/vnd.openxmlformats-officedocument.presentationml",
|
||||
]
|
||||
|
||||
ACCEPTED_FILE_EXTENSIONS = [".pptx"]
|
||||
ACCEPTED_FILE_CATEGORY = [".pptx"]
|
||||
|
||||
|
||||
class PptxConverter(DocumentConverter):
|
||||
|
|
@ -45,14 +36,14 @@ class PptxConverter(DocumentConverter):
|
|||
stream_info: StreamInfo,
|
||||
**kwargs: Any, # Options to pass to the converter
|
||||
) -> bool:
|
||||
mimetype = (stream_info.mimetype or "").lower()
|
||||
extension = (stream_info.extension or "").lower()
|
||||
magic_type = (stream_info.magic_type or "").lower()
|
||||
category = (stream_info.category or "").lower()
|
||||
|
||||
if extension in ACCEPTED_FILE_EXTENSIONS:
|
||||
if category in ACCEPTED_FILE_CATEGORY:
|
||||
return True
|
||||
|
||||
for prefix in ACCEPTED_MIME_TYPE_PREFIXES:
|
||||
if mimetype.startswith(prefix):
|
||||
for prefix in ACCEPTED_MAGIC_TYPE_PREFIXES:
|
||||
if magic_type.startswith(prefix):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
|
@ -63,19 +54,6 @@ class PptxConverter(DocumentConverter):
|
|||
stream_info: StreamInfo,
|
||||
**kwargs: Any, # Options to pass to the converter
|
||||
) -> DocumentConverterResult:
|
||||
# Check the dependencies
|
||||
if _dependency_exc_info is not None:
|
||||
raise MissingDependencyException(
|
||||
MISSING_DEPENDENCY_MESSAGE.format(
|
||||
converter=type(self).__name__,
|
||||
extension=".pptx",
|
||||
feature="pptx",
|
||||
)
|
||||
) from _dependency_exc_info[
|
||||
1
|
||||
].with_traceback( # type: ignore[union-attr]
|
||||
_dependency_exc_info[2]
|
||||
)
|
||||
|
||||
# Perform the conversion
|
||||
presentation = pptx.Presentation(file_stream)
|
||||
|
|
@ -109,15 +87,12 @@ class PptxConverter(DocumentConverter):
|
|||
alt_text = re.sub(r"\s+", " ", alt_text).strip()
|
||||
|
||||
# If keep_data_uris is True, use base64 encoding for images
|
||||
if kwargs.get("keep_data_uris", False):
|
||||
|
||||
blob = shape.image.blob
|
||||
content_type = shape.image.content_type or "image/png"
|
||||
b64_string = base64.b64encode(blob).decode("utf-8")
|
||||
md_content += f"\n\n"
|
||||
else:
|
||||
# A placeholder name
|
||||
filename = re.sub(r"\W", "", shape.name) + ".jpg"
|
||||
md_content += "\n\n"
|
||||
|
||||
|
||||
# Tables
|
||||
if self._is_table(shape):
|
||||
|
|
|
|||
|
|
@ -505,92 +505,49 @@ wheels = [
|
|||
name = "markitup"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "magika" },
|
||||
{ name = "markdownify" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
all = [
|
||||
{ name = "azure-ai-documentintelligence" },
|
||||
{ name = "azure-identity" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "lxml" },
|
||||
{ name = "magika" },
|
||||
{ name = "mammoth" },
|
||||
{ name = "markdownify" },
|
||||
{ name = "olefile" },
|
||||
{ name = "openpyxl" },
|
||||
{ name = "pandas" },
|
||||
{ name = "pdfminer-six" },
|
||||
{ name = "pydub" },
|
||||
{ name = "python-magic" },
|
||||
{ name = "python-pptx" },
|
||||
{ name = "requests" },
|
||||
{ name = "speechrecognition" },
|
||||
{ name = "xlrd" },
|
||||
{ name = "youtube-transcript-api" },
|
||||
]
|
||||
audio-transcription = [
|
||||
{ name = "pydub" },
|
||||
{ name = "speechrecognition" },
|
||||
]
|
||||
docx = [
|
||||
{ name = "lxml" },
|
||||
{ name = "mammoth" },
|
||||
]
|
||||
outlook = [
|
||||
{ name = "olefile" },
|
||||
]
|
||||
pdf = [
|
||||
{ name = "pdfminer-six" },
|
||||
]
|
||||
pptx = [
|
||||
{ name = "python-pptx" },
|
||||
]
|
||||
xls = [
|
||||
{ name = "pandas" },
|
||||
{ name = "xlrd" },
|
||||
]
|
||||
xlsx = [
|
||||
{ name = "openpyxl" },
|
||||
{ name = "pandas" },
|
||||
]
|
||||
youtube-transcription = [
|
||||
{ name = "youtube-transcript-api" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "azure-ai-documentintelligence", marker = "extra == 'all'" },
|
||||
{ name = "azure-identity", marker = "extra == 'all'" },
|
||||
{ name = "azure-ai-documentintelligence" },
|
||||
{ name = "azure-identity" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "lxml", marker = "extra == 'all'" },
|
||||
{ name = "lxml", marker = "extra == 'docx'" },
|
||||
{ name = "lxml" },
|
||||
{ name = "magika", specifier = "~=0.6.1" },
|
||||
{ name = "mammoth", marker = "extra == 'all'" },
|
||||
{ name = "mammoth", marker = "extra == 'docx'" },
|
||||
{ name = "mammoth" },
|
||||
{ name = "markdownify" },
|
||||
{ name = "olefile", marker = "extra == 'all'" },
|
||||
{ name = "olefile", marker = "extra == 'outlook'" },
|
||||
{ name = "openpyxl", marker = "extra == 'all'" },
|
||||
{ name = "openpyxl", marker = "extra == 'xlsx'" },
|
||||
{ name = "pandas", marker = "extra == 'all'" },
|
||||
{ name = "pandas", marker = "extra == 'xls'" },
|
||||
{ name = "pandas", marker = "extra == 'xlsx'" },
|
||||
{ name = "pdfminer-six", marker = "extra == 'all'" },
|
||||
{ name = "pdfminer-six", marker = "extra == 'pdf'" },
|
||||
{ name = "pydub", marker = "extra == 'all'" },
|
||||
{ name = "pydub", marker = "extra == 'audio-transcription'" },
|
||||
{ name = "python-pptx", marker = "extra == 'all'" },
|
||||
{ name = "python-pptx", marker = "extra == 'pptx'" },
|
||||
{ name = "olefile" },
|
||||
{ name = "openpyxl" },
|
||||
{ name = "pandas" },
|
||||
{ name = "pdfminer-six" },
|
||||
{ name = "pydub" },
|
||||
{ name = "python-magic", specifier = ">=0.4.27" },
|
||||
{ name = "python-pptx" },
|
||||
{ name = "requests" },
|
||||
{ name = "speechrecognition", marker = "extra == 'all'" },
|
||||
{ name = "speechrecognition", marker = "extra == 'audio-transcription'" },
|
||||
{ name = "xlrd", marker = "extra == 'all'" },
|
||||
{ name = "xlrd", marker = "extra == 'xls'" },
|
||||
{ name = "youtube-transcript-api", marker = "extra == 'all'", specifier = "~=1.0.0" },
|
||||
{ name = "youtube-transcript-api", marker = "extra == 'youtube-transcription'" },
|
||||
{ name = "speechrecognition" },
|
||||
{ name = "xlrd" },
|
||||
{ name = "youtube-transcript-api", specifier = "~=1.0.0" },
|
||||
]
|
||||
provides-extras = ["all", "audio-transcription", "docx", "outlook", "pdf", "pptx", "xls", "xlsx", "youtube-transcription"]
|
||||
|
||||
[[package]]
|
||||
name = "mpmath"
|
||||
|
|
@ -966,6 +923,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-magic"
|
||||
version = "0.4.27"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", size = 14677 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-pptx"
|
||||
version = "1.0.2"
|
||||
|
|
|
|||
Loading…
Reference in a new issue