make pydub an optional import

This commit is contained in:
rong-xyz 2025-04-24 06:56:00 +00:00
parent da2cb0a796
commit 2ac9cdc120
3 changed files with 18 additions and 5 deletions

View file

@ -30,6 +30,12 @@ pip install git+https://github.com/pathintegral-institute/markitup.git@main#subd
uv add git+https://github.com/pathintegral-institute/markitup.git@main#subdirectory=packages/markitup
```
To enable audio transcription (which relies on `pydub`), install the `markitup[audio]` extra:
```bash
uv add "markitup[audio] @ git+https://github.com/pathintegral-institute/markitup.git@main#subdirectory=packages/markitup"
```
## Usage
```python
from markitup.converter_utils.utils import read_files_to_bytestreams

View file

@ -34,13 +34,22 @@ dependencies = [
"pandas",
"lxml",
"olefile",
"pydub",
# "pydub", <- Removed from here
"SpeechRecognition",
"pymupdf>=1.25.5",
"openpyxl>=3.1.5",
"xlrd>=2.0.1",
]
[project.optional-dependencies]
audio = [
"pydub",
]
# Convenience 'all' extra that bundles every optional dependency; keep it in sync with the extras above.
all = [
"pydub",
]
[tool.hatch.version]
path = "src/markitup/__about__.py"
@ -84,4 +93,4 @@ exclude_lines = [
]
[tool.hatch.build.targets.sdist]
only-include = ["src/markitup"]
only-include = ["src/markitup"]

View file

@ -1,9 +1,6 @@
import os
from io import BytesIO
from markitup._schemas import StreamInfo
import magic
import speech_recognition as sr
import pydub
import io
from typing import BinaryIO
@ -47,6 +44,7 @@ def transcribe_audio(file_stream: BinaryIO, *, magic_type: str = "audio/mpeg") -
match audio_format:
case 'mp3':
import pydub
audio_segment = pydub.AudioSegment.from_file(
file_stream, format=audio_format)
audio_source = io.BytesIO()