diff --git a/README.md b/README.md index 53bcb2b..418110c 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,12 @@ pip install git+https://github.com/pathintegral-institute/markitup.git@main#subd uv add git+https://github.com/pathintegral-institute/markitup.git@main#subdirectory=packages/markitup ``` +To enable audio transcription via `pydub`, install `markitup[audio]`: +```bash +uv add "git+https://github.com/pathintegral-institute/markitup.git@main#subdirectory=packages/markitup[audio]" +``` + + ## Usage ```python from markitup.converter_utils.utils import read_files_to_bytestreams diff --git a/packages/markitup/pyproject.toml b/packages/markitup/pyproject.toml index ac2b95b..ecfb8a8 100644 --- a/packages/markitup/pyproject.toml +++ b/packages/markitup/pyproject.toml @@ -34,13 +34,22 @@ dependencies = [ "pandas", "lxml", "olefile", - "pydub", + # "pydub", <- Removed from here "SpeechRecognition", "pymupdf>=1.25.5", "openpyxl>=3.1.5", "xlrd>=2.0.1", ] +[project.optional-dependencies] +audio = [ + "pydub", +] +# Optional: You may want to create an 'all' extra that includes all optional dependencies +all = [ + "pydub", +] + [tool.hatch.version] path = "src/markitup/__about__.py" @@ -84,4 +93,4 @@ exclude_lines = [ ] [tool.hatch.build.targets.sdist] -only-include = ["src/markitup"] +only-include = ["src/markitup"] \ No newline at end of file diff --git a/packages/markitup/src/markitup/converter_utils/utils.py b/packages/markitup/src/markitup/converter_utils/utils.py index a62c3be..b8d337a 100644 --- a/packages/markitup/src/markitup/converter_utils/utils.py +++ b/packages/markitup/src/markitup/converter_utils/utils.py @@ -1,9 +1,6 @@ import os from io import BytesIO -from markitup._schemas import StreamInfo -import magic import speech_recognition as sr -import pydub import io from typing import BinaryIO @@ -47,6 +44,7 @@ def transcribe_audio(file_stream: BinaryIO, *, magic_type: str = "audio/mpeg") - match audio_format: case 'mp3': + import pydub audio_segment = 
pydub.AudioSegment.from_file( file_stream, format=audio_format) audio_source = io.BytesIO()