diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index e13e299..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,32 +0,0 @@ -// For format details, see https://aka.ms/devcontainer.json. For config options, see the -// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile -{ - "name": "Existing Dockerfile", - "build": { - // Sets the run context to one level up instead of the .devcontainer folder. - "context": "..", - // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. - "dockerfile": "../Dockerfile", - "args": { - "INSTALL_GIT": "true" - } - }, - - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - "features": { - "ghcr.io/devcontainers-extra/features/hatch:2": {} - }, - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - - // Uncomment the next line to run commands after the container is created. - // "postCreateCommand": "cat /etc/os-release", - - // Configure tool-specific properties. - // "customizations": {}, - - // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. 
- "remoteUser": "root" -} diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 319b932..0000000 --- a/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!packages/ diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index f787c0e..0000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -packages/markitdown/tests/test_files/** linguist-vendored -packages/markitdown-sample-plugin/tests/test_files/** linguist-vendored diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 5ace460..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,6 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml deleted file mode 100644 index 321f823..0000000 --- a/.github/workflows/pre-commit.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: pre-commit -on: [pull_request] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.x" - - - name: Install pre-commit - run: | - pip install pre-commit - pre-commit install --install-hooks - - - name: Run pre-commit - run: pre-commit run --all-files diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 78c7cdc..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: tests -on: [pull_request] - -jobs: - tests: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: | - 3.10 - 3.11 - 3.12 - - name: Install Hatch - run: pipx install hatch - - name: Run tests - run: cd packages/markitdown; hatch test diff --git a/.gitignore b/.gitignore index 7f0de2b..2b2fe1a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ *.py[cod] *$py.class - +/working # C extensions *.so diff 
--git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 3ed7a92..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -repos: - - repo: https://github.com/psf/black - rev: 23.7.0 # Use the latest version of Black - hooks: - - id: black diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index f9ba8cf..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,9 +0,0 @@ -# Microsoft Open Source Code of Conduct - -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). - -Resources: - -- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) -- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) -- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index c65bf9c..0000000 --- a/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -FROM python:3.13-slim-bullseye - -ENV DEBIAN_FRONTEND=noninteractive -ENV EXIFTOOL_PATH=/usr/bin/exiftool -ENV FFMPEG_PATH=/usr/bin/ffmpeg - -# Runtime dependency -RUN apt-get update && apt-get install -y --no-install-recommends \ - ffmpeg \ - exiftool - -ARG INSTALL_GIT=false -RUN if [ "$INSTALL_GIT" = "true" ]; then \ - apt-get install -y --no-install-recommends \ - git; \ - fi - -# Cleanup -RUN rm -rf /var/lib/apt/lists/* - -WORKDIR /app -COPY . 
/app -RUN pip --no-cache-dir install \ - /app/packages/markitdown[all] \ - /app/packages/markitdown-sample-plugin - -# Default USERID and GROUPID -ARG USERID=nobody -ARG GROUPID=nogroup - -USER $USERID:$GROUPID - -ENTRYPOINT [ "markitdown" ] diff --git a/README.md b/README.md index 0433a66..16164e5 100644 --- a/README.md +++ b/README.md @@ -1,223 +1,9 @@ -# MarkItDown +# MarkItUp -[![PyPI](https://img.shields.io/pypi/v/markitdown.svg)](https://pypi.org/project/markitdown/) -![PyPI - Downloads](https://img.shields.io/pypi/dd/markitdown) -[![Built by AutoGen Team](https://img.shields.io/badge/Built%20by-AutoGen%20Team-blue)](https://github.com/microsoft/autogen) +This is a fork of [MarkItDown](https://github.com/microsoft/markitdown). -> [!TIP] -> MarkItDown now offers an MCP (Model Context Protocol) server for integration with LLM applications like Claude Desktop. See [markitdown-mcp](https://github.com/microsoft/markitdown/tree/main/packages/markitdown-mcp) for more information. +While MarkItDown is a useful tool, its returned content is too text-focused and has not kept pace with the rise of multi-modal LLMs. -> [!IMPORTANT] -> Breaking changes between 0.0.1 to 0.1.0: -> * Dependencies are now organized into optional feature-groups (further details below). Use `pip install 'markitdown[all]'` to have backward-compatible behavior. -> * convert\_stream() now requires a binary file-like object (e.g., a file opened in binary mode, or an io.BytesIO object). This is a breaking change from the previous version, where it previously also accepted text file-like objects, like io.StringIO. -> * The DocumentConverter class interface has changed to read from file-like streams rather than file paths. *No temporary files are created anymore*. If you are the maintainer of a plugin, or custom DocumentConverter, you likely need to update your code. Otherwise, if only using the MarkItDown class or CLI (as in these examples), you should not need to change anything. 
+## Features -MarkItDown is a lightweight Python utility for converting various files to Markdown for use with LLMs and related text analysis pipelines. To this end, it is most comparable to [textract](https://github.com/deanmalmgren/textract), but with a focus on preserving important document structure and content as Markdown (including: headings, lists, tables, links, etc.) While the output is often reasonably presentable and human-friendly, it is meant to be consumed by text analysis tools -- and may not be the best option for high-fidelity document conversions for human consumption. - -At present, MarkItDown supports: - -- PDF -- PowerPoint -- Word -- Excel -- Images (EXIF metadata and OCR) -- Audio (EXIF metadata and speech transcription) -- HTML -- Text-based formats (CSV, JSON, XML) -- ZIP files (iterates over contents) -- Youtube URLs -- EPubs -- ... and more! - -## Why Markdown? - -Markdown is extremely close to plain text, with minimal markup or formatting, but still -provides a way to represent important document structure. Mainstream LLMs, such as -OpenAI's GPT-4o, natively "_speak_" Markdown, and often incorporate Markdown into their -responses unprompted. This suggests that they have been trained on vast amounts of -Markdown-formatted text, and understand it well. As a side benefit, Markdown conventions -are also highly token-efficient. - -## Installation - -To install MarkItDown, use pip: `pip install 'markitdown[all]'`. 
Alternatively, you can install it from the source: - -```bash -git clone git@github.com:microsoft/markitdown.git -cd markitdown -pip install -e 'packages/markitdown[all]' -``` - -## Usage - -### Command-Line - -```bash -markitdown path-to-file.pdf > document.md -``` - -Or use `-o` to specify the output file: - -```bash -markitdown path-to-file.pdf -o document.md -``` - -You can also pipe content: - -```bash -cat path-to-file.pdf | markitdown -``` - -### Optional Dependencies -MarkItDown has optional dependencies for activating various file formats. Earlier in this document, we installed all optional dependencies with the `[all]` option. However, you can also install them individually for more control. For example: - -```bash -pip install 'markitdown[pdf, docx, pptx]' -``` - -will install only the dependencies for PDF, DOCX, and PPTX files. - -At the moment, the following optional dependencies are available: - -* `[all]` Installs all optional dependencies -* `[pptx]` Installs dependencies for PowerPoint files -* `[docx]` Installs dependencies for Word files -* `[xlsx]` Installs dependencies for Excel files -* `[xls]` Installs dependencies for older Excel files -* `[pdf]` Installs dependencies for PDF files -* `[outlook]` Installs dependencies for Outlook messages -* `[az-doc-intel]` Installs dependencies for Azure Document Intelligence -* `[audio-transcription]` Installs dependencies for audio transcription of wav and mp3 files -* `[youtube-transcription]` Installs dependencies for fetching YouTube video transcription - -### Plugins - -MarkItDown also supports 3rd-party plugins. Plugins are disabled by default. To list installed plugins: - -```bash -markitdown --list-plugins -``` - -To enable plugins use: - -```bash -markitdown --use-plugins path-to-file.pdf -``` - -To find available plugins, search GitHub for the hashtag `#markitdown-plugin`. To develop a plugin, see `packages/markitdown-sample-plugin`. 
- -### Azure Document Intelligence - -To use Microsoft Document Intelligence for conversion: - -```bash -markitdown path-to-file.pdf -o document.md -d -e "" -``` - -More information about how to set up an Azure Document Intelligence Resource can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/document-intelligence/how-to-guides/create-document-intelligence-resource?view=doc-intel-4.0.0) - -### Python API - -Basic usage in Python: - -```python -from markitdown import MarkItDown - -md = MarkItDown(enable_plugins=False) # Set to True to enable plugins -result = md.convert("test.xlsx") -print(result.text_content) -``` - -Document Intelligence conversion in Python: - -```python -from markitdown import MarkItDown - -md = MarkItDown(docintel_endpoint="") -result = md.convert("test.pdf") -print(result.text_content) -``` - -To use Large Language Models for image descriptions, provide `llm_client` and `llm_model`: - -```python -from markitdown import MarkItDown -from openai import OpenAI - -client = OpenAI() -md = MarkItDown(llm_client=client, llm_model="gpt-4o") -result = md.convert("example.jpg") -print(result.text_content) -``` - -### Docker - -```sh -docker build -t markitdown:latest . -docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md -``` - -## Contributing - -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. - -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. 
- -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. - -### How to Contribute - -You can help by looking at issues or helping review PRs. Any issue or PR is welcome, but we have also marked some as 'open for contribution' and 'open for reviewing' to help facilitate community contributions. These are ofcourse just suggestions and you are welcome to contribute in any way you like. - -
- -| | All | Especially Needs Help from Community | -| ---------- | ------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------- | -| **Issues** | [All Issues](https://github.com/microsoft/markitdown/issues) | [Issues open for contribution](https://github.com/microsoft/markitdown/issues?q=is%3Aissue+is%3Aopen+label%3A%22open+for+contribution%22) | -| **PRs** | [All PRs](https://github.com/microsoft/markitdown/pulls) | [PRs open for reviewing](https://github.com/microsoft/markitdown/pulls?q=is%3Apr+is%3Aopen+label%3A%22open+for+reviewing%22) | - -
- -### Running Tests and Checks - -- Navigate to the MarkItDown package: - - ```sh - cd packages/markitdown - ``` - -- Install `hatch` in your environment and run tests: - - ```sh - pip install hatch # Other ways of installing hatch: https://hatch.pypa.io/dev/install/ - hatch shell - hatch test - ``` - - (Alternative) Use the Devcontainer which has all the dependencies installed: - - ```sh - # Reopen the project in Devcontainer and run: - hatch test - ``` - -- Run pre-commit checks before submitting a PR: `pre-commit run --all-files` - -### Contributing 3rd-party Plugins - -You can also contribute by creating and sharing 3rd party plugins. See `packages/markitdown-sample-plugin` for more details. - -## Trademarks - -This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft -trademarks or logos is subject to and must follow -[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). -Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. -Any use of third-party trademarks or logos are subject to those third-party's policies. +We plan to return an OpenAI compatible response, which can be used by most LLM clients. \ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index b3c89ef..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,41 +0,0 @@ - - -## Security - -Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 
- -If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. - -## Reporting Security Issues - -**Please do not report security vulnerabilities through public GitHub issues.** - -Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). - -If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). - -You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). - -Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: - - * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) - * Full paths of source file(s) related to the manifestation of the issue - * The location of the affected source code (tag/branch/commit or direct URL) - * Any special configuration required to reproduce the issue - * Step-by-step instructions to reproduce the issue - * Proof-of-concept or exploit code (if possible) - * Impact of the issue, including how an attacker might exploit the issue - -This information will help us triage your report more quickly. - -If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. 
Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. - -## Preferred Languages - -We prefer all communications to be in English. - -## Policy - -Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). - - diff --git a/SUPPORT.md b/SUPPORT.md deleted file mode 100644 index 291d4d4..0000000 --- a/SUPPORT.md +++ /dev/null @@ -1,25 +0,0 @@ -# TODO: The maintainer of this repo has not yet edited this file - -**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? - -- **No CSS support:** Fill out this template with information about how to file issues and get help. -- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. -- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. - -*Then remove this first heading from this SUPPORT.MD file before publishing your repo.* - -# Support - -## How to file issues and get help - -This project uses GitHub Issues to track bugs and feature requests. Please search the existing -issues before filing new issues to avoid duplicates. For new issues, file your bug or -feature request as a new Issue. - -For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE -FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER -CHANNEL. WHERE WILL YOU HELP PEOPLE?**. - -## Microsoft Support Policy - -Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
diff --git a/packages/markitdown-mcp/Dockerfile b/packages/markitup-mcp/Dockerfile similarity index 93% rename from packages/markitdown-mcp/Dockerfile rename to packages/markitup-mcp/Dockerfile index fe52a4b..540a2c6 100644 --- a/packages/markitdown-mcp/Dockerfile +++ b/packages/markitup-mcp/Dockerfile @@ -23,4 +23,4 @@ ARG GROUPID=nogroup USER $USERID:$GROUPID -ENTRYPOINT [ "markitdown-mcp" ] +ENTRYPOINT [ "markitup-mcp" ] diff --git a/packages/markitdown-mcp/README.md b/packages/markitup-mcp/README.md similarity index 79% rename from packages/markitdown-mcp/README.md rename to packages/markitup-mcp/README.md index 5b92fb3..0ee5a84 100644 --- a/packages/markitdown-mcp/README.md +++ b/packages/markitup-mcp/README.md @@ -1,10 +1,10 @@ -# MarkItDown-MCP +# MarkItUp-MCP -[![PyPI](https://img.shields.io/pypi/v/markitdown-mcp.svg)](https://pypi.org/project/markitdown-mcp/) -![PyPI - Downloads](https://img.shields.io/pypi/dd/markitdown-mcp) +[![PyPI](https://img.shields.io/pypi/v/markitup-mcp.svg)](https://pypi.org/project/markitup-mcp/) +![PyPI - Downloads](https://img.shields.io/pypi/dd/markitup-mcp) [![Built by AutoGen Team](https://img.shields.io/badge/Built%20by-AutoGen%20Team-blue)](https://github.com/microsoft/autogen) -The `markitdown-mcp` package provides a lightweight STDIO and SSE MCP server for calling MarkItDown. +The `markitup-mcp` package provides a lightweight STDIO and SSE MCP server for calling MarkItUp. It exposes one tool: `convert_to_markdown(uri)`, where uri can be any `http:`, `https:`, `file:`, or `data:` URI. 
@@ -13,7 +13,7 @@ It exposes one tool: `convert_to_markdown(uri)`, where uri can be any `http:`, ` To install the package, use pip: ```bash -pip install markitdown-mcp +pip install markitup-mcp ``` ## Usage @@ -22,30 +22,30 @@ To run the MCP server, ussing STDIO (default) use the following command: ```bash -markitdown-mcp +markitup-mcp ``` To run the MCP server, using SSE use the following command: ```bash -markitdown-mcp --sse --host 127.0.0.1 --port 3001 +markitup-mcp --sse --host 127.0.0.1 --port 3001 ``` ## Running in Docker -To run `markitdown-mcp` in Docker, build the Docker image using the provided Dockerfile: +To run `markitup-mcp` in Docker, build the Docker image using the provided Dockerfile: ```bash -docker build -t markitdown-mcp:latest . +docker build -t markitup-mcp:latest . ``` And run it using: ```bash -docker run -it --rm markitdown-mcp:latest +docker run -it --rm markitup-mcp:latest ``` This will be sufficient for remote URIs. To access local files, you need to mount the local directory into the container. For example, if you want to access files in `/home/user/data`, you can run: ```bash -docker run -it --rm -v /home/user/data:/workdir markitdown-mcp:latest +docker run -it --rm -v /home/user/data:/workdir markitup-mcp:latest ``` Once mounted, all files under data will be accessible under `/workdir` in the container. For example, if you have a file `example.txt` in `/home/user/data`, it will be accessible in the container at `/workdir/example.txt`. 
@@ -61,13 +61,13 @@ Edit it to include the following JSON entry: ```json { "mcpServers": { - "markitdown": { + "markitup": { "command": "docker", "args": [ "run", "--rm", "-i", - "markitdown-mcp:latest" + "markitup-mcp:latest" ] } } @@ -79,7 +79,7 @@ If you want to mount a directory, adjust it accordingly: ```json { "mcpServers": { - "markitdown": { + "markitup": { "command": "docker", "args": [ "run", @@ -87,7 +87,7 @@ If you want to mount a directory, adjust it accordingly: "-i", "-v", "/home/user/data:/workdir", - "markitdown-mcp:latest" + "markitup-mcp:latest" ] } } @@ -106,7 +106,7 @@ You can then connect to the insepctor through the specified host and port (e.g., If using STDIO: * select `STDIO` as the transport type, -* input `markitdown-mcp` as the command, and +* input `markitup-mcp` as the command, and * click `Connect` If using SSE: diff --git a/packages/markitdown-mcp/pyproject.toml b/packages/markitup-mcp/pyproject.toml similarity index 64% rename from packages/markitdown-mcp/pyproject.toml rename to packages/markitup-mcp/pyproject.toml index 6cbc0e5..2797a20 100644 --- a/packages/markitdown-mcp/pyproject.toml +++ b/packages/markitup-mcp/pyproject.toml @@ -3,9 +3,9 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "markitdown-mcp" +name = "markitup-mcp" dynamic = ["version"] -description = 'An MCP server for the "markitdown" library.' +description = 'An MCP server for the "markitup" library.' 
readme = "README.md" requires-python = ">=3.10" license = "MIT" @@ -25,38 +25,38 @@ classifiers = [ ] dependencies = [ "mcp~=1.5.0", - "markitdown[all]>=0.1.1,<0.2.0", + "markitup[all]>=0.1.1,<0.2.0", ] [project.urls] -Documentation = "https://github.com/microsoft/markitdown#readme" -Issues = "https://github.com/microsoft/markitdown/issues" -Source = "https://github.com/microsoft/markitdown" +Documentation = "https://github.com/microsoft/markitup#readme" +Issues = "https://github.com/microsoft/markitup/issues" +Source = "https://github.com/microsoft/markitup" [tool.hatch.version] -path = "src/markitdown_mcp/__about__.py" +path = "src/markitup_mcp/__about__.py" [project.scripts] -markitdown-mcp = "markitdown_mcp.__main__:main" +markitup-mcp = "markitup_mcp.__main__:main" [tool.hatch.envs.types] extra-dependencies = [ "mypy>=1.0.0", ] [tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/markitdown_mcp tests}" +check = "mypy --install-types --non-interactive {args:src/markitup_mcp tests}" [tool.coverage.run] -source_pkgs = ["markitdown-mcp", "tests"] +source_pkgs = ["markitup-mcp", "tests"] branch = true parallel = true omit = [ - "src/markitdown_mcp/__about__.py", + "src/markitup_mcp/__about__.py", ] [tool.coverage.paths] -markitdown-mcp = ["src/markitdown_mcp", "*/markitdown-mcp/src/markitdown_mcp"] -tests = ["tests", "*/markitdown-mcp/tests"] +markitup-mcp = ["src/markitup_mcp", "*/markitup-mcp/src/markitup_mcp"] +tests = ["tests", "*/markitup-mcp/tests"] [tool.coverage.report] exclude_lines = [ @@ -66,4 +66,4 @@ exclude_lines = [ ] [tool.hatch.build.targets.sdist] -only-include = ["src/markitdown_mcp"] +only-include = ["src/markitup_mcp"] diff --git a/packages/markitdown-mcp/src/markitdown_mcp/__about__.py b/packages/markitup-mcp/src/markitup_mcp/__about__.py similarity index 100% rename from packages/markitdown-mcp/src/markitdown_mcp/__about__.py rename to packages/markitup-mcp/src/markitup_mcp/__about__.py diff --git 
a/packages/markitdown-mcp/src/markitdown_mcp/__init__.py b/packages/markitup-mcp/src/markitup_mcp/__init__.py similarity index 100% rename from packages/markitdown-mcp/src/markitdown_mcp/__init__.py rename to packages/markitup-mcp/src/markitup_mcp/__init__.py diff --git a/packages/markitdown-mcp/src/markitdown_mcp/__main__.py b/packages/markitup-mcp/src/markitup_mcp/__main__.py similarity index 92% rename from packages/markitdown-mcp/src/markitdown_mcp/__main__.py rename to packages/markitup-mcp/src/markitup_mcp/__main__.py index 32b7527..ce16b33 100644 --- a/packages/markitdown-mcp/src/markitdown_mcp/__main__.py +++ b/packages/markitup-mcp/src/markitup_mcp/__main__.py @@ -6,17 +6,17 @@ from mcp.server.sse import SseServerTransport from starlette.requests import Request from starlette.routing import Mount, Route from mcp.server import Server -from markitdown import MarkItDown +from markitup import MarkItUp import uvicorn -# Initialize FastMCP server for MarkItDown (SSE) -mcp = FastMCP("markitdown") +# Initialize FastMCP server for MarkItUp (SSE) +mcp = FastMCP("markitup") @mcp.tool() async def convert_to_markdown(uri: str) -> str: """Convert a resource described by an http:, https:, file: or data: URI to markdown""" - return MarkItDown().convert_uri(uri).markdown + return MarkItUp().convert_uri(uri).markdown def create_starlette_app(mcp_server: Server, *, debug: bool = False) -> Starlette: @@ -49,7 +49,7 @@ def main(): mcp_server = mcp._mcp_server - parser = argparse.ArgumentParser(description="Run MCP SSE-based MarkItDown server") + parser = argparse.ArgumentParser(description="Run MCP SSE-based MarkItUp server") parser.add_argument( "--sse", diff --git a/packages/markitdown-mcp/src/markitdown_mcp/py.typed b/packages/markitup-mcp/src/markitup_mcp/py.typed similarity index 100% rename from packages/markitdown-mcp/src/markitdown_mcp/py.typed rename to packages/markitup-mcp/src/markitup_mcp/py.typed diff --git a/packages/markitdown-mcp/tests/__init__.py 
b/packages/markitup-mcp/tests/__init__.py similarity index 100% rename from packages/markitdown-mcp/tests/__init__.py rename to packages/markitup-mcp/tests/__init__.py diff --git a/packages/markitdown-sample-plugin/README.md b/packages/markitup-sample-plugin/README.md similarity index 70% rename from packages/markitdown-sample-plugin/README.md rename to packages/markitup-sample-plugin/README.md index adf1d9e..98340dc 100644 --- a/packages/markitdown-sample-plugin/README.md +++ b/packages/markitup-sample-plugin/README.md @@ -1,17 +1,17 @@ -# MarkItDown Sample Plugin +# MarkItUp Sample Plugin -[![PyPI](https://img.shields.io/pypi/v/markitdown-sample-plugin.svg)](https://pypi.org/project/markitdown-sample-plugin/) -![PyPI - Downloads](https://img.shields.io/pypi/dd/markitdown-sample-plugin) +[![PyPI](https://img.shields.io/pypi/v/markitup-sample-plugin.svg)](https://pypi.org/project/markitup-sample-plugin/) +![PyPI - Downloads](https://img.shields.io/pypi/dd/markitup-sample-plugin) [![Built by AutoGen Team](https://img.shields.io/badge/Built%20by-AutoGen%20Team-blue)](https://github.com/microsoft/autogen) -This project shows how to create a sample plugin for MarkItDown. The most important parts are as follows: +This project shows how to create a sample plugin for MarkItUp. The most important parts are as follows: Next, implement your custom DocumentConverter: ```python from typing import BinaryIO, Any -from markitdown import MarkItDown, DocumentConverter, DocumentConverterResult, StreamInfo +from markitup import MarkItUp, DocumentConverter, DocumentConverterResult, StreamInfo class RtfConverter(DocumentConverter): @@ -51,22 +51,22 @@ Next, make sure your package implements and exports the following: # The only supported version is 1 for now. __plugin_interface_version__ = 1 -# The main entrypoint for the plugin. This is called each time MarkItDown instances are created. -def register_converters(markitdown: MarkItDown, **kwargs): +# The main entrypoint for the plugin. 
This is called each time MarkItUp instances are created. +def register_converters(markitup: MarkItUp, **kwargs): """ - Called during construction of MarkItDown instances to register converters provided by plugins. + Called during construction of MarkItUp instances to register converters provided by plugins. """ # Simply create and attach an RtfConverter instance - markitdown.register_converter(RtfConverter()) + markitup.register_converter(RtfConverter()) ``` Finally, create an entrypoint in the `pyproject.toml` file: ```toml -[project.entry-points."markitdown.plugin"] -sample_plugin = "markitdown_sample_plugin" +[project.entry-points."markitup.plugin"] +sample_plugin = "markitup_sample_plugin" ``` Here, the value of `sample_plugin` can be any key, but should ideally be the name of the plugin. The value is the fully qualified name of the package implementing the plugin. @@ -74,30 +74,30 @@ Here, the value of `sample_plugin` can be any key, but should ideally be the nam ## Installation -To use the plugin with MarkItDown, it must be installed. To install the plugin from the current directory use: +To use the plugin with MarkItUp, it must be installed. To install the plugin from the current directory use: ```bash pip install -e . ``` -Once the plugin package is installed, verify that it is available to MarkItDown by running: +Once the plugin package is installed, verify that it is available to MarkItUp by running: ```bash -markitdown --list-plugins +markitup --list-plugins ``` To use the plugin for a conversion use the `--use-plugins` flag. 
For example, to convert an RTF file: ```bash -markitdown --use-plugins path-to-file.rtf +markitup --use-plugins path-to-file.rtf ``` In Python, plugins can be enabled as follows: ```python -from markitdown import MarkItDown +from markitup import MarkItUp -md = MarkItDown(enable_plugins=True) +md = MarkItUp(enable_plugins=True) result = md.convert("path-to-file.rtf") print(result.text_content) ``` diff --git a/packages/markitdown-sample-plugin/pyproject.toml b/packages/markitup-sample-plugin/pyproject.toml similarity index 60% rename from packages/markitdown-sample-plugin/pyproject.toml rename to packages/markitup-sample-plugin/pyproject.toml index 4721036..ce26a00 100644 --- a/packages/markitdown-sample-plugin/pyproject.toml +++ b/packages/markitup-sample-plugin/pyproject.toml @@ -3,9 +3,9 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "markitdown-sample-plugin" +name = "markitup-sample-plugin" dynamic = ["version"] -description = 'A sample plugin for the "markitdown" library.' +description = 'A sample plugin for the "markitup" library.' readme = "README.md" requires-python = ">=3.10" license = "MIT" @@ -24,40 +24,40 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "markitdown>=0.1.0a1", + "markitup>=0.1.0a1", "striprtf", ] [project.urls] -Documentation = "https://github.com/microsoft/markitdown#readme" -Issues = "https://github.com/microsoft/markitdown/issues" -Source = "https://github.com/microsoft/markitdown" +Documentation = "https://github.com/microsoft/markitup#readme" +Issues = "https://github.com/microsoft/markitup/issues" +Source = "https://github.com/microsoft/markitup" [tool.hatch.version] -path = "src/markitdown_sample_plugin/__about__.py" +path = "src/markitup_sample_plugin/__about__.py" # IMPORTANT: MarkItDown will look for this entry point to find the plugin. 
-[project.entry-points."markitdown.plugin"] -sample_plugin = "markitdown_sample_plugin" +[project.entry-points."markitup.plugin"] +sample_plugin = "markitup_sample_plugin" [tool.hatch.envs.types] extra-dependencies = [ "mypy>=1.0.0", ] [tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/markitdown_sample_plugin tests}" +check = "mypy --install-types --non-interactive {args:src/markitup_sample_plugin tests}" [tool.coverage.run] -source_pkgs = ["markitdown-sample-plugin", "tests"] +source_pkgs = ["markitup-sample-plugin", "tests"] branch = true parallel = true omit = [ - "src/markitdown_sample_plugin/__about__.py", + "src/markitup_sample_plugin/__about__.py", ] [tool.coverage.paths] -markitdown-sample-plugin = ["src/markitdown_sample_plugin", "*/markitdown-sample-plugin/src/markitdown_sample_plugin"] -tests = ["tests", "*/markitdown-sample-plugin/tests"] +markitup-sample-plugin = ["src/markitup_sample_plugin", "*/markitup-sample-plugin/src/markitup_sample_plugin"] +tests = ["tests", "*/markitup-sample-plugin/tests"] [tool.coverage.report] exclude_lines = [ @@ -67,4 +67,4 @@ exclude_lines = [ ] [tool.hatch.build.targets.sdist] -only-include = ["src/markitdown_sample_plugin"] +only-include = ["src/markitup_sample_plugin"] diff --git a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/__about__.py b/packages/markitup-sample-plugin/src/markitup_sample_plugin/__about__.py similarity index 100% rename from packages/markitdown-sample-plugin/src/markitdown_sample_plugin/__about__.py rename to packages/markitup-sample-plugin/src/markitup_sample_plugin/__about__.py diff --git a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/__init__.py b/packages/markitup-sample-plugin/src/markitup_sample_plugin/__init__.py similarity index 100% rename from packages/markitdown-sample-plugin/src/markitdown_sample_plugin/__init__.py rename to packages/markitup-sample-plugin/src/markitup_sample_plugin/__init__.py diff --git 
a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py b/packages/markitup-sample-plugin/src/markitup_sample_plugin/_plugin.py similarity index 86% rename from packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py rename to packages/markitup-sample-plugin/src/markitup_sample_plugin/_plugin.py index 1ca00cc..971efa1 100644 --- a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py +++ b/packages/markitup-sample-plugin/src/markitup_sample_plugin/_plugin.py @@ -2,8 +2,8 @@ import locale from typing import BinaryIO, Any from striprtf.striprtf import rtf_to_text -from markitdown import ( - MarkItDown, +from markitup import ( + MarkItUp, DocumentConverter, DocumentConverterResult, StreamInfo, @@ -22,13 +22,13 @@ ACCEPTED_MIME_TYPE_PREFIXES = [ ACCEPTED_FILE_EXTENSIONS = [".rtf"] -def register_converters(markitdown: MarkItDown, **kwargs): +def register_converters(markitup: MarkItUp, **kwargs): """ - Called during construction of MarkItDown instances to register converters provided by plugins. + Called during construction of MarkItUp instances to register converters provided by plugins. 
""" # Simply create and attach an RtfConverter instance - markitdown.register_converter(RtfConverter()) + markitup.register_converter(RtfConverter()) class RtfConverter(DocumentConverter): diff --git a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/py.typed b/packages/markitup-sample-plugin/src/markitup_sample_plugin/py.typed similarity index 100% rename from packages/markitdown-sample-plugin/src/markitdown_sample_plugin/py.typed rename to packages/markitup-sample-plugin/src/markitup_sample_plugin/py.typed diff --git a/packages/markitdown-sample-plugin/tests/__init__.py b/packages/markitup-sample-plugin/tests/__init__.py similarity index 100% rename from packages/markitdown-sample-plugin/tests/__init__.py rename to packages/markitup-sample-plugin/tests/__init__.py diff --git a/packages/markitdown-sample-plugin/tests/test_files/test.rtf b/packages/markitup-sample-plugin/tests/test_files/test.rtf similarity index 100% rename from packages/markitdown-sample-plugin/tests/test_files/test.rtf rename to packages/markitup-sample-plugin/tests/test_files/test.rtf diff --git a/packages/markitdown-sample-plugin/tests/test_sample_plugin.py b/packages/markitup-sample-plugin/tests/test_sample_plugin.py similarity index 73% rename from packages/markitdown-sample-plugin/tests/test_sample_plugin.py rename to packages/markitup-sample-plugin/tests/test_sample_plugin.py index 6d0102d..07e2f08 100644 --- a/packages/markitdown-sample-plugin/tests/test_sample_plugin.py +++ b/packages/markitup-sample-plugin/tests/test_sample_plugin.py @@ -2,14 +2,14 @@ import os import pytest -from markitdown import MarkItDown, StreamInfo -from markitdown_sample_plugin import RtfConverter +from markitup import MarkItUp, StreamInfo +from markitup_sample_plugin import RtfConverter TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files") RTF_TEST_STRINGS = { "This is a Sample RTF File", - "It is included to test if the MarkItDown sample plugin can correctly convert RTF files.", + 
"It is included to test if the MarkItUp sample plugin can correctly convert RTF files.", } @@ -28,9 +28,9 @@ def test_converter() -> None: assert test_string in result.text_content -def test_markitdown() -> None: - """Tests that MarkItDown correctly loads the plugin.""" - md = MarkItDown(enable_plugins=True) +def test_markitup() -> None: + """Tests that MarkItUp correctly loads the plugin.""" + md = MarkItUp(enable_plugins=True) result = md.convert(os.path.join(TEST_FILES_DIR, "test.rtf")) for test_string in RTF_TEST_STRINGS: @@ -40,5 +40,5 @@ def test_markitdown() -> None: if __name__ == "__main__": """Runs this file's tests from the command line.""" test_converter() - test_markitdown() + test_markitup() print("All tests passed.") diff --git a/packages/markitdown/README.md b/packages/markitup/README.md similarity index 63% rename from packages/markitdown/README.md rename to packages/markitup/README.md index edd2701..907dfef 100644 --- a/packages/markitdown/README.md +++ b/packages/markitup/README.md @@ -1,24 +1,24 @@ -# MarkItDown +# MarkItUp > [!IMPORTANT] -> MarkItDown is a Python package and command-line utility for converting various files to Markdown (e.g., for indexing, text analysis, etc). +> MarkItUp is a Python package and command-line utility for converting various files to Markdown (e.g., for indexing, text analysis, etc). > -> For more information, and full documentation, see the project [README.md](https://github.com/microsoft/markitdown) on GitHub. +> For more information, and full documentation, see the project [README.md](https://github.com/microsoft/markitup) on GitHub. 
## Installation From PyPI: ```bash -pip install markitdown[all] +pip install markitup[all] ``` From source: ```bash -git clone git@github.com:microsoft/markitdown.git -cd markitdown -pip install -e packages/markitdown[all] +git clone git@github.com:microsoft/markitup.git +cd markitup +pip install -e packages/markitup[all] ``` ## Usage @@ -26,22 +26,22 @@ pip install -e packages/markitdown[all] ### Command-Line ```bash -markitdown path-to-file.pdf > document.md +markitup path-to-file.pdf > document.md ``` ### Python API ```python -from markitdown import MarkItDown +from markitup import MarkItUp -md = MarkItDown() +md = MarkItUp() result = md.convert("test.xlsx") print(result.text_content) ``` ### More Information -For more information, and full documentation, see the project [README.md](https://github.com/microsoft/markitdown) on GitHub. +For more information, and full documentation, see the project [README.md](https://github.com/microsoft/markitup) on GitHub. ## Trademarks diff --git a/packages/markitdown/ThirdPartyNotices.md b/packages/markitup/ThirdPartyNotices.md similarity index 97% rename from packages/markitdown/ThirdPartyNotices.md rename to packages/markitup/ThirdPartyNotices.md index 44edd8f..fc7f8eb 100644 --- a/packages/markitdown/ThirdPartyNotices.md +++ b/packages/markitup/ThirdPartyNotices.md @@ -3,7 +3,7 @@ **Do Not Translate or Localize** This project incorporates components from the projects listed below. The original copyright notices and the licenses -under which MarkItDown received such components are set forth below. MarkItDown reserves all rights not expressly +under which MarkItUp received such components are set forth below. MarkItUp reserves all rights not expressly granted herein, whether by implication, estoppel or otherwise. 1.dwml (https://github.com/xiilei/dwml) @@ -16,11 +16,11 @@ NOTE 1: What follows is a verbatim copy of dwml's LICENSE file, as it appeared o placeholders for the copyright owner and year. 
NOTE 2: The Apache License, Version 2.0, requires that modifications to the dwml source code be documented. -The following section summarizes these changes. The full details are available in the MarkItDown source code -repository under PR #1160 (https://github.com/microsoft/markitdown/pull/1160) +The following section summarizes these changes. The full details are available in the MarkItUp source code +repository under PR #1160 (https://github.com/microsoft/markitup/pull/1160) This project incorporates `dwml/latex_dict.py` and `dwml/omml.py` files without any additional logic modifications (which -lives in `packages/markitdown/src/markitdown/converter_utils/docx/math` location). However, we have reformatted the code +lives in `packages/markitup/src/markitup/converter_utils/docx/math` location). However, we have reformatted the code according to `black` code formatter. From `tests/docx.py` file, we have used `DOCXML_ROOT` XML namespaces and the rest of the file is not used. diff --git a/packages/markitdown/pyproject.toml b/packages/markitup/pyproject.toml similarity index 79% rename from packages/markitdown/pyproject.toml rename to packages/markitup/pyproject.toml index 79f67d2..cb8e31d 100644 --- a/packages/markitdown/pyproject.toml +++ b/packages/markitup/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "markitdown" +name = "markitup" dynamic = ["version"] description = 'Utility tool for converting various files to Markdown' readme = "README.md" @@ -58,15 +58,15 @@ youtube-transcription = ["youtube-transcript-api"] az-doc-intel = ["azure-ai-documentintelligence", "azure-identity"] [project.urls] -Documentation = "https://github.com/microsoft/markitdown#readme" -Issues = "https://github.com/microsoft/markitdown/issues" -Source = "https://github.com/microsoft/markitdown" +Documentation = "https://github.com/microsoft/markitup#readme" +Issues = "https://github.com/microsoft/markitup/issues" +Source = 
"https://github.com/microsoft/markitup" [tool.hatch.version] -path = "src/markitdown/__about__.py" +path = "src/markitup/__about__.py" [project.scripts] -markitdown = "markitdown.__main__:main" +markitup = "markitup.__main__:main" [tool.hatch.envs.default] features = ["all"] @@ -85,19 +85,19 @@ extra-dependencies = [ ] [tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive --ignore-missing-imports {args:src/markitdown tests}" +check = "mypy --install-types --non-interactive --ignore-missing-imports {args:src/markitup tests}" [tool.coverage.run] -source_pkgs = ["markitdown", "tests"] +source_pkgs = ["markitup", "tests"] branch = true parallel = true omit = [ - "src/markitdown/__about__.py", + "src/markitup/__about__.py", ] [tool.coverage.paths] -markitdown = ["src/markitdown", "*/markitdown/src/markitdown"] -tests = ["tests", "*/markitdown/tests"] +markitup = ["src/markitup", "*/markitup/src/markitup"] +tests = ["tests", "*/markitup/tests"] [tool.coverage.report] exclude_lines = [ @@ -107,4 +107,4 @@ exclude_lines = [ ] [tool.hatch.build.targets.sdist] -only-include = ["src/markitdown"] +only-include = ["src/markitup"] diff --git a/packages/markitdown/src/markitdown/__about__.py b/packages/markitup/src/markitup/__about__.py similarity index 100% rename from packages/markitdown/src/markitdown/__about__.py rename to packages/markitup/src/markitup/__about__.py diff --git a/packages/markitdown/src/markitdown/__init__.py b/packages/markitup/src/markitup/__init__.py similarity index 87% rename from packages/markitdown/src/markitdown/__init__.py rename to packages/markitup/src/markitup/__init__.py index af356dd..18b38aa 100644 --- a/packages/markitdown/src/markitdown/__init__.py +++ b/packages/markitup/src/markitup/__init__.py @@ -3,15 +3,15 @@ # SPDX-License-Identifier: MIT from .__about__ import __version__ -from ._markitdown import ( - MarkItDown, +from ._markitup import ( + MarkItUp, PRIORITY_SPECIFIC_FILE_FORMAT, PRIORITY_GENERIC_FILE_FORMAT, 
) from ._base_converter import DocumentConverterResult, DocumentConverter from ._stream_info import StreamInfo from ._exceptions import ( - MarkItDownException, + MarkItUpException, MissingDependencyException, FailedConversionAttempt, FileConversionException, @@ -20,10 +20,10 @@ from ._exceptions import ( __all__ = [ "__version__", - "MarkItDown", + "MarkItUp", "DocumentConverter", "DocumentConverterResult", - "MarkItDownException", + "MarkItUpException", "MissingDependencyException", "FailedConversionAttempt", "FileConversionException", diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitup/src/markitup/__main__.py similarity index 88% rename from packages/markitdown/src/markitdown/__main__.py rename to packages/markitup/src/markitup/__main__.py index cfb1c6e..fb24577 100644 --- a/packages/markitdown/src/markitdown/__main__.py +++ b/packages/markitup/src/markitup/__main__.py @@ -8,40 +8,40 @@ import locale from textwrap import dedent from importlib.metadata import entry_points from .__about__ import __version__ -from ._markitdown import MarkItDown, StreamInfo, DocumentConverterResult +from ._markitup import MarkItUp, StreamInfo, DocumentConverterResult def main(): parser = argparse.ArgumentParser( description="Convert various file formats to markdown.", - prog="markitdown", + prog="markitup", formatter_class=argparse.RawDescriptionHelpFormatter, usage=dedent( """ SYNTAX: - markitdown - If FILENAME is empty, markitdown reads from stdin. + markitup + If FILENAME is empty, markitup reads from stdin. 
EXAMPLE: - markitdown example.pdf + markitup example.pdf OR - cat example.pdf | markitdown + cat example.pdf | markitup OR - markitdown < example.pdf + markitup < example.pdf OR to save to a file use - markitdown example.pdf -o example.md + markitup example.pdf -o example.md OR - markitdown example.pdf > example.md + markitup example.pdf > example.md """ ).strip(), ) @@ -158,12 +158,12 @@ def main(): if args.list_plugins: # List installed plugins, then exit - print("Installed MarkItDown 3rd-party Plugins:\n") - plugin_entry_points = list(entry_points(group="markitdown.plugin")) + print("Installed MarkItUp 3rd-party Plugins:\n") + plugin_entry_points = list(entry_points(group="markitup.plugin")) if len(plugin_entry_points) == 0: print(" * No 3rd-party plugins installed.") print( - "\nFind plugins by searching for the hashtag #markitdown-plugin on GitHub.\n" + "\nFind plugins by searching for the hashtag #markitup-plugin on GitHub.\n" ) else: for entry_point in plugin_entry_points: @@ -181,20 +181,20 @@ def main(): elif args.filename is None: _exit_with_error("Filename is required when using Document Intelligence.") - markitdown = MarkItDown( + markitup = MarkItUp( enable_plugins=args.use_plugins, docintel_endpoint=args.endpoint ) else: - markitdown = MarkItDown(enable_plugins=args.use_plugins) + markitup = MarkItUp(enable_plugins=args.use_plugins) if args.filename is None: - result = markitdown.convert_stream( + result = markitup.convert_stream( sys.stdin.buffer, stream_info=stream_info, keep_data_uris=args.keep_data_uris, ) else: - result = markitdown.convert( + result = markitup.convert( args.filename, stream_info=stream_info, keep_data_uris=args.keep_data_uris ) diff --git a/packages/markitdown/src/markitdown/_base_converter.py b/packages/markitup/src/markitup/_base_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/_base_converter.py rename to packages/markitup/src/markitup/_base_converter.py diff --git 
a/packages/markitdown/src/markitdown/_exceptions.py b/packages/markitup/src/markitup/_exceptions.py similarity index 81% rename from packages/markitdown/src/markitdown/_exceptions.py rename to packages/markitup/src/markitup/_exceptions.py index 2f87ac8..fca098f 100644 --- a/packages/markitdown/src/markitdown/_exceptions.py +++ b/packages/markitup/src/markitup/_exceptions.py @@ -1,24 +1,24 @@ from typing import Optional, List, Any -MISSING_DEPENDENCY_MESSAGE = """{converter} recognized the input as a potential {extension} file, but the dependencies needed to read {extension} files have not been installed. To resolve this error, include the optional dependency [{feature}] or [all] when installing MarkItDown. For example: +MISSING_DEPENDENCY_MESSAGE = """{converter} recognized the input as a potential {extension} file, but the dependencies needed to read {extension} files have not been installed. To resolve this error, include the optional dependency [{feature}] or [all] when installing MarkItUp. For example: -* pip install markitdown[{feature}] -* pip install markitdown[all] -* pip install markitdown[{feature}, ...] +* pip install markitup[{feature}] +* pip install markitup[all] +* pip install markitup[{feature}, ...] * etc.""" -class MarkItDownException(Exception): +class MarkItUpException(Exception): """ - Base exception class for MarkItDown. + Base exception class for MarkItUp. """ pass -class MissingDependencyException(MarkItDownException): +class MissingDependencyException(MarkItUpException): """ - Converters shipped with MarkItDown may depend on optional + Converters shipped with MarkItUp may depend on optional dependencies. This exception is thrown when a converter's convert() method is called, but the required dependency is not installed. 
This is not necessarily a fatal error, as the converter @@ -31,7 +31,7 @@ class MissingDependencyException(MarkItDownException): pass -class UnsupportedFormatException(MarkItDownException): +class UnsupportedFormatException(MarkItUpException): """ Thrown when no suitable converter was found for the given file. """ @@ -49,7 +49,7 @@ class FailedConversionAttempt(object): self.exc_info = exc_info -class FileConversionException(MarkItDownException): +class FileConversionException(MarkItUpException): """ Thrown when a suitable converter was found, but the conversion process fails for any reason. diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitup/src/markitup/_markitup.py similarity index 99% rename from packages/markitdown/src/markitdown/_markitdown.py rename to packages/markitup/src/markitup/_markitup.py index 682902b..9777286 100644 --- a/packages/markitdown/src/markitdown/_markitdown.py +++ b/packages/markitup/src/markitup/_markitup.py @@ -75,7 +75,7 @@ def _load_plugins() -> Union[None, List[Any]]: # Load plugins _plugins = [] - for entry_point in entry_points(group="markitdown.plugin"): + for entry_point in entry_points(group="markitup.plugin"): try: _plugins.append(entry_point.load()) except Exception: @@ -93,7 +93,7 @@ class ConverterRegistration: priority: float -class MarkItDown: +class MarkItUp: """(In preview) An extremely simple text-based document reader, suitable for LLM use. 
This reader will convert common file-types or webpages to Markdown.""" @@ -176,7 +176,7 @@ class MarkItDown: PlainTextConverter(), priority=PRIORITY_GENERIC_FILE_FORMAT ) self.register_converter( - ZipConverter(markitdown=self), priority=PRIORITY_GENERIC_FILE_FORMAT + ZipConverter(markitup=self), priority=PRIORITY_GENERIC_FILE_FORMAT ) self.register_converter( HtmlConverter(), priority=PRIORITY_GENERIC_FILE_FORMAT diff --git a/packages/markitdown/src/markitdown/_stream_info.py b/packages/markitup/src/markitup/_stream_info.py similarity index 100% rename from packages/markitdown/src/markitdown/_stream_info.py rename to packages/markitup/src/markitup/_stream_info.py diff --git a/packages/markitdown/src/markitdown/_uri_utils.py b/packages/markitup/src/markitup/_uri_utils.py similarity index 100% rename from packages/markitdown/src/markitdown/_uri_utils.py rename to packages/markitup/src/markitup/_uri_utils.py diff --git a/packages/markitdown/src/markitdown/converter_utils/__init__.py b/packages/markitup/src/markitup/converter_utils/__init__.py similarity index 100% rename from packages/markitdown/src/markitdown/converter_utils/__init__.py rename to packages/markitup/src/markitup/converter_utils/__init__.py diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/__init__.py b/packages/markitup/src/markitup/converter_utils/docx/__init__.py similarity index 100% rename from packages/markitdown/src/markitdown/converter_utils/docx/__init__.py rename to packages/markitup/src/markitup/converter_utils/docx/__init__.py diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/math/__init__.py b/packages/markitup/src/markitup/converter_utils/docx/math/__init__.py similarity index 100% rename from packages/markitdown/src/markitdown/converter_utils/docx/math/__init__.py rename to packages/markitup/src/markitup/converter_utils/docx/math/__init__.py diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/math/latex_dict.py 
b/packages/markitup/src/markitup/converter_utils/docx/math/latex_dict.py similarity index 100% rename from packages/markitdown/src/markitdown/converter_utils/docx/math/latex_dict.py rename to packages/markitup/src/markitup/converter_utils/docx/math/latex_dict.py diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py b/packages/markitup/src/markitup/converter_utils/docx/math/omml.py similarity index 100% rename from packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py rename to packages/markitup/src/markitup/converter_utils/docx/math/omml.py diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py b/packages/markitup/src/markitup/converter_utils/docx/pre_process.py similarity index 100% rename from packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py rename to packages/markitup/src/markitup/converter_utils/docx/pre_process.py diff --git a/packages/markitdown/src/markitdown/converters/__init__.py b/packages/markitup/src/markitup/converters/__init__.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/__init__.py rename to packages/markitup/src/markitup/converters/__init__.py diff --git a/packages/markitdown/src/markitdown/converters/_audio_converter.py b/packages/markitup/src/markitup/converters/_audio_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_audio_converter.py rename to packages/markitup/src/markitup/converters/_audio_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py b/packages/markitup/src/markitup/converters/_bing_serp_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_bing_serp_converter.py rename to packages/markitup/src/markitup/converters/_bing_serp_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_csv_converter.py b/packages/markitup/src/markitup/converters/_csv_converter.py 
similarity index 100% rename from packages/markitdown/src/markitdown/converters/_csv_converter.py rename to packages/markitup/src/markitup/converters/_csv_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py b/packages/markitup/src/markitup/converters/_doc_intel_converter.py similarity index 99% rename from packages/markitdown/src/markitdown/converters/_doc_intel_converter.py rename to packages/markitup/src/markitup/converters/_doc_intel_converter.py index d2dce91..c71d7cc 100644 --- a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py +++ b/packages/markitup/src/markitup/converters/_doc_intel_converter.py @@ -161,7 +161,7 @@ class DocumentIntelligenceConverter(DocumentConverter): # unless explicitly requested. if _dependency_exc_info is not None: raise MissingDependencyException( - "DocumentIntelligenceConverter requires the optional dependency [az-doc-intel] (or [all]) to be installed. E.g., `pip install markitdown[az-doc-intel]`" + "DocumentIntelligenceConverter requires the optional dependency [az-doc-intel] (or [all]) to be installed. 
E.g., `pip install markitup[az-doc-intel]`" ) from _dependency_exc_info[ 1 ].with_traceback( # type: ignore[union-attr] diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitup/src/markitup/converters/_docx_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_docx_converter.py rename to packages/markitup/src/markitup/converters/_docx_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_epub_converter.py b/packages/markitup/src/markitup/converters/_epub_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_epub_converter.py rename to packages/markitup/src/markitup/converters/_epub_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_exiftool.py b/packages/markitup/src/markitup/converters/_exiftool.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_exiftool.py rename to packages/markitup/src/markitup/converters/_exiftool.py diff --git a/packages/markitdown/src/markitdown/converters/_html_converter.py b/packages/markitup/src/markitup/converters/_html_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_html_converter.py rename to packages/markitup/src/markitup/converters/_html_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_image_converter.py b/packages/markitup/src/markitup/converters/_image_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_image_converter.py rename to packages/markitup/src/markitup/converters/_image_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py b/packages/markitup/src/markitup/converters/_ipynb_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_ipynb_converter.py rename to packages/markitup/src/markitup/converters/_ipynb_converter.py diff --git 
a/packages/markitdown/src/markitdown/converters/_llm_caption.py b/packages/markitup/src/markitup/converters/_llm_caption.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_llm_caption.py rename to packages/markitup/src/markitup/converters/_llm_caption.py diff --git a/packages/markitdown/src/markitdown/converters/_markdownify.py b/packages/markitup/src/markitup/converters/_markdownify.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_markdownify.py rename to packages/markitup/src/markitup/converters/_markdownify.py diff --git a/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py b/packages/markitup/src/markitup/converters/_outlook_msg_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py rename to packages/markitup/src/markitup/converters/_outlook_msg_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_pdf_converter.py b/packages/markitup/src/markitup/converters/_pdf_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_pdf_converter.py rename to packages/markitup/src/markitup/converters/_pdf_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py b/packages/markitup/src/markitup/converters/_plain_text_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_plain_text_converter.py rename to packages/markitup/src/markitup/converters/_plain_text_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_pptx_converter.py b/packages/markitup/src/markitup/converters/_pptx_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_pptx_converter.py rename to packages/markitup/src/markitup/converters/_pptx_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_rss_converter.py 
b/packages/markitup/src/markitup/converters/_rss_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_rss_converter.py rename to packages/markitup/src/markitup/converters/_rss_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_transcribe_audio.py b/packages/markitup/src/markitup/converters/_transcribe_audio.py similarity index 95% rename from packages/markitdown/src/markitdown/converters/_transcribe_audio.py rename to packages/markitup/src/markitup/converters/_transcribe_audio.py index d558e46..5e09d23 100644 --- a/packages/markitdown/src/markitdown/converters/_transcribe_audio.py +++ b/packages/markitup/src/markitup/converters/_transcribe_audio.py @@ -24,7 +24,7 @@ def transcribe_audio(file_stream: BinaryIO, *, audio_format: str = "wav") -> str # Check for installed dependencies if _dependency_exc_info is not None: raise MissingDependencyException( - "Speech transcription requires installing MarkItdown with the [audio-transcription] optional dependencies. E.g., `pip install markitdown[audio-transcription]` or `pip install markitdown[all]`" + "Speech transcription requires installing MarkItUp with the [audio-transcription] optional dependencies.
E.g., `pip install markitup[audio-transcription]` or `pip install markitup[all]`" ) from _dependency_exc_info[ 1 ].with_traceback( # type: ignore[union-attr] diff --git a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py b/packages/markitup/src/markitup/converters/_wikipedia_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_wikipedia_converter.py rename to packages/markitup/src/markitup/converters/_wikipedia_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py b/packages/markitup/src/markitup/converters/_xlsx_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_xlsx_converter.py rename to packages/markitup/src/markitup/converters/_xlsx_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_youtube_converter.py b/packages/markitup/src/markitup/converters/_youtube_converter.py similarity index 100% rename from packages/markitdown/src/markitdown/converters/_youtube_converter.py rename to packages/markitup/src/markitup/converters/_youtube_converter.py diff --git a/packages/markitdown/src/markitdown/converters/_zip_converter.py b/packages/markitup/src/markitup/converters/_zip_converter.py similarity index 95% rename from packages/markitdown/src/markitdown/converters/_zip_converter.py rename to packages/markitup/src/markitup/converters/_zip_converter.py index cb1a7e6..897ff72 100644 --- a/packages/markitdown/src/markitdown/converters/_zip_converter.py +++ b/packages/markitup/src/markitup/converters/_zip_converter.py @@ -11,7 +11,7 @@ from .._exceptions import UnsupportedFormatException, FileConversionException # Break otherwise circular import for type hinting if TYPE_CHECKING: - from .._markitdown import MarkItDown + from .._markitup import MarkItUp ACCEPTED_MIME_TYPE_PREFIXES = [ "application/zip", @@ -62,10 +62,10 @@ class ZipConverter(DocumentConverter): def __init__( self, *, - markitdown: "MarkItDown", + 
markitup: "MarkItUp", ): super().__init__() - self._markitdown = markitdown + self._markitup = markitup def accepts( self, @@ -102,7 +102,7 @@ class ZipConverter(DocumentConverter): extension=os.path.splitext(name)[1], filename=os.path.basename(name), ) - result = self._markitdown.convert_stream( + result = self._markitup.convert_stream( stream=z_file_stream, stream_info=z_file_stream_info, ) diff --git a/packages/markitdown/src/markitdown/py.typed b/packages/markitup/src/markitup/py.typed similarity index 100% rename from packages/markitdown/src/markitdown/py.typed rename to packages/markitup/src/markitup/py.typed diff --git a/packages/markitdown/tests/__init__.py b/packages/markitup/tests/__init__.py similarity index 100% rename from packages/markitdown/tests/__init__.py rename to packages/markitup/tests/__init__.py diff --git a/packages/markitdown/tests/_test_vectors.py b/packages/markitup/tests/_test_vectors.py similarity index 99% rename from packages/markitdown/tests/_test_vectors.py rename to packages/markitup/tests/_test_vectors.py index 74fa9bd..f46cd23 100644 --- a/packages/markitdown/tests/_test_vectors.py +++ b/packages/markitup/tests/_test_vectors.py @@ -183,7 +183,7 @@ GENERAL_TEST_VECTORS = [ must_include=[ "# Test Notebook", "```python", - 'print("markitdown")', + 'print("markitup")', "```", "## Code Cell Below", ], @@ -223,7 +223,7 @@ GENERAL_TEST_VECTORS = [ url=None, must_include=[ "**Authors:** Test Author", - "A test EPUB document for MarkItDown testing", + "A test EPUB document for MarkItUp testing", "# Chapter 1: Test Content", "This is a **test** paragraph with some formatting", "* A bullet point", diff --git a/packages/markitdown/tests/test_cli_misc.py b/packages/markitup/tests/test_cli_misc.py similarity index 82% rename from packages/markitdown/tests/test_cli_misc.py rename to packages/markitup/tests/test_cli_misc.py index 345d5cc..3e22fec 100644 --- a/packages/markitdown/tests/test_cli_misc.py +++ 
b/packages/markitup/tests/test_cli_misc.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -m pytest import subprocess import pytest -from markitdown import __version__ +from markitup import __version__ # This file contains CLI tests that are not directly tested by the FileTestVectors. # This includes things like help messages, version numbers, and invalid flags. @@ -9,7 +9,7 @@ from markitdown import __version__ def test_version() -> None: result = subprocess.run( - ["python", "-m", "markitdown", "--version"], capture_output=True, text=True + ["python", "-m", "markitup", "--version"], capture_output=True, text=True ) assert result.returncode == 0, f"CLI exited with error: {result.stderr}" @@ -18,7 +18,7 @@ def test_version() -> None: def test_invalid_flag() -> None: result = subprocess.run( - ["python", "-m", "markitdown", "--foobar"], capture_output=True, text=True + ["python", "-m", "markitup", "--foobar"], capture_output=True, text=True ) assert result.returncode != 0, f"CLI exited with error: {result.stderr}" diff --git a/packages/markitdown/tests/test_cli_vectors.py b/packages/markitup/tests/test_cli_vectors.py similarity index 95% rename from packages/markitdown/tests/test_cli_vectors.py rename to packages/markitup/tests/test_cli_vectors.py index 6030482..d24ad61 100644 --- a/packages/markitdown/tests/test_cli_vectors.py +++ b/packages/markitup/tests/test_cli_vectors.py @@ -19,8 +19,8 @@ else: FileTestVector, ) -from markitdown import ( - MarkItDown, +from markitup import ( + MarkItUp, UnsupportedFormatException, FileConversionException, StreamInfo, @@ -31,7 +31,7 @@ skip_remote = ( ) # Don't run these tests in CI TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files") -TEST_FILES_URL = "https://raw.githubusercontent.com/microsoft/markitdown/refs/heads/main/packages/markitdown/tests/test_files" +TEST_FILES_URL = "https://raw.githubusercontent.com/microsoft/markitup/refs/heads/main/packages/markitup/tests/test_files" # Prepare CLI test vectors (remove 
vectors that require mockig the url) @@ -55,7 +55,7 @@ def test_output_to_stdout(shared_tmp_dir, test_vector) -> None: [ "python", "-m", - "markitdown", + "markitup", os.path.join(TEST_FILES_DIR, test_vector.filename), ], capture_output=True, @@ -78,7 +78,7 @@ def test_output_to_file(shared_tmp_dir, test_vector) -> None: [ "python", "-m", - "markitdown", + "markitup", "-o", output_file, os.path.join(TEST_FILES_DIR, test_vector.filename), @@ -113,7 +113,7 @@ def test_input_from_stdin_without_hints(shared_tmp_dir, test_vector) -> None: [ "python", "-m", - "markitdown", + "markitup", os.path.join(TEST_FILES_DIR, test_vector.filename), ], input=test_input, @@ -140,11 +140,11 @@ def test_convert_url(shared_tmp_dir, test_vector): """Test the conversion of a stream with no stream info.""" # Note: tmp_dir is not used here, but is needed to match the signature - markitdown = MarkItDown() + markitup = MarkItUp() time.sleep(1) # Ensure we don't hit rate limits result = subprocess.run( - ["python", "-m", "markitdown", TEST_FILES_URL + "/" + test_vector.filename], + ["python", "-m", "markitup", TEST_FILES_URL + "/" + test_vector.filename], capture_output=True, text=False, ) @@ -166,7 +166,7 @@ def test_output_to_file_with_data_uris(shared_tmp_dir, test_vector) -> None: [ "python", "-m", - "markitdown", + "markitup", "--keep-data-uris", "-o", output_file, diff --git a/packages/markitdown/tests/test_files/equations.docx b/packages/markitup/tests/test_files/equations.docx similarity index 100% rename from packages/markitdown/tests/test_files/equations.docx rename to packages/markitup/tests/test_files/equations.docx diff --git a/packages/markitdown/tests/test_files/random.bin b/packages/markitup/tests/test_files/random.bin similarity index 100% rename from packages/markitdown/tests/test_files/random.bin rename to packages/markitup/tests/test_files/random.bin diff --git a/packages/markitdown/tests/test_files/test.docx b/packages/markitup/tests/test_files/test.docx similarity index 
100% rename from packages/markitdown/tests/test_files/test.docx rename to packages/markitup/tests/test_files/test.docx diff --git a/packages/markitdown/tests/test_files/test.epub b/packages/markitup/tests/test_files/test.epub similarity index 100% rename from packages/markitdown/tests/test_files/test.epub rename to packages/markitup/tests/test_files/test.epub diff --git a/packages/markitdown/tests/test_files/test.jpg b/packages/markitup/tests/test_files/test.jpg similarity index 100% rename from packages/markitdown/tests/test_files/test.jpg rename to packages/markitup/tests/test_files/test.jpg diff --git a/packages/markitdown/tests/test_files/test.json b/packages/markitup/tests/test_files/test.json similarity index 100% rename from packages/markitdown/tests/test_files/test.json rename to packages/markitup/tests/test_files/test.json diff --git a/packages/markitdown/tests/test_files/test.m4a b/packages/markitup/tests/test_files/test.m4a similarity index 100% rename from packages/markitdown/tests/test_files/test.m4a rename to packages/markitup/tests/test_files/test.m4a diff --git a/packages/markitdown/tests/test_files/test.mp3 b/packages/markitup/tests/test_files/test.mp3 similarity index 100% rename from packages/markitdown/tests/test_files/test.mp3 rename to packages/markitup/tests/test_files/test.mp3 diff --git a/packages/markitdown/tests/test_files/test.pdf b/packages/markitup/tests/test_files/test.pdf similarity index 100% rename from packages/markitdown/tests/test_files/test.pdf rename to packages/markitup/tests/test_files/test.pdf diff --git a/packages/markitdown/tests/test_files/test.pptx b/packages/markitup/tests/test_files/test.pptx similarity index 100% rename from packages/markitdown/tests/test_files/test.pptx rename to packages/markitup/tests/test_files/test.pptx diff --git a/packages/markitdown/tests/test_files/test.wav b/packages/markitup/tests/test_files/test.wav similarity index 100% rename from packages/markitdown/tests/test_files/test.wav rename to 
packages/markitup/tests/test_files/test.wav diff --git a/packages/markitdown/tests/test_files/test.xls b/packages/markitup/tests/test_files/test.xls similarity index 100% rename from packages/markitdown/tests/test_files/test.xls rename to packages/markitup/tests/test_files/test.xls diff --git a/packages/markitdown/tests/test_files/test.xlsx b/packages/markitup/tests/test_files/test.xlsx similarity index 100% rename from packages/markitdown/tests/test_files/test.xlsx rename to packages/markitup/tests/test_files/test.xlsx diff --git a/packages/markitdown/tests/test_files/test_blog.html b/packages/markitup/tests/test_files/test_blog.html similarity index 100% rename from packages/markitdown/tests/test_files/test_blog.html rename to packages/markitup/tests/test_files/test_blog.html diff --git a/packages/markitdown/tests/test_files/test_files.zip b/packages/markitup/tests/test_files/test_files.zip similarity index 100% rename from packages/markitdown/tests/test_files/test_files.zip rename to packages/markitup/tests/test_files/test_files.zip diff --git a/packages/markitdown/tests/test_files/test_llm.jpg b/packages/markitup/tests/test_files/test_llm.jpg similarity index 100% rename from packages/markitdown/tests/test_files/test_llm.jpg rename to packages/markitup/tests/test_files/test_llm.jpg diff --git a/packages/markitdown/tests/test_files/test_mskanji.csv b/packages/markitup/tests/test_files/test_mskanji.csv similarity index 100% rename from packages/markitdown/tests/test_files/test_mskanji.csv rename to packages/markitup/tests/test_files/test_mskanji.csv diff --git a/packages/markitdown/tests/test_files/test_notebook.ipynb b/packages/markitup/tests/test_files/test_notebook.ipynb similarity index 85% rename from packages/markitdown/tests/test_files/test_notebook.ipynb rename to packages/markitup/tests/test_files/test_notebook.ipynb index 28a546f..7ca6536 100644 --- a/packages/markitdown/tests/test_files/test_notebook.ipynb +++ 
b/packages/markitup/tests/test_files/test_notebook.ipynb @@ -10,21 +10,10 @@ }, { "cell_type": "code", - "execution_count": 11, "id": "3f2a5bbd", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "markitdown\n" - ] - } - ], - "source": [ - "print(\"markitdown\")" - ] + "outputs": [], + "source": "print(\"markitup\")" }, { "cell_type": "markdown", @@ -86,4 +75,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/packages/markitdown/tests/test_files/test_outlook_msg.msg b/packages/markitup/tests/test_files/test_outlook_msg.msg similarity index 100% rename from packages/markitdown/tests/test_files/test_outlook_msg.msg rename to packages/markitup/tests/test_files/test_outlook_msg.msg diff --git a/packages/markitdown/tests/test_files/test_rss.xml b/packages/markitup/tests/test_files/test_rss.xml similarity index 100% rename from packages/markitdown/tests/test_files/test_rss.xml rename to packages/markitup/tests/test_files/test_rss.xml diff --git a/packages/markitdown/tests/test_files/test_serp.html b/packages/markitup/tests/test_files/test_serp.html similarity index 100% rename from packages/markitdown/tests/test_files/test_serp.html rename to packages/markitup/tests/test_files/test_serp.html diff --git a/packages/markitdown/tests/test_files/test_wikipedia.html b/packages/markitup/tests/test_files/test_wikipedia.html similarity index 100% rename from packages/markitdown/tests/test_files/test_wikipedia.html rename to packages/markitup/tests/test_files/test_wikipedia.html diff --git a/packages/markitdown/tests/test_files/test_with_comment.docx b/packages/markitup/tests/test_files/test_with_comment.docx similarity index 100% rename from packages/markitdown/tests/test_files/test_with_comment.docx rename to packages/markitup/tests/test_files/test_with_comment.docx diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitup/tests/test_module_misc.py similarity index 89% 
rename from packages/markitdown/tests/test_module_misc.py rename to packages/markitup/tests/test_module_misc.py index 1819183..5e50c03 100644 --- a/packages/markitdown/tests/test_module_misc.py +++ b/packages/markitup/tests/test_module_misc.py @@ -6,10 +6,10 @@ import shutil import openai import pytest -from markitdown._uri_utils import parse_data_uri, file_uri_to_path +from markitup._uri_utils import parse_data_uri, file_uri_to_path -from markitdown import ( - MarkItDown, +from markitup import ( + MarkItUp, UnsupportedFormatException, FileConversionException, StreamInfo, @@ -253,20 +253,20 @@ def test_file_uris() -> None: def test_docx_comments() -> None: - markitdown = MarkItDown() + markitup = MarkItUp() # Test DOCX processing, with comments and setting style_map on init - markitdown_with_style_map = MarkItDown(style_map="comment-reference => ") - result = markitdown_with_style_map.convert( + markitup_with_style_map = MarkItUp(style_map="comment-reference => ") + result = markitup_with_style_map.convert( os.path.join(TEST_FILES_DIR, "test_with_comment.docx") ) validate_strings(result, DOCX_COMMENT_TEST_STRINGS) def test_docx_equations() -> None: - markitdown = MarkItDown() + markitup = MarkItUp() docx_file = os.path.join(TEST_FILES_DIR, "equations.docx") - result = markitdown.convert(docx_file) + result = markitup.convert(docx_file) # Check for inline equation m=1 (wrapped with single $) is present assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found" @@ -277,16 +277,16 @@ def test_docx_equations() -> None: def test_input_as_strings() -> None: - markitdown = MarkItDown() + markitup = MarkItUp() # Test input from a stream input_data = b"

Test

" - result = markitdown.convert_stream(io.BytesIO(input_data)) + result = markitup.convert_stream(io.BytesIO(input_data)) assert "# Test" in result.text_content # Test input with leading blank characters input_data = b" \n\n\n

Test

" - result = markitdown.convert_stream(io.BytesIO(input_data)) + result = markitup.convert_stream(io.BytesIO(input_data)) assert "# Test" in result.text_content @@ -294,16 +294,16 @@ def test_input_as_strings() -> None: skip_remote, reason="do not run tests that query external urls", ) -def test_markitdown_remote() -> None: - markitdown = MarkItDown() +def test_markitup_remote() -> None: + markitup = MarkItUp() # By URL - result = markitdown.convert(PDF_TEST_URL) + result = markitup.convert(PDF_TEST_URL) for test_string in PDF_TEST_STRINGS: assert test_string in result.text_content # Youtube - result = markitdown.convert(YOUTUBE_TEST_URL) + result = markitup.convert(YOUTUBE_TEST_URL) for test_string in YOUTUBE_TEST_STRINGS: assert test_string in result.text_content @@ -313,11 +313,11 @@ def test_markitdown_remote() -> None: reason="do not run remotely run speech transcription tests", ) def test_speech_transcription() -> None: - markitdown = MarkItDown() + markitup = MarkItUp() # Test WAV files, MP3 and M4A files for file_name in ["test.wav", "test.mp3", "test.m4a"]: - result = markitdown.convert(os.path.join(TEST_FILES_DIR, file_name)) + result = markitup.convert(os.path.join(TEST_FILES_DIR, file_name)) result_lower = result.text_content.lower() assert ( ("1" in result_lower or "one" in result_lower) @@ -330,13 +330,13 @@ def test_speech_transcription() -> None: def test_exceptions() -> None: # Check that an exception is raised when trying to convert an unsupported format - markitdown = MarkItDown() + markitup = MarkItUp() with pytest.raises(UnsupportedFormatException): - markitdown.convert(os.path.join(TEST_FILES_DIR, "random.bin")) + markitup.convert(os.path.join(TEST_FILES_DIR, "random.bin")) # Check that an exception is raised when trying to convert a file that is corrupted with pytest.raises(FileConversionException) as exc_info: - markitdown.convert( + markitup.convert( os.path.join(TEST_FILES_DIR, "random.bin"), file_extension=".pptx" ) assert 
len(exc_info.value.attempts) == 1 @@ -347,27 +347,27 @@ def test_exceptions() -> None: skip_exiftool, reason="do not run if exiftool is not installed", ) -def test_markitdown_exiftool() -> None: +def test_markitup_exiftool() -> None: which_exiftool = shutil.which("exiftool") assert which_exiftool is not None # Test explicitly setting the location of exiftool - markitdown = MarkItDown(exiftool_path=which_exiftool) - result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg")) + markitup = MarkItUp(exiftool_path=which_exiftool) + result = markitup.convert(os.path.join(TEST_FILES_DIR, "test.jpg")) for key in JPG_TEST_EXIFTOOL: target = f"{key}: {JPG_TEST_EXIFTOOL[key]}" assert target in result.text_content # Test setting the exiftool path through an environment variable os.environ["EXIFTOOL_PATH"] = which_exiftool - markitdown = MarkItDown() - result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg")) + markitup = MarkItUp() + result = markitup.convert(os.path.join(TEST_FILES_DIR, "test.jpg")) for key in JPG_TEST_EXIFTOOL: target = f"{key}: {JPG_TEST_EXIFTOOL[key]}" assert target in result.text_content # Test some other media types - result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.mp3")) + result = markitup.convert(os.path.join(TEST_FILES_DIR, "test.mp3")) for key in MP3_TEST_EXIFTOOL: target = f"{key}: {MP3_TEST_EXIFTOOL[key]}" assert target in result.text_content @@ -377,11 +377,11 @@ def test_markitdown_exiftool() -> None: skip_llm, reason="do not run llm tests without a key", ) -def test_markitdown_llm() -> None: +def test_markitup_llm() -> None: client = openai.OpenAI() - markitdown = MarkItDown(llm_client=client, llm_model="gpt-4o") + markitup = MarkItUp(llm_client=client, llm_model="gpt-4o") - result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_llm.jpg")) + result = markitup.convert(os.path.join(TEST_FILES_DIR, "test_llm.jpg")) for test_string in LLM_TEST_STRINGS: assert test_string in result.text_content @@ -391,7 
+391,7 @@ def test_markitdown_llm() -> None: assert test_string in result.text_content.lower() # Images embedded in PPTX files - result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.pptx")) + result = markitup.convert(os.path.join(TEST_FILES_DIR, "test.pptx")) # LLM Captions are included for test_string in LLM_TEST_STRINGS: assert test_string in result.text_content @@ -407,11 +407,11 @@ if __name__ == "__main__": test_file_uris, test_docx_comments, test_input_as_strings, - test_markitdown_remote, + test_markitup_remote, test_speech_transcription, test_exceptions, - test_markitdown_exiftool, - test_markitdown_llm, + test_markitup_exiftool, + test_markitup_llm, ]: print(f"Running {test.__name__}...", end="") test() diff --git a/packages/markitdown/tests/test_module_vectors.py b/packages/markitup/tests/test_module_vectors.py similarity index 90% rename from packages/markitdown/tests/test_module_vectors.py rename to packages/markitup/tests/test_module_vectors.py index 98fd0c7..1c38985 100644 --- a/packages/markitdown/tests/test_module_vectors.py +++ b/packages/markitup/tests/test_module_vectors.py @@ -12,8 +12,8 @@ if __name__ == "__main__": else: from ._test_vectors import GENERAL_TEST_VECTORS, DATA_URI_TEST_VECTORS -from markitdown import ( - MarkItDown, +from markitup import ( + MarkItUp, UnsupportedFormatException, FileConversionException, StreamInfo, @@ -24,19 +24,19 @@ skip_remote = ( ) # Don't run these tests in CI TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files") -TEST_FILES_URL = "https://raw.githubusercontent.com/microsoft/markitdown/refs/heads/main/packages/markitdown/tests/test_files" +TEST_FILES_URL = "https://raw.githubusercontent.com/microsoft/markitup/refs/heads/main/packages/markitup/tests/test_files" @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_guess_stream_info(test_vector): """Test the ability to guess stream info.""" - markitdown = MarkItDown() + markitup = MarkItUp() local_path = 
os.path.join(TEST_FILES_DIR, test_vector.filename) expected_extension = os.path.splitext(test_vector.filename)[1] with open(local_path, "rb") as stream: - guesses = markitdown._get_stream_info_guesses( + guesses = markitup._get_stream_info_guesses( stream, base_guess=StreamInfo( filename=os.path.basename(test_vector.filename), @@ -60,9 +60,9 @@ def test_guess_stream_info(test_vector): @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_convert_local(test_vector): """Test the conversion of a local file.""" - markitdown = MarkItDown() + markitup = MarkItUp() - result = markitdown.convert( + result = markitup.convert( os.path.join(TEST_FILES_DIR, test_vector.filename), url=test_vector.url ) for string in test_vector.must_include: @@ -74,7 +74,7 @@ def test_convert_local(test_vector): @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_convert_stream_with_hints(test_vector): """Test the conversion of a stream with full stream info.""" - markitdown = MarkItDown() + markitup = MarkItUp() stream_info = StreamInfo( extension=os.path.splitext(test_vector.filename)[1], @@ -83,7 +83,7 @@ def test_convert_stream_with_hints(test_vector): ) with open(os.path.join(TEST_FILES_DIR, test_vector.filename), "rb") as stream: - result = markitdown.convert( + result = markitup.convert( stream, stream_info=stream_info, url=test_vector.url ) for string in test_vector.must_include: @@ -95,10 +95,10 @@ def test_convert_stream_with_hints(test_vector): @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_convert_stream_without_hints(test_vector): """Test the conversion of a stream with no stream info.""" - markitdown = MarkItDown() + markitup = MarkItUp() with open(os.path.join(TEST_FILES_DIR, test_vector.filename), "rb") as stream: - result = markitdown.convert(stream, url=test_vector.url) + result = markitup.convert(stream, url=test_vector.url) for string in test_vector.must_include: assert string in result.markdown for string in 
test_vector.must_not_include: @@ -112,11 +112,11 @@ def test_convert_stream_without_hints(test_vector): @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_convert_http_uri(test_vector): """Test the conversion of an HTTP:// or HTTPS:// URI.""" - markitdown = MarkItDown() + markitup = MarkItUp() time.sleep(1) # Ensure we don't hit rate limits - result = markitdown.convert( + result = markitup.convert( TEST_FILES_URL + "/" + test_vector.filename, url=test_vector.url, # Mock where this file would be found ) @@ -129,9 +129,9 @@ def test_convert_http_uri(test_vector): @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_convert_file_uri(test_vector): """Test the conversion of a file:// URI.""" - markitdown = MarkItDown() + markitup = MarkItUp() - result = markitdown.convert( + result = markitup.convert( Path(os.path.join(TEST_FILES_DIR, test_vector.filename)).as_uri(), url=test_vector.url, ) @@ -144,7 +144,7 @@ def test_convert_file_uri(test_vector): @pytest.mark.parametrize("test_vector", GENERAL_TEST_VECTORS) def test_convert_data_uri(test_vector): """Test the conversion of a data URI.""" - markitdown = MarkItDown() + markitup = MarkItUp() data = "" with open(os.path.join(TEST_FILES_DIR, test_vector.filename), "rb") as stream: @@ -152,7 +152,7 @@ def test_convert_data_uri(test_vector): mimetype = test_vector.mimetype data_uri = f"data:{mimetype};base64,{data}" - result = markitdown.convert( + result = markitup.convert( data_uri, url=test_vector.url, ) @@ -165,10 +165,10 @@ def test_convert_data_uri(test_vector): @pytest.mark.parametrize("test_vector", DATA_URI_TEST_VECTORS) def test_convert_keep_data_uris(test_vector): """Test API functionality when keep_data_uris is enabled""" - markitdown = MarkItDown() + markitup = MarkItUp() # Test local file conversion - result = markitdown.convert( + result = markitup.convert( os.path.join(TEST_FILES_DIR, test_vector.filename), keep_data_uris=True, url=test_vector.url, @@ -183,7 +183,7 @@ def 
test_convert_keep_data_uris(test_vector): @pytest.mark.parametrize("test_vector", DATA_URI_TEST_VECTORS) def test_convert_stream_keep_data_uris(test_vector): """Test the conversion of a stream with no stream info.""" - markitdown = MarkItDown() + markitup = MarkItUp() stream_info = StreamInfo( extension=os.path.splitext(test_vector.filename)[1], @@ -192,7 +192,7 @@ def test_convert_stream_keep_data_uris(test_vector): ) with open(os.path.join(TEST_FILES_DIR, test_vector.filename), "rb") as stream: - result = markitdown.convert( + result = markitup.convert( stream, stream_info=stream_info, keep_data_uris=True, url=test_vector.url )