Add API endpoints for file conversion

This commit is contained in:
Brian Yang 2025-01-06 00:45:58 -05:00
parent 05b78e7ce1
commit da1007085c
4 changed files with 74 additions and 2 deletions

View file

@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*
# NOTE(review): this is a diff view; the next two RUN lines appear to be the
# old and new form of the same instruction — confirm against the real Dockerfile.
RUN pip install markitdown
# Installs fastapi and uvicorn alongside markitdown; uvicorn is the command the
# ENTRYPOINT below invokes.
RUN pip install markitdown fastapi uvicorn
# Default USERID and GROUPID
ARG USERID=10000
@ -20,4 +20,4 @@ ARG GROUPID=10000
# Run the container process under the user and group IDs given by the build args.
USER $USERID:$GROUPID
# NOTE(review): diff view; the two ENTRYPOINT lines appear to be the old (CLI)
# and new (API server) form of the same instruction — confirm which one is live.
ENTRYPOINT [ "markitdown" ]
# Start uvicorn serving the `app` object from markitdown.api on 0.0.0.0:8000.
ENTRYPOINT ["uvicorn", "markitdown.api:app", "--host", "0.0.0.0", "--port", "8000"]

View file

@ -66,6 +66,29 @@ result = md.convert("example.jpg")
print(result.text_content)
```
### RESTful API
MarkItDown also provides a RESTful API using FastAPI. You can deploy the application on Vercel or another hosting service.
#### Endpoints
- `GET /`: Returns a welcome message.
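- `POST /convert`: Converts an uploaded file to Markdown. Send the file as the `file` field of a multipart form; the JSON response contains `title` and `text_content`.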
#### Example Usage
Using `curl`:
```sh
curl -X POST "http://<your-deployment-url>/convert" -F "file=@path-to-file.pdf"
```
Using `httpie`:
```sh
http --form POST "http://<your-deployment-url>/convert" file@path-to-file.pdf
```
### Docker
```sh

21
src/markitdown/api.py Normal file
View file

@ -0,0 +1,21 @@
from fastapi import FastAPI, HTTPException, UploadFile, File
from markitdown import MarkItDown, UnsupportedFormatException, FileConversionException
# FastAPI application instance; the route decorators below register handlers on it.
app = FastAPI()
@app.get("/")
async def root():
    """Serve the index route with a fixed welcome payload."""
    greeting = {"message": "Welcome to the MarkItDown API"}
    return greeting
@app.post("/convert")
async def convert(file: UploadFile = File(...)):
    """Convert an uploaded file to Markdown.

    Args:
        file: The multipart upload to convert.

    Returns:
        A dict with the converted document's ``title`` and ``text_content``.

    Raises:
        HTTPException: 400 when MarkItDown raises
            ``UnsupportedFormatException``; 500 when it raises
            ``FileConversionException`` or any other error occurs.
    """
    import os  # local import: only needed for the extension parsing below

    # The upload's filename is optional and may have no suffix at all.
    # ``splitext`` keeps the leading dot (".docx", not "docx") and yields ""
    # for suffix-less names, which is mapped to None so the converter is not
    # handed a bogus extension.
    # NOTE(review): MarkItDown's documented usage passes dotted extensions
    # (e.g. file_extension=".pdf") — confirm against the installed version.
    extension = os.path.splitext(file.filename or "")[1] or None

    try:
        markitdown = MarkItDown()
        result = markitdown.convert_stream(file.file, file_extension=extension)
        return {"title": result.title, "text_content": result.text_content}
    except UnsupportedFormatException as exc:
        raise HTTPException(status_code=400, detail="Unsupported file format") from exc
    except FileConversionException as exc:
        raise HTTPException(status_code=500, detail="File conversion error") from exc
    except Exception as exc:
        # NOTE(review): echoing str(exc) to the client can expose internal
        # details (paths, library messages); kept for backward compatibility.
        raise HTTPException(status_code=500, detail=str(exc)) from exc

28
tests/test_api.py Normal file
View file

@ -0,0 +1,28 @@
import pytest
from fastapi.testclient import TestClient
from markitdown.api import app
# Module-level test client bound to the API app; used by every test below.
client = TestClient(app)
def test_root():
    """The index route answers 200 with the welcome payload."""
    reply = client.get("/")
    expected_body = {"message": "Welcome to the MarkItDown API"}
    assert reply.status_code == 200
    assert reply.json() == expected_body
def test_convert_success():
    """Posting tests/test_files/test.docx returns 200 and the expected text."""
    with open("tests/test_files/test.docx", "rb") as docx_handle:
        reply = client.post("/convert", files={"file": docx_handle})
    assert reply.status_code == 200
    markdown_text = reply.json()["text_content"]
    assert "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation" in markdown_text
def test_convert_unsupported_format():
    """Posting tests/test_files/test.unsupported is expected to yield a 400."""
    with open("tests/test_files/test.unsupported", "rb") as upload_handle:
        reply = client.post("/convert", files={"file": upload_handle})
    expected_body = {"detail": "Unsupported file format"}
    assert reply.status_code == 400
    assert reply.json() == expected_body
def test_convert_conversion_error():
    """Posting tests/test_files/test_corrupted.docx is expected to yield a 500."""
    with open("tests/test_files/test_corrupted.docx", "rb") as upload_handle:
        reply = client.post("/convert", files={"file": upload_handle})
    expected_body = {"detail": "File conversion error"}
    assert reply.status_code == 500
    assert reply.json() == expected_body