Add API endpoints for file conversion
This commit is contained in:
parent
05b78e7ce1
commit
da1007085c
4 changed files with 74 additions and 2 deletions
|
|
@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ffmpeg \
|
ffmpeg \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN pip install markitdown
|
# python-multipart is required by FastAPI to parse multipart/form-data
# uploads (the /convert endpoint takes an UploadFile).
RUN pip install markitdown fastapi uvicorn python-multipart
|
||||||
|
|
||||||
# Default USERID and GROUPID
|
# Default USERID and GROUPID
|
||||||
ARG USERID=10000
|
ARG USERID=10000
|
||||||
|
|
@ -20,4 +20,4 @@ ARG GROUPID=10000
|
||||||
|
|
||||||
USER $USERID:$GROUPID
|
USER $USERID:$GROUPID
|
||||||
|
|
||||||
ENTRYPOINT [ "markitdown" ]
|
ENTRYPOINT ["uvicorn", "markitdown.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
|
||||||
23
README.md
23
README.md
|
|
@ -66,6 +66,29 @@ result = md.convert("example.jpg")
|
||||||
print(result.text_content)
|
print(result.text_content)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### RESTful API
|
||||||
|
|
||||||
|
MarkItDown also provides a RESTful API using FastAPI. You can deploy the application on Vercel or another hosting service.
|
||||||
|
|
||||||
|
#### Endpoints
|
||||||
|
|
||||||
|
- `GET /`: Returns a welcome message.
|
||||||
|
- `POST /convert`: Converts an uploaded file to Markdown.
|
||||||
|
|
||||||
|
#### Example Usage
|
||||||
|
|
||||||
|
Using `curl`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
curl -X POST "http://<your-deployment-url>/convert" -F "file=@path-to-file.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
|
Using `httpie`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
http --form POST "http://<your-deployment-url>/convert" file@path-to-file.pdf
|
||||||
|
```
|
||||||
|
|
||||||
### Docker
|
### Docker
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
|
|
||||||
21
src/markitdown/api.py
Normal file
21
src/markitdown/api.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
from fastapi import FastAPI, HTTPException, UploadFile, File
|
||||||
|
from markitdown import MarkItDown, UnsupportedFormatException, FileConversionException
|
||||||
|
|
||||||
|
app = FastAPI()
|
||||||
|
|
||||||
|
@app.get("/")
async def root():
    """Return the welcome payload served at the API root."""
    greeting = {"message": "Welcome to the MarkItDown API"}
    return greeting
|
||||||
|
|
||||||
|
@app.post("/convert")
async def convert(file: UploadFile = File(...)):
    """Convert an uploaded file to Markdown.

    The upload's underlying stream is passed to ``MarkItDown.convert_stream``
    together with the extension of the client-supplied filename, which acts
    as a format hint.

    Args:
        file: The multipart upload to convert.

    Returns:
        A dict holding the conversion result's ``title`` and ``text_content``.

    Raises:
        HTTPException: 400 when the format is unsupported; 500 when the
            conversion fails or any other error is raised.
    """
    # Function-scope import so this block does not depend on the module's
    # import header.
    import os

    # splitext keeps the leading dot (".docx" rather than "docx") and yields
    # "" for names without an extension; `or ""` covers uploads that carry
    # no filename at all. The hint is only passed when there is one.
    extension = os.path.splitext(file.filename or "")[1]
    hint = {"file_extension": extension} if extension else {}

    try:
        markitdown = MarkItDown()
        result = markitdown.convert_stream(file.file, **hint)
        return {"title": result.title, "text_content": result.text_content}
    except UnsupportedFormatException as exc:
        raise HTTPException(status_code=400, detail="Unsupported file format") from exc
    except FileConversionException as exc:
        raise HTTPException(status_code=500, detail="File conversion error") from exc
    except Exception as exc:
        # NOTE(review): str(exc) is returned to the client verbatim and may
        # expose internal details; consider logging it and sending a generic
        # message instead.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||||
28
tests/test_api.py
Normal file
28
tests/test_api.py
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
from markitdown.api import app
|
||||||
|
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
def test_root():
    """GET / responds with 200 and the welcome message."""
    expected_body = {"message": "Welcome to the MarkItDown API"}
    reply = client.get("/")
    assert reply.status_code == 200
    assert reply.json() == expected_body
|
||||||
|
|
||||||
|
def test_convert_success():
    """POSTing the sample .docx returns 200 and the expected text."""
    expected_snippet = (
        "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation"
    )
    with open("tests/test_files/test.docx", "rb") as upload:
        reply = client.post("/convert", files={"file": upload})
    assert reply.status_code == 200
    assert expected_snippet in reply.json()["text_content"]
|
||||||
|
|
||||||
|
def test_convert_unsupported_format():
    """POSTing a file of an unsupported type is rejected with 400."""
    expected_body = {"detail": "Unsupported file format"}
    with open("tests/test_files/test.unsupported", "rb") as upload:
        reply = client.post("/convert", files={"file": upload})
    assert reply.status_code == 400
    assert reply.json() == expected_body
|
||||||
|
|
||||||
|
def test_convert_conversion_error():
    """POSTing the corrupted .docx fixture is reported as a 500 conversion error."""
    expected_body = {"detail": "File conversion error"}
    with open("tests/test_files/test_corrupted.docx", "rb") as upload:
        reply = client.post("/convert", files={"file": upload})
    assert reply.status_code == 500
    assert reply.json() == expected_body
|
||||||
Loading…
Reference in a new issue