Add API endpoints for file conversion
This commit is contained in:
parent
05b78e7ce1
commit
da1007085c
4 changed files with 74 additions and 2 deletions
|
|
@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install markitdown
|
||||
RUN pip install markitdown fastapi uvicorn
|
||||
|
||||
# Default USERID and GROUPID
|
||||
ARG USERID=10000
|
||||
|
|
@ -20,4 +20,4 @@ ARG GROUPID=10000
|
|||
|
||||
USER $USERID:$GROUPID
|
||||
|
||||
ENTRYPOINT [ "markitdown" ]
|
||||
ENTRYPOINT ["uvicorn", "markitdown.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
|
|
|||
23
README.md
23
README.md
|
|
@ -66,6 +66,29 @@ result = md.convert("example.jpg")
|
|||
print(result.text_content)
|
||||
```
|
||||
|
||||
### RESTful API
|
||||
|
||||
MarkItDown also provides a RESTful API using FastAPI. You can deploy the application on Vercel or another hosting service.
|
||||
|
||||
#### Endpoints
|
||||
|
||||
- `GET /`: Returns a welcome message.
|
||||
- `POST /convert`: Converts a file uploaded in the multipart form field `file` to Markdown and returns JSON containing `title` and `text_content`.
|
||||
|
||||
#### Example Usage
|
||||
|
||||
Using `curl`:
|
||||
|
||||
```sh
|
||||
curl -X POST "http://<your-deployment-url>/convert" -F "file=@path-to-file.pdf"
|
||||
```
|
||||
|
||||
Using `httpie`:
|
||||
|
||||
```sh
|
||||
http --form POST "http://<your-deployment-url>/convert" file@path-to-file.pdf
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```sh
|
||||
|
|
|
|||
21
src/markitdown/api.py
Normal file
21
src/markitdown/api.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
from fastapi import FastAPI, HTTPException, UploadFile, File
|
||||
from markitdown import MarkItDown, UnsupportedFormatException, FileConversionException
|
||||
|
||||
# FastAPI application object; the route decorators in this module register their handlers on it.
app = FastAPI()
|
||||
|
||||
@app.get("/")
async def root():
    """Return the static welcome payload served at the API root."""
    welcome = {"message": "Welcome to the MarkItDown API"}
    return welcome
|
||||
|
||||
@app.post("/convert")
async def convert(file: UploadFile = File(...)):
    """Convert an uploaded file to Markdown.

    The upload arrives in the multipart form field ``file``. Its stream is
    handed to ``MarkItDown.convert_stream`` together with an extension hint
    derived from the client-supplied filename, when one is available.

    Returns:
        A dict with the converted document's ``title`` and ``text_content``.

    Raises:
        HTTPException: status 400 when the format is unsupported; status 500
            when the conversion fails or any other error occurs.
    """
    # Imported locally so this handler does not rely on the module's import block.
    import os

    # The filename is client-controlled and may be absent. splitext() keeps the
    # leading dot (".docx") and yields "" for names without a suffix, so a
    # dot-less name such as "README" is no longer passed off as an extension.
    # NOTE(review): MarkItDown is understood to match hints in dotted form
    # (e.g. ".docx") -- confirm against the installed version.
    extension = os.path.splitext(file.filename or "")[1]
    # Only pass a hint when there is one; otherwise let MarkItDown detect the type itself.
    hints = {"file_extension": extension} if extension else {}

    try:
        markitdown = MarkItDown()
        result = markitdown.convert_stream(file.file, **hints)
    except UnsupportedFormatException as exc:
        raise HTTPException(status_code=400, detail="Unsupported file format") from exc
    except FileConversionException as exc:
        raise HTTPException(status_code=500, detail="File conversion error") from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return {"title": result.title, "text_content": result.text_content}
|
||||
28
tests/test_api.py
Normal file
28
tests/test_api.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from markitdown.api import app
|
||||
|
||||
# Module-level test client wrapping the API app; the tests in this module send their requests through it.
client = TestClient(app)
|
||||
|
||||
def test_root():
    """GET / answers 200 with the welcome message payload."""
    expected_payload = {"message": "Welcome to the MarkItDown API"}

    reply = client.get("/")

    assert reply.status_code == 200
    assert reply.json() == expected_payload
|
||||
|
||||
def test_convert_success():
    """POST /convert with the sample .docx answers 200 and includes the document title text."""
    expected_fragment = "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation"

    with open("tests/test_files/test.docx", "rb") as docx_stream:
        upload = {"file": docx_stream}
        reply = client.post("/convert", files=upload)

    assert reply.status_code == 200
    assert expected_fragment in reply.json()["text_content"]
|
||||
|
||||
def test_convert_unsupported_format():
    """POST /convert with an unsupported file type answers 400 with the fixed error detail."""
    expected_body = {"detail": "Unsupported file format"}

    with open("tests/test_files/test.unsupported", "rb") as unsupported_stream:
        upload = {"file": unsupported_stream}
        reply = client.post("/convert", files=upload)

    assert reply.status_code == 400
    assert reply.json() == expected_body
|
||||
|
||||
def test_convert_conversion_error():
    """POST /convert with a corrupted .docx answers 500 with the fixed error detail."""
    expected_body = {"detail": "File conversion error"}

    with open("tests/test_files/test_corrupted.docx", "rb") as corrupted_stream:
        upload = {"file": corrupted_stream}
        reply = client.post("/convert", files=upload)

    assert reply.status_code == 500
    assert reply.json() == expected_body
|
||||
Loading…
Reference in a new issue