diff --git a/Dockerfile b/Dockerfile
index 0072d9e..82d0706 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
 
-RUN pip install markitdown
+RUN pip install markitdown fastapi uvicorn python-multipart
 
 # Default USERID and GROUPID
 ARG USERID=10000
@@ -20,4 +20,4 @@ ARG GROUPID=10000
 
 USER $USERID:$GROUPID
 
-ENTRYPOINT [ "markitdown" ]
+ENTRYPOINT ["uvicorn", "src.markitdown.api:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
index d2314c3..1bb6345 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,42 @@ print(result.text_content)
 docker build -t markitdown:latest .
 docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
 ```
+
+### Web API
+
+You can also use MarkItDown via a REST endpoint. The Web API is built using FastAPI and can be run using Docker.
+
+#### Running the Web API
+
+1. Build the Docker image:
+
+```sh
+docker build -t markitdown-api:latest .
+```
+
+2. Run the Docker container:
+
+```sh
+docker run --rm -p 8000:8000 markitdown-api:latest
+```
+
+The Web API will be available at `http://localhost:8000`.
+
+#### Using the Web API
+
+The Web API provides a single endpoint `/convert` that accepts a file and returns the converted Markdown.
+
+- **Endpoint:** `/convert`
+- **Method:** `POST`
+- **Request Body:** Multipart form data with a file field named `file`
+- **Response:** The converted Markdown as a file download, named after the uploaded file with its extension replaced by `.md`
+
+Example using `curl`:
+
+```sh
+curl -X POST "http://localhost:8000/convert" -F "file=@path-to-file.pdf" -o path-to-file.md
+```
+
Batch Processing Multiple Files
diff --git a/src/markitdown/api.py b/src/markitdown/api.py
new file mode 100644
index 0000000..b57a4cf
--- /dev/null
+++ b/src/markitdown/api.py
@@ -0,0 +1,85 @@
+"""FastAPI application exposing MarkItDown document conversion over HTTP."""
+
+import os
+import tempfile
+
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.responses import FileResponse
+from markitdown import MarkItDown
+from starlette.background import BackgroundTask
+
+app = FastAPI()
+
+
+def _remove_quietly(path):
+    """Delete *path*, tolerating a file that has already been removed."""
+    try:
+        os.remove(path)
+    except FileNotFoundError:
+        pass
+
+
+@app.post("/convert")
+async def convert(file: UploadFile = File(...)):
+    """Convert an uploaded document to Markdown and return it as a download.
+
+    Args:
+        file: The uploaded document, sent as the multipart form field ``file``.
+
+    Returns:
+        A ``FileResponse`` serving the converted Markdown, offered to the
+        client as ``<uploaded name without extension>.md``.
+
+    Raises:
+        HTTPException: 400 if the upload has no filename; 500 (with the
+            underlying error message as ``detail``) if saving or converting
+            the upload fails.
+    """
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="No file uploaded")
+
+    # The filename is client-controlled. Keep only its last path component
+    # and use it solely for the suffix and the download name, so a value such
+    # as "../../etc/cron.d/x" cannot steer a write outside the temp directory.
+    # On-disk names come from ``tempfile``, which also keeps concurrent
+    # uploads of the same name from overwriting each other.
+    safe_name = os.path.basename(file.filename.replace("\\", "/"))
+    stem, extension = os.path.splitext(safe_name)
+
+    input_path = None
+    output_path = None
+    try:
+        contents = await file.read()
+        # Keep the upload's extension on the temporary copy, as the converter
+        # is handed a path. NOTE(review): presumably MarkItDown uses the
+        # extension to pick a converter -- confirm.
+        with tempfile.NamedTemporaryFile(
+            "wb", suffix=extension, delete=False
+        ) as upload:
+            input_path = upload.name
+            upload.write(contents)
+
+        result = MarkItDown().convert(input_path)
+
+        # Explicit UTF-8 so non-ASCII output does not depend on the locale.
+        with tempfile.NamedTemporaryFile(
+            "w", suffix=".md", encoding="utf-8", delete=False
+        ) as rendered:
+            output_path = rendered.name
+            rendered.write(result.text_content)
+    except Exception as e:
+        if output_path is not None:
+            _remove_quietly(output_path)
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    finally:
+        # The uploaded copy is not needed once conversion succeeded or failed.
+        if input_path is not None:
+            _remove_quietly(input_path)
+
+    # The file is read while the response is being sent, after this function
+    # has returned, so it is deleted by a background task afterwards.
+    return FileResponse(
+        output_path,
+        filename=f"{stem or 'converted'}.md",
+        background=BackgroundTask(_remove_quietly, output_path),
+    )