Merge d46cff8857 into f58a864951
This commit is contained in:
commit
c47f856250
5 changed files with 81 additions and 3 deletions
|
|
@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ffmpeg \
|
ffmpeg \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN pip install markitdown
|
RUN pip install markitdown fastapi uvicorn
|
||||||
|
|
||||||
# Default USERID and GROUPID
|
# Default USERID and GROUPID
|
||||||
ARG USERID=10000
|
ARG USERID=10000
|
||||||
|
|
@ -20,4 +20,4 @@ ARG GROUPID=10000
|
||||||
|
|
||||||
USER $USERID:$GROUPID
|
USER $USERID:$GROUPID
|
||||||
|
|
||||||
ENTRYPOINT [ "markitdown" ]
|
ENTRYPOINT ["uvicorn", "src.markitdown.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
|
||||||
36
README.md
36
README.md
|
|
@ -69,6 +69,42 @@ print(result.text_content)
|
||||||
docker build -t markitdown:latest .
|
docker build -t markitdown:latest .
|
||||||
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Web API
|
||||||
|
|
||||||
|
You can also use MarkItDown via a REST endpoint. The Web API is built using FastAPI and can be run using Docker.
|
||||||
|
|
||||||
|
#### Running the Web API
|
||||||
|
|
||||||
|
1. Build the Docker image:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker build -t markitdown-api:latest .
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run the Docker container:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker run --rm -p 8000:8000 markitdown-api:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
The Web API will be available at `http://localhost:8000`.
|
||||||
|
|
||||||
|
#### Using the Web API
|
||||||
|
|
||||||
|
The Web API provides a single endpoint `/convert` that accepts a file and returns the converted markdown.
|
||||||
|
|
||||||
|
- **Endpoint:** `/convert`
|
||||||
|
- **Method:** `POST`
|
||||||
|
- **Request Body:** Multipart form data with a file field named `file`
|
||||||
|
- **Response:** JSON object with a `markdown` field containing the converted markdown
|
||||||
|
|
||||||
|
Example using `curl`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
curl -X POST "http://localhost:8000/convert" -F "file=@path-to-file.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
|
|
||||||
<summary>Batch Processing Multiple Files</summary>
|
<summary>Batch Processing Multiple Files</summary>
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,8 @@ dependencies = [
|
||||||
"pathvalidate",
|
"pathvalidate",
|
||||||
"charset-normalizer",
|
"charset-normalizer",
|
||||||
"openai",
|
"openai",
|
||||||
|
"fastapi",
|
||||||
|
"uvicorn",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import sys
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from .__about__ import __version__
|
from .__about__ import __version__
|
||||||
from ._markitdown import MarkItDown, DocumentConverterResult
|
from ._markitdown import MarkItDown, DocumentConverterResult
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -57,9 +58,16 @@ def main():
|
||||||
"--output",
|
"--output",
|
||||||
help="Output file name. If not provided, output is written to stdout.",
|
help="Output file name. If not provided, output is written to stdout.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--api",
|
||||||
|
action="store_true",  # boolean flag: argparse has no "api" action and would raise ValueError
|
||||||
|
help="Start the API server",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.filename is None:
|
if args.api:
|
||||||
|
uvicorn.run("src.markitdown.api:app", host="0.0.0.0", port=8000)
|
||||||
|
elif args.filename is None:
|
||||||
markitdown = MarkItDown()
|
markitdown = MarkItDown()
|
||||||
result = markitdown.convert_stream(sys.stdin.buffer)
|
result = markitdown.convert_stream(sys.stdin.buffer)
|
||||||
_handle_output(args, result)
|
_handle_output(args, result)
|
||||||
|
|
|
||||||
32
src/markitdown/api.py
Normal file
32
src/markitdown/api.py
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from markitdown import MarkItDown
|
||||||
|
import os
|
||||||
|
|
||||||
|
# FastAPI application object; the /convert route is registered on it and
# uvicorn is pointed at it via the "...api:app" import string.
app = FastAPI()
|
||||||
|
|
||||||
|
@app.post("/convert")
async def convert(file: UploadFile = File(...)):
    """Convert an uploaded file to Markdown.

    The upload arrives as multipart form data in the ``file`` field. Its
    bytes are written to a private temporary file, which is handed to
    ``MarkItDown.convert`` and removed afterwards.

    Returns:
        A JSON-serialisable dict ``{"markdown": <converted text>}``.

    Raises:
        HTTPException: 400 when the upload has no filename; 500 (with the
            underlying error message as detail) when reading, writing or
            converting the file fails.
    """
    import tempfile  # local import: only this handler needs it

    if not file.filename:
        raise HTTPException(status_code=400, detail="No file uploaded")

    # Never build a path from the client-supplied name: it may contain
    # "../" or an absolute path. Only the extension is kept, so the
    # converted path still ends in the uploaded file's suffix.
    suffix = os.path.splitext(os.path.basename(file.filename))[1]
    temp_file_path = None

    try:
        contents = await file.read()

        # delete=False so the file can be reopened by path after closing;
        # a unique name avoids collisions between concurrent uploads.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
            temp_file.write(contents)
            temp_file_path = temp_file.name

        markitdown = MarkItDown()
        result = markitdown.convert(temp_file_path)

        return {"markdown": result.text_content}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        # Clean up on success and on failure; removal is best-effort so a
        # cleanup problem never masks the real response or error.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                pass
|
||||||
Loading…
Reference in a new issue