Merge dd977ca1d8 into 731b39e7f5
This commit is contained in:
commit
7548720917
4 changed files with 76 additions and 0 deletions
17
README.md
17
README.md
|
|
@ -72,6 +72,23 @@ print(result.text_content)
|
|||
docker build -t markitdown:latest .
|
||||
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
||||
```
|
||||
|
||||
### Serve Command
|
||||
|
||||
You can start a CORS-enabled Flask server to convert files to markdown using the `serve` command:
|
||||
|
||||
```sh
|
||||
serve
|
||||
```
|
||||
|
||||
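The server will be available at `http://localhost:5000`. Send a POST request to the `/convert` endpoint with either a file upload (multipart form field `file`) or a `url` form field; the response is JSON whose `content` field holds the converted Markdown.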
|
||||
|
||||
Example using `curl`:
|
||||
|
||||
```sh
|
||||
curl -X POST -F 'file=@path-to-file.pdf' http://localhost:5000/convert
|
||||
```
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Batch Processing Multiple Files</summary>
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@ dependencies = [
|
|||
"pathvalidate",
|
||||
"charset-normalizer",
|
||||
"openai",
|
||||
"flask",
|
||||
"flask-cors",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
|
@ -54,6 +56,7 @@ path = "src/markitdown/__about__.py"
|
|||
|
||||
[project.scripts]
|
||||
markitdown = "markitdown.__main__:main"
|
||||
serve = "markitdown.server:app.run"
|
||||
|
||||
[tool.hatch.envs.types]
|
||||
extra-dependencies = [
|
||||
|
|
|
|||
24
src/markitdown/server.py
Normal file
24
src/markitdown/server.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
from markitdown import MarkItDown
|
||||
|
||||
# Flask application object; the /convert route is registered on it.
app = Flask(__name__)
# Enable CORS handling for the app. No options are passed, so the
# flask-cors defaults apply.
CORS(app)

# Single module-level converter instance shared by all requests.
markitdown = MarkItDown()
|
||||
|
||||
@app.route('/convert', methods=['POST'])
def convert():
    """Convert an uploaded file or a remote URL to Markdown.

    Expects a POST carrying either a multipart ``file`` upload or a ``url``
    form field. When both are present the file upload wins.

    Returns:
        A JSON response ``{"content": <markdown text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when neither input is
        supplied or the URL scheme is not http/https.
    """
    import os.path
    from urllib.parse import urlsplit

    if 'file' in request.files:
        upload = request.files['file']
        # splitext gives a dot-prefixed suffix ('.pdf') and '' when the name
        # has none; the previous split('.')[-1] returned the entire filename
        # for dot-less names and raised on a missing filename.
        # NOTE(review): MarkItDown's file_extension hint is presumably
        # expected in dot-prefixed form -- confirm against the pinned version.
        extension = os.path.splitext(upload.filename or '')[1]
        result = markitdown.convert(
            upload.stream, file_extension=extension or None
        )
        return jsonify({'content': result.text_content})

    if 'url' in request.form:
        url = request.form['url']
        # The value comes from an untrusted client. Only remote http(s)
        # sources are accepted so that a request cannot point the converter
        # at other kinds of sources (e.g. paths on the server's filesystem).
        if urlsplit(url).scheme.lower() not in ('http', 'https'):
            return jsonify(
                {'error': 'Only http and https URLs are supported'}
            ), 400
        result = markitdown.convert(url)
        return jsonify({'content': result.text_content})

    return jsonify({'error': 'No file or URL provided'}), 400
|
||||
|
||||
# When this module is executed directly, start the Flask server listening on
# all network interfaces (0.0.0.0) at port 5000.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
|
||||
|
|
@ -145,6 +145,11 @@ LLM_TEST_STRINGS = [
|
|||
"5bda1dd6",
|
||||
]
|
||||
|
||||
# Text expected in the Markdown returned by the server's /convert endpoint;
# checked by test_markitdown_serve.
SERVE_TEST_STRINGS = [
    "While there is contemporaneous exploration of multi-agent approaches"
]
|
||||
|
||||
|
||||
# --- Helper Functions ---
|
||||
def validate_strings(result, expected_strings, exclude_strings=None):
|
||||
|
|
@ -330,6 +335,32 @@ def test_markitdown_llm() -> None:
|
|||
assert test_string in result.text_content.lower()
|
||||
|
||||
|
||||
# Test for the Flask server's /convert endpoint
def test_markitdown_serve() -> None:
    """Exercise POST /convert with a file upload and with a URL.

    Both requests must return HTTP 200 and a JSON body whose ``content``
    field contains every entry of SERVE_TEST_STRINGS.
    """
    # Import through the package name (the same path the console-script entry
    # point and server.py itself use) instead of ``src.markitdown``, which
    # only resolves when the repository root happens to be on sys.path.
    from markitdown.server import app

    def _assert_converted(response) -> None:
        # Shared success check for both request styles.
        assert response.status_code == 200
        for test_string in SERVE_TEST_STRINGS:
            assert test_string in response.json["content"]

    client = app.test_client()

    # Test with file
    # NOTE(review): this payload is not a real PDF and contains none of the
    # SERVE_TEST_STRINGS text, so the content assertion looks unsatisfiable;
    # presumably a real PDF fixture was intended -- confirm and replace.
    _assert_converted(
        client.post(
            "/convert",
            data={"file": (io.BytesIO(b"test content"), "test.pdf")},
            content_type="multipart/form-data",
        )
    )

    # Test with URL
    _assert_converted(
        client.post(
            "/convert",
            data={"url": PDF_TEST_URL},
        )
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""Runs this file's tests from the command line."""
|
||||
test_markitdown_remote()
|
||||
|
|
@ -337,3 +368,4 @@ if __name__ == "__main__":
|
|||
test_markitdown_exiftool()
|
||||
test_markitdown_deprecation()
|
||||
test_markitdown_llm()
|
||||
test_markitdown_serve()
|
||||
|
|
|
|||
Loading…
Reference in a new issue