Merge dd977ca1d8 into 731b39e7f5
This commit is contained in:
commit
7548720917
4 changed files with 76 additions and 0 deletions
17
README.md
17
README.md
|
|
@ -72,6 +72,23 @@ print(result.text_content)
|
||||||
docker build -t markitdown:latest .
|
docker build -t markitdown:latest .
|
||||||
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Serve Command
|
||||||
|
|
||||||
|
You can start a CORS-enabled Flask server to convert files to markdown using the `serve` command:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
serve
|
||||||
|
```
|
||||||
|
|
||||||
|
The server will be available at `http://localhost:5000`. Send a POST request to the `/convert` endpoint with either a `file` upload or a `url` form field; the response is JSON whose `content` field holds the converted Markdown.
|
||||||
|
|
||||||
|
Example using `curl`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
curl -X POST -F 'file=@path-to-file.pdf' http://localhost:5000/convert
|
||||||
|
```
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
|
|
||||||
<summary>Batch Processing Multiple Files</summary>
|
<summary>Batch Processing Multiple Files</summary>
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,8 @@ dependencies = [
|
||||||
"pathvalidate",
|
"pathvalidate",
|
||||||
"charset-normalizer",
|
"charset-normalizer",
|
||||||
"openai",
|
"openai",
|
||||||
|
"flask",
|
||||||
|
"flask-cors",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
|
@ -54,6 +56,7 @@ path = "src/markitdown/__about__.py"
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
markitdown = "markitdown.__main__:main"
|
markitdown = "markitdown.__main__:main"
|
||||||
|
serve = "markitdown.server:app.run"
|
||||||
|
|
||||||
[tool.hatch.envs.types]
|
[tool.hatch.envs.types]
|
||||||
extra-dependencies = [
|
extra-dependencies = [
|
||||||
|
|
|
||||||
24
src/markitdown/server.py
Normal file
24
src/markitdown/server.py
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from flask_cors import CORS
|
||||||
|
from markitdown import MarkItDown
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
CORS(app)
|
||||||
|
|
||||||
|
markitdown = MarkItDown()
|
||||||
|
|
||||||
|
@app.route('/convert', methods=['POST'])
def convert():
    """Convert an uploaded file or a remote URL to Markdown.

    Reads either a multipart ``file`` upload or a ``url`` form field from
    the current request. The upload takes precedence when both are sent.

    Returns:
        A JSON response ``{"content": <converted text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when neither input
        is supplied or the URL is not an http(s) URL.
    """
    # Function-scope imports keep this handler self-contained.
    import os
    from urllib.parse import urlparse

    upload = request.files.get('file')
    if upload is not None:
        # splitext keeps the leading dot (".pdf") and yields "" for a name
        # without an extension; the previous split('.')[-1] dropped the dot
        # and returned the entire filename when it contained no dot.
        # NOTE(review): MarkItDown appears to match dotted extensions such
        # as ".pdf" -- confirm against the converter implementations.
        extension = os.path.splitext(upload.filename or '')[1] or None
        result = markitdown.convert(upload.stream, file_extension=extension)
        return jsonify({'content': result.text_content})

    url = request.form.get('url')
    if url is not None:
        # The value is client-controlled and this server is CORS-enabled.
        # A bare string may be interpreted by the converter as a local path
        # (TODO confirm against MarkItDown.convert), so accept only
        # http(s) URLs here.
        if urlparse(url).scheme.lower() not in ('http', 'https'):
            return jsonify({'error': 'Only http(s) URLs are supported'}), 400
        result = markitdown.convert(url)
        return jsonify({'content': result.text_content})

    return jsonify({'error': 'No file or URL provided'}), 400
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(host='0.0.0.0', port=5000)
|
||||||
|
|
@ -145,6 +145,11 @@ LLM_TEST_STRINGS = [
|
||||||
"5bda1dd6",
|
"5bda1dd6",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# New test strings for the serve command
|
||||||
|
SERVE_TEST_STRINGS = [
|
||||||
|
"While there is contemporaneous exploration of multi-agent approaches"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# --- Helper Functions ---
|
# --- Helper Functions ---
|
||||||
def validate_strings(result, expected_strings, exclude_strings=None):
|
def validate_strings(result, expected_strings, exclude_strings=None):
|
||||||
|
|
@ -330,6 +335,32 @@ def test_markitdown_llm() -> None:
|
||||||
assert test_string in result.text_content.lower()
|
assert test_string in result.text_content.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# New test for the serve command
|
||||||
|
def test_markitdown_serve() -> None:
    """Exercise the ``/convert`` endpoint with a file upload and with a URL.

    Each request must return HTTP 200 and a JSON body whose ``content``
    field contains every string in ``SERVE_TEST_STRINGS``.
    """
    # Import through the package name, the same way server.py imports
    # ``markitdown`` and pyproject references ``markitdown.server``; going
    # through ``src.`` depends on the repository layout being importable.
    from markitdown.server import app

    client = app.test_client()

    # NOTE(review): the upload below is placeholder bytes, not a real PDF,
    # so the SERVE_TEST_STRINGS check cannot match for that case. It should
    # upload the PDF fixture that PDF_TEST_URL points at -- fixture path is
    # not visible from this hunk.
    cases = (
        (
            "file upload",
            {
                "data": {"file": (io.BytesIO(b"test content"), "test.pdf")},
                "content_type": "multipart/form-data",
            },
        ),
        ("url", {"data": {"url": PDF_TEST_URL}}),
    )

    for label, post_kwargs in cases:
        response = client.post("/convert", **post_kwargs)
        assert response.status_code == 200, label
        for test_string in SERVE_TEST_STRINGS:
            assert test_string in response.json["content"], label
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
"""Runs this file's tests from the command line."""
|
"""Runs this file's tests from the command line."""
|
||||||
test_markitdown_remote()
|
test_markitdown_remote()
|
||||||
|
|
@ -337,3 +368,4 @@ if __name__ == "__main__":
|
||||||
test_markitdown_exiftool()
|
test_markitdown_exiftool()
|
||||||
test_markitdown_deprecation()
|
test_markitdown_deprecation()
|
||||||
test_markitdown_llm()
|
test_markitdown_llm()
|
||||||
|
test_markitdown_serve()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue