Merge e2470fc413 into 4678c8a2a4
This commit is contained in:
commit
a6bfa30628
10 changed files with 232 additions and 20 deletions
|
|
@ -1,32 +1,21 @@
|
|||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
|
||||
{
|
||||
"name": "Existing Dockerfile",
|
||||
"build": {
|
||||
// Sets the run context to one level up instead of the .devcontainer folder.
|
||||
"context": "..",
|
||||
// Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
|
||||
"dockerfile": "../Dockerfile",
|
||||
"args": {
|
||||
"INSTALL_GIT": "true"
|
||||
}
|
||||
},
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
"features": {
|
||||
"ghcr.io/devcontainers-extra/features/hatch:2": {}
|
||||
"ghcr.io/devcontainers-extra/features/hatch:2": {},
|
||||
"ghcr.io/devcontainers/features/python:1": {
|
||||
"version": "3.10"
|
||||
},
|
||||
"ghcr.io/devcontainers/features/node:1": {
|
||||
"version": "16"
|
||||
},
|
||||
"ghcr.io/devcontainers/features/ollama:1": {}
|
||||
},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Uncomment the next line to run commands after the container is created.
|
||||
// "postCreateCommand": "cat /etc/os-release",
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
|
||||
"remoteUser": "root"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install markitdown
|
||||
RUN pip install markitdown ollama
|
||||
|
||||
# Default USERID and GROUPID
|
||||
ARG USERID=10000
|
||||
|
|
|
|||
12
README.md
12
README.md
|
|
@ -66,6 +66,18 @@ result = md.convert("example.jpg")
|
|||
print(result.text_content)
|
||||
```
|
||||
|
||||
To use Ollama for image descriptions, provide `ollama_client`:
|
||||
|
||||
```python
|
||||
from markitdown import MarkItDown
|
||||
from ollama import Ollama
|
||||
|
||||
client = Ollama(api_key="your-api-key")
|
||||
md = MarkItDown(ollama_client=client)
|
||||
result = md.convert("example.jpg")
|
||||
print(result.text_content)
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```sh
|
||||
|
|
|
|||
|
|
@ -1077,6 +1077,54 @@ class ImageConverter(MediaConverter):
|
|||
return response.choices[0].message.content
|
||||
|
||||
|
||||
class OllamaConverter(DocumentConverter):
    """
    Converts images to markdown via description using Ollama API.

    Keyword arguments read by ``convert``:
        file_extension: Extension of the source file. Only ``.jpg``, ``.jpeg``
            and ``.png`` are handled.
        ollama_client: Client object used to describe the image. When it is
            missing the converter declines the file.
        ollama_prompt: Optional prompt; a default caption prompt is used when
            it is missing or blank.
        ollama_model: Optional model name, only used with clients that expose
            ``generate`` rather than ``describe_image``.
    """

    # Model requested from ``client.generate`` when the caller names none.
    # NOTE(review): "llava" is an assumed default vision model -- confirm it
    # matches the models pulled in the target environment.
    _DEFAULT_MODEL = "llava"

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        """Return a description of the image at ``local_path``, or ``None``.

        ``None`` is returned when the file is not a supported image or when no
        ``ollama_client`` was supplied.
        """
        # Bail if not an image (tolerate an explicit file_extension=None)
        extension = kwargs.get("file_extension") or ""
        if extension.lower() not in [".jpg", ".jpeg", ".png"]:
            return None

        # Bail if there is nothing to describe the image with. Returning an
        # empty result here would claim the file while producing no content;
        # declining presumably lets other registered converters handle it.
        ollama_client = kwargs.get("ollama_client")
        if ollama_client is None:
            return None

        description = self._get_ollama_description(
            local_path,
            extension,
            ollama_client,
            prompt=kwargs.get("ollama_prompt"),
            model=kwargs.get("ollama_model"),
        )

        # A client may hand back None; treat that as an empty description
        # instead of failing on .strip().
        md_content = "\n# Description:\n" + (description or "").strip() + "\n"

        return DocumentConverterResult(
            title=None,
            text_content=md_content,
        )

    def _get_ollama_description(
        self, local_path, extension, client, prompt=None, model=None
    ):
        """Ask ``client`` to describe the image stored at ``local_path``.

        Clients exposing ``describe_image(data_uri, prompt)`` are called that
        way and their ``["description"]`` entry is returned. Any other client
        is called through ``generate(model=..., prompt=..., images=[...])``
        with the base64-encoded image, and its ``["response"]`` entry is
        returned.
        """
        if prompt is None or prompt.strip() == "":
            prompt = "Write a detailed caption for this image."

        with open(local_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        describe_image = getattr(client, "describe_image", None)
        if callable(describe_image):
            # Original contract: the image travels as a data URI.
            content_type, _ = mimetypes.guess_type("_dummy" + extension)
            if content_type is None:
                content_type = "image/jpeg"
            data_uri = f"data:{content_type};base64,{image_base64}"
            response = describe_image(data_uri, prompt)
            return response["description"]

        # NOTE(review): the published ``ollama`` client does not appear to
        # offer ``describe_image``; its ``generate`` call takes the images as
        # raw base64 strings (no data-URI prefix) -- confirm against the
        # installed version.
        response = client.generate(
            model=model or self._DEFAULT_MODEL,
            prompt=prompt,
            images=[image_base64],
        )
        return response["response"]
|
||||
|
||||
|
||||
class ZipConverter(DocumentConverter):
|
||||
"""Converts ZIP files to markdown by extracting and converting all contained files.
|
||||
|
||||
|
|
@ -1224,6 +1272,7 @@ class MarkItDown:
|
|||
llm_client: Optional[Any] = None,
|
||||
llm_model: Optional[str] = None,
|
||||
style_map: Optional[str] = None,
|
||||
ollama_client: Optional[Any] = None,
|
||||
# Deprecated
|
||||
mlm_client: Optional[Any] = None,
|
||||
mlm_model: Optional[str] = None,
|
||||
|
|
@ -1265,6 +1314,7 @@ class MarkItDown:
|
|||
self._llm_client = llm_client
|
||||
self._llm_model = llm_model
|
||||
self._style_map = style_map
|
||||
self._ollama_client = ollama_client
|
||||
|
||||
self._page_converters: List[DocumentConverter] = []
|
||||
|
||||
|
|
@ -1286,6 +1336,7 @@ class MarkItDown:
|
|||
self.register_page_converter(IpynbConverter())
|
||||
self.register_page_converter(PdfConverter())
|
||||
self.register_page_converter(ZipConverter())
|
||||
self.register_page_converter(OllamaConverter())
|
||||
|
||||
def convert(
|
||||
self, source: Union[str, requests.Response, Path], **kwargs: Any
|
||||
|
|
@ -1446,6 +1497,9 @@ class MarkItDown:
|
|||
if "llm_model" not in _kwargs and self._llm_model is not None:
|
||||
_kwargs["llm_model"] = self._llm_model
|
||||
|
||||
if "ollama_client" not in _kwargs and self._ollama_client is not None:
|
||||
_kwargs["ollama_client"] = self._ollama_client
|
||||
|
||||
# Add the list of converters for nested processing
|
||||
_kwargs["_parent_converters"] = self._page_converters
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,13 @@ except ModuleNotFoundError:
|
|||
# Skip exiftool tests if not installed
|
||||
skip_exiftool = shutil.which("exiftool") is None
|
||||
|
||||
# Skip Ollama tests when the client library is missing or no API key is set
try:
    import ollama
except ModuleNotFoundError:
    skip_ollama = True
else:
    skip_ollama = not os.environ.get("OLLAMA_API_KEY")
|
||||
|
||||
TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files")
|
||||
|
||||
JPG_TEST_EXIFTOOL = {
|
||||
|
|
@ -130,6 +137,11 @@ LLM_TEST_STRINGS = [
|
|||
"5bda1dd6",
|
||||
]
|
||||
|
||||
OLLAMA_TEST_STRINGS = [
|
||||
"detailed caption",
|
||||
"image",
|
||||
]
|
||||
|
||||
|
||||
# --- Helper Functions ---
|
||||
def validate_strings(result, expected_strings, exclude_strings=None):
|
||||
|
|
@ -300,6 +312,20 @@ def test_markitdown_llm() -> None:
|
|||
assert test_string in result.text_content.lower()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    skip_ollama,
    reason="do not run ollama tests without a key",
)
def test_markitdown_ollama() -> None:
    """Convert the Ollama sample image and check the expected strings appear."""
    # NOTE(review): confirm that the installed ``ollama`` package really
    # exposes an ``Ollama`` class accepting ``api_key``.
    client = ollama.Ollama(api_key=os.environ.get("OLLAMA_API_KEY"))
    markitdown = MarkItDown(ollama_client=client)

    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_ollama.jpg"))

    # Model output capitalisation varies, so compare case-insensitively.
    text_content = result.text_content.lower()
    for test_string in OLLAMA_TEST_STRINGS:
        assert test_string in text_content
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""Runs this file's tests from the command line."""
|
||||
test_markitdown_remote()
|
||||
|
|
@ -307,3 +333,4 @@ if __name__ == "__main__":
|
|||
test_markitdown_exiftool()
|
||||
test_markitdown_deprecation()
|
||||
test_markitdown_llm()
|
||||
test_markitdown_ollama()
|
||||
|
|
|
|||
37
web-ui/package.json
Normal file
37
web-ui/package.json
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"name": "markitdown-web-ui",
|
||||
"version": "1.0.0",
|
||||
"description": "Web-based UI for MarkItDown",
|
||||
"main": "src/App.js",
|
||||
"scripts": {
|
||||
"start": "react-scripts start",
|
||||
"build": "react-scripts build",
|
||||
"test": "react-scripts test",
|
||||
"eject": "react-scripts eject"
|
||||
},
|
||||
"dependencies": {
|
||||
"react": "^17.0.2",
|
||||
"react-dom": "^17.0.2",
|
||||
"react-scripts": "4.0.3",
|
||||
"axios": "^0.21.1",
|
||||
"react-markdown": "^7.0.0"
|
||||
},
|
||||
"eslintConfig": {
|
||||
"extends": [
|
||||
"react-app",
|
||||
"react-app/jest"
|
||||
]
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
">0.2%",
|
||||
"not dead",
|
||||
"not op_mini all"
|
||||
],
|
||||
"development": [
|
||||
"last 1 chrome version",
|
||||
"last 1 firefox version",
|
||||
"last 1 safari version"
|
||||
]
|
||||
}
|
||||
}
|
||||
42
web-ui/src/App.js
Normal file
42
web-ui/src/App.js
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import React, { useState } from 'react';
|
||||
import FileUpload from './components/FileUpload';
|
||||
import MarkdownPreview from './components/MarkdownPreview';
|
||||
import DownloadButton from './components/DownloadButton';
|
||||
import axios from 'axios';
|
||||
|
||||
function App() {
|
||||
const [markdownContent, setMarkdownContent] = useState('');
|
||||
const [fileName, setFileName] = useState('');
|
||||
|
||||
const handleFileUpload = async (file) => {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
|
||||
try {
|
||||
const response = await axios.post('/api/convert', formData, {
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data',
|
||||
},
|
||||
});
|
||||
setMarkdownContent(response.data.markdown);
|
||||
setFileName(file.name);
|
||||
} catch (error) {
|
||||
console.error('Error uploading file:', error);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="App">
|
||||
<header className="App-header">
|
||||
<h1>MarkItDown Web UI</h1>
|
||||
</header>
|
||||
<main>
|
||||
<FileUpload onFileUpload={handleFileUpload} />
|
||||
<MarkdownPreview content={markdownContent} />
|
||||
<DownloadButton content={markdownContent} fileName={fileName} />
|
||||
</main>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default App;
|
||||
21
web-ui/src/components/DownloadButton.js
Normal file
21
web-ui/src/components/DownloadButton.js
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import React from 'react';
|
||||
|
||||
function DownloadButton({ content, fileName }) {
|
||||
const handleDownload = () => {
|
||||
const element = document.createElement('a');
|
||||
const file = new Blob([content], { type: 'text/markdown' });
|
||||
element.href = URL.createObjectURL(file);
|
||||
element.download = fileName.replace(/\.[^/.]+$/, "") + ".md";
|
||||
document.body.appendChild(element);
|
||||
element.click();
|
||||
document.body.removeChild(element);
|
||||
};
|
||||
|
||||
return (
|
||||
<button onClick={handleDownload}>
|
||||
Download Markdown
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
export default DownloadButton;
|
||||
18
web-ui/src/components/FileUpload.js
Normal file
18
web-ui/src/components/FileUpload.js
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import React from 'react';
|
||||
|
||||
function FileUpload({ onFileUpload }) {
|
||||
const handleFileChange = (event) => {
|
||||
const file = event.target.files[0];
|
||||
if (file) {
|
||||
onFileUpload(file);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="file-upload">
|
||||
<input type="file" onChange={handleFileChange} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default FileUpload;
|
||||
12
web-ui/src/components/MarkdownPreview.js
Normal file
12
web-ui/src/components/MarkdownPreview.js
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import React from 'react';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
|
||||
function MarkdownPreview({ content }) {
|
||||
return (
|
||||
<div className="markdown-preview">
|
||||
<ReactMarkdown>{content}</ReactMarkdown>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default MarkdownPreview;
|
||||
Loading…
Reference in a new issue