diff --git a/pyproject.toml b/pyproject.toml index 9c113ad..43322ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "pathvalidate", "charset-normalizer", "openai", + "ollama" ] [project.urls] diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 33806e1..7ab0bc1 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -13,6 +13,7 @@ import sys import tempfile import traceback import zipfile +from http.client import responses from xml.dom import minidom from typing import Any, Dict, List, Optional, Union from pathlib import Path @@ -1096,24 +1097,42 @@ class ImageConverter(MediaConverter): content_type = "image/jpeg" image_base64 = base64.b64encode(image_file.read()).decode("utf-8") data_uri = f"data:{content_type};base64,{image_base64}" + # check if Ollama client + if str(type(client)) == "": + messages = [ + { + "role": "user", + "content": prompt, + 'images': [local_path] - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": prompt}, - { - "type": "image_url", - "image_url": { - "url": data_uri, + } + ] + + response = client.chat( + model = model, + messages = messages, + + ) + + return response.message.content + + else:# use openai + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": data_uri, + }, }, - }, - ], - } - ] - - response = client.chat.completions.create(model=model, messages=messages) - return response.choices[0].message.content + ], + } + ] + response = client.chat.completions.create(model=model, messages=messages) + return response.choices[0].message.content class OutlookMsgConverter(DocumentConverter):