Add LLM-based image description to PptxConverter
Signed-off-by: Hankyeol Kyung <kghnkl0103@gmail.com>
This commit is contained in:
parent
125e206047
commit
7fe32073de
1 changed file with 36 additions and 0 deletions
|
|
@ -768,6 +768,17 @@ class PptxConverter(HtmlConverter):
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Try describing the image using GPTV
|
||||||
|
llm_client = kwargs.get("llm_client")
|
||||||
|
llm_model = kwargs.get("llm_model")
|
||||||
|
if llm_client is not None and llm_model is not None:
|
||||||
|
alt_text += self._get_llm_description(
|
||||||
|
shape.image.blob,
|
||||||
|
llm_client,
|
||||||
|
llm_model,
|
||||||
|
prompt=kwargs.get("llm_prompt"),
|
||||||
|
).strip()
|
||||||
|
|
||||||
# A placeholder name
|
# A placeholder name
|
||||||
filename = re.sub(r"\W", "", shape.name) + ".jpg"
|
filename = re.sub(r"\W", "", shape.name) + ".jpg"
|
||||||
md_content += (
|
md_content += (
|
||||||
|
|
@ -857,6 +868,31 @@ class PptxConverter(HtmlConverter):
|
||||||
separator = "|" + "|".join(["---"] * len(data[0])) + "|"
|
separator = "|" + "|".join(["---"] * len(data[0])) + "|"
|
||||||
return md + "\n".join([header, separator] + markdown_table[1:])
|
return md + "\n".join([header, separator] + markdown_table[1:])
|
||||||
|
|
||||||
|
def _get_llm_description(self, image_blob, client, model, prompt=None):
    """Ask a chat-completions style client to describe an image.

    Args:
        image_blob: Raw image bytes. They are base64-encoded and sent to the
            client as a ``data:`` URI.
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        model: Model identifier passed through unchanged to the client.
        prompt: Optional instruction text. When ``None`` or blank, a default
            caption request is used instead.

    Returns:
        The text content of the first choice in the response, or an empty
        string when that content is ``None`` (so callers can safely call
        string methods such as ``.strip()`` on the result).
    """
    if prompt is None or prompt.strip() == "":
        prompt = "Write a caption for this image."

    # Label the data URI with the blob's real media type instead of always
    # claiming JPEG; unknown signatures keep the previous "image/jpeg" label.
    header = bytes(image_blob[:12])
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        content_type = "image/png"
    elif header.startswith((b"GIF87a", b"GIF89a")):
        content_type = "image/gif"
    elif header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        content_type = "image/webp"
    else:
        content_type = "image/jpeg"

    image_base64 = base64.b64encode(image_blob).decode("utf-8")
    data_uri = f"data:{content_type};base64,{image_base64}"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_uri,
                    },
                },
            ],
        }
    ]

    response = client.chat.completions.create(model=model, messages=messages)
    content = response.choices[0].message.content
    # The message content may be None; normalise it to an empty string.
    return content if content is not None else ""
|
||||||
|
|
||||||
|
|
||||||
class MediaConverter(DocumentConverter):
|
class MediaConverter(DocumentConverter):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue