Update LLM description method to accept image object and validate content type

Signed-off-by: Hankyeol Kyung <kghnkl0103@gmail.com>
This commit is contained in:
Hankyeol Kyung 2024-12-27 15:28:54 +09:00
parent 7fe32073de
commit 9449d5b959
No known key found for this signature in database
GPG key ID: 0430C7F42578E222

View file

@@ -773,7 +773,7 @@ class PptxConverter(HtmlConverter):
llm_model = kwargs.get("llm_model") llm_model = kwargs.get("llm_model")
if llm_client is not None and llm_model is not None: if llm_client is not None and llm_model is not None:
alt_text += self._get_llm_description( alt_text += self._get_llm_description(
shape.image.blob, shape.image,
llm_client, llm_client,
llm_model, llm_model,
prompt=kwargs.get("llm_prompt"), prompt=kwargs.get("llm_prompt"),
@@ -868,12 +868,18 @@ class PptxConverter(HtmlConverter):
separator = "|" + "|".join(["---"] * len(data[0])) + "|" separator = "|" + "|".join(["---"] * len(data[0])) + "|"
return md + "\n".join([header, separator] + markdown_table[1:]) return md + "\n".join([header, separator] + markdown_table[1:])
def _get_llm_description(self, image_blob, client, model, prompt=None): def _get_llm_description(self, image, client, model, prompt=None):
if image.content_type not in [
"image/jpeg",
"image/png",
"image/webp",
"image/gif",
]:
return "" # https://platform.openai.com/docs/guides/vision#what-type-of-files-can-i-upload
if prompt is None or prompt.strip() == "": if prompt is None or prompt.strip() == "":
prompt = "Write a caption for this image." prompt = "Write a caption for this image."
content_type = "image/jpeg" image_base64 = base64.b64encode(image.blob).decode("utf-8")
image_base64 = base64.b64encode(image_blob).decode("utf-8") data_uri = f"data:{image.content_type};base64,{image_base64}"
data_uri = f"data:{content_type};base64,{image_base64}"
messages = [ messages = [
{ {