Updated the default image prompt to extract visible text and format it as Markdown, with additional visual details described, instead of only captioning the image.

2024-12-22 18:02:29 +01:00 · 2024-12-22 18:02:29 +01:00 · 3b8ecace0b
commit 3b8ecace0b
parent 125e206047
1 changed file with 15 additions and 1 deletion
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -1047,7 +1047,21 @@ class ImageConverter(MediaConverter):

    def _get_llm_description(self, local_path, extension, client, model, prompt=None):
        if prompt is None or prompt.strip() == "":
-            prompt = "Write a detailed caption for this image."
+            prompt = '''
+                Analyze the image and extract all visible text in the original language.
+                Reproduce the extracted text in a structured Markdown format, preserving
+                any formatting such as headings, bullet points, and highlights. Ensure
+                the output accurately reflects the structure and style of the original
+                document. 
+
+                Additionally, if the image includes any visual elements (e.g., diagrams,
+                logos, or specific layouts) that cannot be represented directly in Markdown,
+                describe them in plain text as part of the Markdown document under a section
+                titled "Visual Notes."
+
+                Output only the converted Markdown text without any additional commentary
+                or explanations.
+            '''

        data_uri = ""
        with open(local_path, "rb") as image_file: