Updated prompt to extract text and format it in Markdown, including additional visual details, instead of only describing the image.
This commit is contained in:
parent
125e206047
commit
3b8ecace0b
1 changed file with 15 additions and 1 deletion
|
|
@@ -1047,7 +1047,21 @@ class ImageConverter(MediaConverter):
|
||||||
|
|
||||||
def _get_llm_description(self, local_path, extension, client, model, prompt=None):
|
def _get_llm_description(self, local_path, extension, client, model, prompt=None):
|
||||||
if prompt is None or prompt.strip() == "":
|
if prompt is None or prompt.strip() == "":
|
||||||
prompt = "Write a detailed caption for this image."
|
prompt = '''
|
||||||
|
Analyze the image and extract all visible text in the original language.
|
||||||
|
Reproduce the extracted text in a structured Markdown format, preserving
|
||||||
|
any formatting such as headings, bullet points, and highlights. Ensure
|
||||||
|
the output accurately reflects the structure and style of the original
|
||||||
|
document.
|
||||||
|
|
||||||
|
Additionally, if the image includes any visual elements (e.g., diagrams,
|
||||||
|
logos, or specific layouts) that cannot be represented directly in Markdown,
|
||||||
|
describe them in plain text as part of the Markdown document under a section
|
||||||
|
titled "Visual Notes."
|
||||||
|
|
||||||
|
Output only the converted Markdown text without any additional commentary
|
||||||
|
or explanations.
|
||||||
|
'''
|
||||||
|
|
||||||
data_uri = ""
|
data_uri = ""
|
||||||
with open(local_path, "rb") as image_file:
|
with open(local_path, "rb") as image_file:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue