feat: Add OCR fallback when MLM is unavailable for image processing
feat: Add OCR fallback when MLM is unavailable for image processing

- Add OCR text extraction using easyocr when the MLM client/model is not configured
- Support both Chinese and English text recognition
- Add OCR results under an "OCR Text" section in the markdown output
- Only execute OCR as a fallback when an MLM description is not available
This commit is contained in:
parent
81e3f24acd
commit
02cc0cef84
1 changed file with 14 additions and 0 deletions
|
|
@@ -798,6 +798,20 @@ class ImageConverter(MediaConverter):
|
||||||
).strip()
|
).strip()
|
||||||
+ "\n"
|
+ "\n"
|
||||||
)
|
)
|
||||||
|
# add ocr only when MLM is not available
|
||||||
|
if mlm_client is None or mlm_model is None:
|
||||||
|
try:
|
||||||
|
import easyocr
|
||||||
|
reader = easyocr.Reader(['ch_sim','en']) # support chinese and english
|
||||||
|
ocr_result = reader.readtext(local_path)
|
||||||
|
if ocr_result:
|
||||||
|
md_content += "\n"
|
||||||
|
for detection in ocr_result:
|
||||||
|
text = detection[1] # extract text
|
||||||
|
md_content += f"- {text}\n"
|
||||||
|
except ImportError:
|
||||||
|
# easyocr not installed
|
||||||
|
pass
|
||||||
|
|
||||||
return DocumentConverterResult(
|
return DocumentConverterResult(
|
||||||
title=None,
|
title=None,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue