feat: Add OCR fallback when MLM is unavailable for image processing
feat: Add OCR fallback when MLM is unavailable for image processing

- Add OCR text extraction using easyocr when the MLM client or model is not configured
- Support both Chinese and English text recognition
- Add OCR results under an "OCR Text" section in the Markdown output
- Only execute OCR as a fallback when an MLM description is not available
This commit is contained in:
parent
81e3f24acd
commit
02cc0cef84
1 changed file with 14 additions and 0 deletions
|
|
@@ -798,6 +798,20 @@ class ImageConverter(MediaConverter):
|
|||
).strip()
|
||||
+ "\n"
|
||||
)
|
||||
# add ocr only when MLM is not available
|
||||
if mlm_client is None or mlm_model is None:
|
||||
try:
|
||||
import easyocr
|
||||
reader = easyocr.Reader(['ch_sim','en']) # support chinese and english
|
||||
ocr_result = reader.readtext(local_path)
|
||||
if ocr_result:
|
||||
md_content += "\n"
|
||||
for detection in ocr_result:
|
||||
text = detection[1] # extract text
|
||||
md_content += f"- {text}\n"
|
||||
except ImportError:
|
||||
# easyocr not installed
|
||||
pass
|
||||
|
||||
return DocumentConverterResult(
|
||||
title=None,
|
||||
|
|
|
|||
Loading…
Reference in a new issue