update: add parameter "method" for PdfConverter

This commit is contained in:
tungsten106 2024-12-23 10:24:51 +08:00
parent df5f14ef23
commit 263e0b5bd4

View file

@@ -685,12 +685,13 @@ class PdfConverter(DocumentConverter):
extension = kwargs.get("file_extension", "") extension = kwargs.get("file_extension", "")
if extension.lower() != ".pdf": if extension.lower() != ".pdf":
return None return None
method = kwargs.get("method", "pdfminer")
# return DocumentConverterResult( if method == "pdfminer":
# title=None, text_content = pdfminer.high_level.extract_text(local_path)
# text_content=pdfminer.high_level.extract_text(local_path), elif method == "pymupdf4llm":
# )
text_content = pymupdf4llm.to_markdown(local_path, show_progress=False) text_content = pymupdf4llm.to_markdown(local_path, show_progress=False)
else:
return None # unknown method
return DocumentConverterResult(title=None, text_content=text_content) return DocumentConverterResult(title=None, text_content=text_content)