update: add parameter "method" for PdfConverter
This commit is contained in:
parent
df5f14ef23
commit
263e0b5bd4
1 changed file with 7 additions and 6 deletions
|
|
@@ -685,12 +685,13 @@ class PdfConverter(DocumentConverter):
|
||||||
extension = kwargs.get("file_extension", "")
|
extension = kwargs.get("file_extension", "")
|
||||||
if extension.lower() != ".pdf":
|
if extension.lower() != ".pdf":
|
||||||
return None
|
return None
|
||||||
|
method = kwargs.get("method", "pdfminer")
|
||||||
# return DocumentConverterResult(
|
if method == "pdfminer":
|
||||||
# title=None,
|
text_content = pdfminer.high_level.extract_text(local_path)
|
||||||
# text_content=pdfminer.high_level.extract_text(local_path),
|
elif method == "pymupdf4llm":
|
||||||
# )
|
|
||||||
text_content = pymupdf4llm.to_markdown(local_path, show_progress=False)
|
text_content = pymupdf4llm.to_markdown(local_path, show_progress=False)
|
||||||
|
else:
|
||||||
|
return None # unknown method
|
||||||
return DocumentConverterResult(title=None, text_content=text_content)
|
return DocumentConverterResult(title=None, text_content=text_content)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue