From 263e0b5bd4956745da43169eccaaec66ec855a7e Mon Sep 17 00:00:00 2001 From: tungsten106 Date: Mon, 23 Dec 2024 10:24:51 +0800 Subject: [PATCH] update: add parameter "method" for PdfConverter --- src/markitdown/_markitdown.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 85841ae..147acb8 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -685,12 +685,13 @@ class PdfConverter(DocumentConverter): extension = kwargs.get("file_extension", "") if extension.lower() != ".pdf": return None - - # return DocumentConverterResult( - # title=None, - # text_content=pdfminer.high_level.extract_text(local_path), - # ) - text_content = pymupdf4llm.to_markdown(local_path, show_progress=False) + method = kwargs.get("method", "pdfminer") + if method == "pdfminer": + text_content = pdfminer.high_level.extract_text(local_path) + elif method == "pymupdf4llm": + text_content = pymupdf4llm.to_markdown(local_path, show_progress=False) + else: + return None # unknown method return DocumentConverterResult(title=None, text_content=text_content)