From 84824778f6ea2332a5c75b5e177038be847984d5 Mon Sep 17 00:00:00 2001
From: tungsten106
Date: Thu, 26 Dec 2024 17:16:38 +0800
Subject: [PATCH] update: use smaller test-pdf size

---
 tests/test_markitdown.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py
index 1b5347a..75cec3e 100644
--- a/tests/test_markitdown.py
+++ b/tests/test_markitdown.py
@@ -307,7 +307,8 @@ def test_markitdown_pdf() -> None:
     result = markitdown.convert(
         os.path.join(TEST_FILES_DIR, "2308.08155v2.pdf"),
         engine="pymupdf4llm",
-        engine_kwargs={"show_progress": False},  # additional kwargs
+
+        engine_kwargs={"show_progress": False, "pages": [i for i in range(10)],},  # additional kwargs
     )
     for test_string in PDF_TEST_STRINGS:
         assert test_string in result.text_content
@@ -320,6 +321,7 @@ def test_markitdown_pdf() -> None:
             "show_progress": False,
             "write_images": True,
             "image_path": "tests/out",
+            "pages": [i for i in range(10)],
         },  # `write_images` must be True, setting `image_path` for images saving dir.
     )
     for test_string in PDF_TEST_STRINGS:
@@ -327,16 +329,17 @@ def test_markitdown_pdf() -> None:
 
     # By pdfminer
     result = markitdown.convert(
-        os.path.join(TEST_FILES_DIR, "2308.08155v2.pdf"), engine="pdfminer"
+        os.path.join(TEST_FILES_DIR, "2308.08155v2.pdf"), engine="pdfminer",
+        engine_kwargs={"page_numbers": [i for i in range(10)],}
     )
     for test_string in PDF_TEST_STRINGS:
         assert test_string in result.text_content
 
 if __name__ == "__main__":
     """Runs this file's tests from the command line."""
-    test_markitdown_remote()
-    test_markitdown_local()
-    test_markitdown_exiftool()
-    test_markitdown_deprecation()
-    test_markitdown_llm()
+    # test_markitdown_remote()
+    # test_markitdown_local()
+    # test_markitdown_exiftool()
+    # test_markitdown_deprecation()
+    # test_markitdown_llm()
     test_markitdown_pdf()