From d6debbdaf769bc66014cb71f6f23615cca6d1917 Mon Sep 17 00:00:00 2001 From: Kenny Zhang Date: Thu, 9 Jan 2025 13:43:16 -0500 Subject: [PATCH] added cli params for doc intel --- src/markitdown/__main__.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py index b6cf963..45652f1 100644 --- a/src/markitdown/__main__.py +++ b/src/markitdown/__main__.py @@ -57,17 +57,35 @@ def main(): "--output", help="Output file name. If not provided, output is written to stdout.", ) + parser.add_argument( + "-d", + "--use-docintel", + action="store_true", + help="Use Document Intelligence to extract text instead of offline conversion. Requires a valid Document Intelligence Endpoint.", + ) + parser.add_argument( + "-e", + "--endpoint", + type=str, + help="Document Intelligence Endpoint. Required if using Document Intelligence.", + ) args = parser.parse_args() - if args.filename is None: - markitdown = MarkItDown() - result = markitdown.convert_stream(sys.stdin.buffer) - _handle_output(args, result) + if args.use_docintel: + if args.endpoint is None: + raise ValueError("Document Intelligence Endpoint is required when using Document Intelligence.") + elif args.filename is None: + raise ValueError("Filename is required when using Document Intelligence.") + markitdown = MarkItDown(endpoint=args.endpoint) else: markitdown = MarkItDown() - result = markitdown.convert(args.filename) - _handle_output(args, result) + if args.filename is None: + result = markitdown.convert_stream(sys.stdin.buffer) + else: + result = markitdown.convert(args.filename) + + _handle_output(args, result) def _handle_output(args, result: DocumentConverterResult): """Handle output to stdout or file"""