Added CLI parameters for Document Intelligence

This commit is contained in:
Kenny Zhang 2025-01-09 13:43:16 -05:00
parent f58a864951
commit d6debbdaf7

View file

@ -57,17 +57,35 @@ def main():
"--output", "--output",
help="Output file name. If not provided, output is written to stdout.", help="Output file name. If not provided, output is written to stdout.",
) )
parser.add_argument(
"-d",
"--use-docintel",
action="store_true",
help="Use Document Intelligence to extract text instead of offline conversion. Requires a valid Document Intelligence Endpoint.",
)
parser.add_argument(
"-e",
"--endpoint",
type=str,
help="Document Intelligence Endpoint. Required if using Document Intelligence.",
)
args = parser.parse_args() args = parser.parse_args()
if args.filename is None: if args.use_docintel:
markitdown = MarkItDown() if args.endpoint is None:
result = markitdown.convert_stream(sys.stdin.buffer) raise ValueError("Document Intelligence Endpoint is required when using Document Intelligence.")
_handle_output(args, result) elif args.filename is None:
raise ValueError("Filename is required when using Document Intelligence.")
markitdown = MarkItDown(endpoint=args.endpoint)
else: else:
markitdown = MarkItDown() markitdown = MarkItDown()
result = markitdown.convert(args.filename)
_handle_output(args, result)
if args.filename is None:
result = markitdown.convert_stream(sys.stdin.buffer)
else:
result = markitdown.convert(args.filename)
_handle_output(args, result)
def _handle_output(args, result: DocumentConverterResult): def _handle_output(args, result: DocumentConverterResult):
"""Handle output to stdout or file""" """Handle output to stdout or file"""