diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py index 6a24391..ed43289 100644 --- a/packages/markitdown/src/markitdown/__main__.py +++ b/packages/markitdown/src/markitdown/__main__.py @@ -8,101 +8,77 @@ from importlib.metadata import entry_points from .__about__ import __version__ from ._markitdown import MarkItDown, DocumentConverterResult +parser = argparse.ArgumentParser( + description="Convert various file formats to markdown.", + prog="markitdown", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=dedent( + """\ + examples: + markitdown example.pdf + markitdown -o example.md example.pdf + cat example.pdf | markitdown > example.md""" + ), +) +parser.add_argument( + "-v", + "--version", + action="version", + version=f"%(prog)s {__version__}", + help="show the version number and exit", +) +parser.add_argument( + "-o", + "--output", + metavar="OUTFILENAME", + help="if unspecified, defaults to stdout", +) +parser.add_argument( + "-d", + "--use-docintel", + action="store_true", + help="use online Document Intelligence to extract text (requires a valid `--endpoint`)", +) +parser.add_argument( + "-e", + "--endpoint", + type=str, + help="required for `--use-docintel`", +) +parser.add_argument( + "-p", + "--use-plugins", + action="store_true", + help="use 3rd-party plugins to convert files (see `--list-plugins`)", +) +parser.add_argument( + "--list-plugins", + action="store_true", + help="list installed 3rd-party plugins (loaded with `--use-plugins`)", +) +parser.add_argument( + "filename", metavar="FILENAME", nargs="?", help="if unspecified, defaults to stdin" +) -def main(): - parser = argparse.ArgumentParser( - description="Convert various file formats to markdown.", - prog="markitdown", - formatter_class=argparse.RawDescriptionHelpFormatter, - usage=dedent( - """ - SYNTAX: - markitdown - If FILENAME is empty, markitdown reads from stdin. 
- - EXAMPLE: - - markitdown example.pdf - - OR - - cat example.pdf | markitdown - - OR - - markitdown < example.pdf - - OR to save to a file use - - markitdown example.pdf -o example.md - - OR - - markitdown example.pdf > example.md - """ - ).strip(), - ) - - parser.add_argument( - "-v", - "--version", - action="version", - version=f"%(prog)s {__version__}", - help="show the version number and exit", - ) - - parser.add_argument( - "-o", - "--output", - help="Output file name. If not provided, output is written to stdout.", - ) - - parser.add_argument( - "-d", - "--use-docintel", - action="store_true", - help="Use Document Intelligence to extract text instead of offline conversion. Requires a valid Document Intelligence Endpoint.", - ) - - parser.add_argument( - "-e", - "--endpoint", - type=str, - help="Document Intelligence Endpoint. Required if using Document Intelligence.", - ) - - parser.add_argument( - "-p", - "--use-plugins", - action="store_true", - help="Use 3rd-party plugins to convert files. Use --list-plugins to see installed plugins.", - ) - - parser.add_argument( - "--list-plugins", - action="store_true", - help="List installed 3rd-party plugins. 
Plugins are loaded when using the -p or --use-plugin option.", - ) - - parser.add_argument("filename", nargs="?") - args = parser.parse_args() +def main(args=None): + args = parser.parse_args(args) if args.list_plugins: # List installed plugins, then exit print("Installed MarkItDown 3rd-party Plugins:\n") plugin_entry_points = list(entry_points(group="markitdown.plugin")) - if len(plugin_entry_points) == 0: - print(" * No 3rd-party plugins installed.") - print( - "\nFind plugins by searching for the hashtag #markitdown-plugin on GitHub.\n" - ) - else: + if plugin_entry_points: for entry_point in plugin_entry_points: print(f" * {entry_point.name:<16}\t(package: {entry_point.value})") print( "\nUse the -p (or --use-plugins) option to enable 3rd-party plugins.\n" ) + else: + print("No 3rd-party plugins installed.") + print( + "\nFind plugins by searching for the hashtag #markitdown-plugin on GitHub.\n" + ) sys.exit(0) if args.use_docintel: @@ -112,25 +88,20 @@ def main(): ) elif args.filename is None: raise ValueError("Filename is required when using Document Intelligence.") - markitdown = MarkItDown( - enable_plugins=args.use_plugins, docintel_endpoint=args.endpoint - ) - else: - markitdown = MarkItDown(enable_plugins=args.use_plugins) - if args.filename is None: - result = markitdown.convert_stream(sys.stdin.buffer) - else: + markitdown = MarkItDown( + enable_plugins=args.use_plugins, + docintel_endpoint=args.endpoint if args.use_docintel else None, + ) + + if args.filename: result = markitdown.convert(args.filename) + else: + result = markitdown.convert_stream(sys.stdin.buffer) - _handle_output(args, result) - - -def _handle_output(args, result: DocumentConverterResult): - """Handle output to stdout or file""" if args.output: with open(args.output, "w", encoding="utf-8") as f: - f.write(result.text_content) + print(result.text_content, file=f) else: print(result.text_content) diff --git a/packages/markitdown/tests/test_cli.py b/packages/markitdown/tests/test_cli.py 
index 7c8afc2..02004af 100644 --- a/packages/markitdown/tests/test_cli.py +++ b/packages/markitdown/tests/test_cli.py @@ -33,7 +33,7 @@ def test_invalid_flag(shared_tmp_dir) -> None: assert ( "unrecognized arguments" in result.stderr ), f"Expected 'unrecognized arguments' to appear in STDERR" - assert "SYNTAX" in result.stderr, f"Expected 'SYNTAX' to appear in STDERR" + assert "usage" in result.stderr, f"Expected 'usage' to appear in STDERR" def test_output_to_stdout(shared_tmp_dir) -> None: