Merge branch 'main' into dependabot/github_actions/actions/setup-python-5

This commit is contained in:
afourney 2024-12-20 16:17:08 -08:00 committed by GitHub
commit 1566e0f8c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 31 additions and 2 deletions

View file

@ -5,6 +5,7 @@
[![PyPI](https://img.shields.io/pypi/v/markitdown.svg)](https://pypi.org/project/markitdown/)
![PyPI - Downloads](https://img.shields.io/pypi/dd/markitdown)
[![Built by AutoGen Team](https://img.shields.io/badge/Built%20by-AutoGen%20Team-blue)](https://github.com/microsoft/autogen)
MarkItDown is a utility for converting various files to Markdown (e.g., for indexing, text analysis, etc.).
@ -29,6 +30,12 @@ To install MarkItDown, use pip: `pip install markitdown`. Alternatively, you can
markitdown path-to-file.pdf > document.md
```
Or use `-o` to specify the output file:
```bash
markitdown path-to-file.pdf -o document.md
```
You can also pipe content:
```bash

View file

@ -4,7 +4,7 @@
import sys
import argparse
from textwrap import dedent
from ._markitdown import MarkItDown
from ._markitdown import MarkItDown, DocumentConverterResult
def main():
@ -29,20 +29,42 @@ def main():
OR
markitdown < example.pdf
OR to save to a file use
markitdown example.pdf -o example.md
OR
markitdown example.pdf > example.md
"""
).strip(),
)
parser.add_argument("filename", nargs="?")
parser.add_argument(
"-o",
"--output",
help="Output file name. If not provided, output is written to stdout.",
)
args = parser.parse_args()
if args.filename is None:
markitdown = MarkItDown()
result = markitdown.convert_stream(sys.stdin.buffer)
print(result.text_content)
_handle_output(args, result)
else:
markitdown = MarkItDown()
result = markitdown.convert(args.filename)
_handle_output(args, result)
def _handle_output(args, result: "DocumentConverterResult") -> None:
    """Write the converted text to the ``--output`` file or to stdout.

    Args:
        args: Parsed command-line namespace. Only ``args.output`` is read;
            a falsy value (``None`` or ``""``) selects stdout.
        result: Conversion result. Only ``result.text_content`` is read.

    Raises:
        OSError: If the output file cannot be opened or written.

    File output is always encoded as UTF-8. For stdout, characters that the
    stream's encoding cannot represent are substituted with ``?`` instead of
    raising ``UnicodeEncodeError``, so a successful conversion is not lost on
    consoles or pipes that use a narrow encoding.
    """
    text = result.text_content

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(text)
        return

    # stdout may be a replaced stream without a usable ``encoding``
    # attribute; fall back to UTF-8 in that case.
    encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
    # Round-trip through the target encoding so print() only ever sees
    # characters the stream is able to emit.
    print(text.encode(encoding, errors="replace").decode(encoding))