Merge branch 'main' into dependabot/github_actions/actions/setup-python-5
This commit is contained in:
commit
1566e0f8c7
2 changed files with 31 additions and 2 deletions
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
[PyPI](https://pypi.org/project/markitdown/)
|
||||

|
||||
[AutoGen](https://github.com/microsoft/autogen)
|
||||
|
||||
|
||||
MarkItDown is a utility for converting various files to Markdown (e.g., for indexing, text analysis, etc.).
|
||||
|
|
@ -29,6 +30,12 @@ To install MarkItDown, use pip: `pip install markitdown`. Alternatively, you can
|
|||
markitdown path-to-file.pdf > document.md
|
||||
```
|
||||
|
||||
Or use `-o` to specify the output file:
|
||||
|
||||
```bash
|
||||
markitdown path-to-file.pdf -o document.md
|
||||
```
|
||||
|
||||
You can also pipe content:
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
import sys
|
||||
import argparse
|
||||
from textwrap import dedent
|
||||
from ._markitdown import MarkItDown
|
||||
from ._markitdown import MarkItDown, DocumentConverterResult
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -29,20 +29,42 @@ def main():
|
|||
OR
|
||||
|
||||
markitdown < example.pdf
|
||||
|
||||
OR to save to a file use
|
||||
|
||||
markitdown example.pdf -o example.md
|
||||
|
||||
OR
|
||||
|
||||
markitdown example.pdf > example.md
|
||||
"""
|
||||
).strip(),
|
||||
)
|
||||
|
||||
parser.add_argument("filename", nargs="?")
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
help="Output file name. If not provided, output is written to stdout.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.filename is None:
|
||||
markitdown = MarkItDown()
|
||||
result = markitdown.convert_stream(sys.stdin.buffer)
|
||||
print(result.text_content)
|
||||
_handle_output(args, result)
|
||||
else:
|
||||
markitdown = MarkItDown()
|
||||
result = markitdown.convert(args.filename)
|
||||
_handle_output(args, result)
|
||||
|
||||
|
||||
def _handle_output(args, result: "DocumentConverterResult"):
    """Write the converted Markdown to a file or to stdout.

    Args:
        args: Parsed command-line arguments. Only ``args.output`` is read;
            a truthy value is used as the path of the file to write.
        result: Conversion result. Only ``result.text_content`` is read.

    When ``args.output`` is set, the text is written to that path as UTF-8.
    Otherwise it is printed to stdout; characters that the stdout encoding
    cannot represent are replaced instead of raising ``UnicodeEncodeError``.
    """
    text = result.text_content

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(text)
        return

    # stdout is not guaranteed to be UTF-8 (legacy console code pages, pipes
    # under a C locale), and converted documents routinely contain characters
    # outside such encodings. Round-trip through the target encoding with
    # errors="replace" so printing degrades gracefully rather than crashing.
    # Stream replacements may expose no encoding at all; fall back to UTF-8.
    encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
    print(text.encode(encoding, errors="replace").decode(encoding))
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue