This commit is contained in:
AbSadiki 2025-01-06 21:44:54 +01:00 committed by GitHub
commit 94876e873e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 19 additions and 2 deletions

View file

@ -49,6 +49,7 @@ from markitdown import MarkItDown
md = MarkItDown() md = MarkItDown()
result = md.convert("test.xlsx") result = md.convert("test.xlsx")
print(result.text_content) print(result.text_content)
result.save("test.md")
``` ```
To use Large Language Models for image descriptions, provide `llm_client` and `llm_model`: To use Large Language Models for image descriptions, provide `llm_client` and `llm_model`:

View file

@ -57,6 +57,12 @@ def main():
"--output", "--output",
help="Output file name. If not provided, output is written to stdout.", help="Output file name. If not provided, output is written to stdout.",
) )
parser.add_argument(
"-e",
"--encoding",
help="Encoding of the output file. Defaults to utf-8.",
default="utf-8",
)
args = parser.parse_args() args = parser.parse_args()
if args.filename is None: if args.filename is None:
@ -72,8 +78,7 @@ def main():
def _handle_output(args, result: DocumentConverterResult): def _handle_output(args, result: DocumentConverterResult):
"""Handle output to stdout or file""" """Handle output to stdout or file"""
if args.output: if args.output:
with open(args.output, "w", encoding="utf-8") as f: result.save(args.output, encoding=args.encoding)
f.write(result.text_content)
else: else:
print(result.text_content) print(result.text_content)

View file

@ -149,6 +149,17 @@ class DocumentConverterResult:
self.title: Union[str, None] = title self.title: Union[str, None] = title
self.text_content: str = text_content self.text_content: str = text_content
def save(self, file_path: str, encoding: str = "utf-8") -> None:
    """Write this result's `text_content` to a file.

    The destination is opened in text write mode ("w"), so an existing
    file at `file_path` is truncated and overwritten.

    params:
        file_path: Path of the file to write the converted text to.
        encoding: Text encoding used for the written file.
    """
    with open(file_path, mode="w", encoding=encoding) as out_file:
        # Read the attribute only after the file is open, so the order of
        # side effects matches a plain open-then-write sequence.
        converted_text = self.text_content
        out_file.write(converted_text)
class DocumentConverter: class DocumentConverter:
"""Abstract superclass of all DocumentConverters.""" """Abstract superclass of all DocumentConverters."""