# File: packages/markitdown/src/markitdown/csvconvertor.py
#
# Origin: commit 9f9fb6bf632610ade0d208734d87da5d95eb6574
# ("Create csvconvertor.py") by Robin Hwang
# <61886677+rhwang1226@users.noreply.github.com>,
# Fri, 25 Apr 2025 22:56:26 -0400.
"""Convert CSV documents into Markdown tables."""

import csv
import io
from typing import Sequence

from .document_converter import DocumentConverter, DocumentConverterResult

_CSV_EXTENSION = ".csv"
_CSV_MIME_TYPES = ("text/csv", "application/csv")
_BOM = "\ufeff"


def _escape_cell(cell: str) -> str:
    """Return *cell* made safe for use inside a single Markdown table cell."""
    # An unescaped pipe would be read as a column separator.
    text = cell.replace("|", "\\|")
    # A table row must stay on one physical line; quoted CSV fields may
    # legitimately contain line breaks, so render them as <br>.
    return text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "<br>")


def _format_row(cells: Sequence[str], width: int) -> str:
    """Render *cells* as one Markdown table row with exactly *width* columns.

    Short rows are padded with empty cells; cells beyond *width* are dropped.
    The caller's sequence is not modified.
    """
    fitted = list(cells[:width])
    fitted.extend([""] * (width - len(fitted)))
    return "| " + " | ".join(_escape_cell(cell) for cell in fitted) + " |"


class CsvConverter(DocumentConverter):
    """Document converter that renders CSV text as a Markdown table.

    NOTE(review): the members used on ``stream_info`` (``extension``,
    ``mime_type``, ``read_text()``) are taken as given from the original
    code -- confirm they match the ``DocumentConverter`` contract.
    """

    def accepts(self, stream_info) -> bool:
        """Return True when *stream_info* looks like a CSV document.

        A stream is accepted if its extension is ``.csv`` or its MIME type is
        ``text/csv`` / ``application/csv``. Both comparisons ignore case, and
        MIME parameters such as ``; charset=utf-8`` are ignored.
        """
        if (stream_info.extension or "").lower() == _CSV_EXTENSION:
            return True
        # Only consult the MIME type when the extension did not match.
        mime_type = (stream_info.mime_type or "").split(";", 1)[0]
        return mime_type.strip().lower() in _CSV_MIME_TYPES

    def convert(self, stream_info) -> DocumentConverterResult:
        """Convert the CSV text of *stream_info* into a Markdown table.

        The first non-blank record becomes the header row. Every later record
        is padded or truncated to the header's width. Blank records are
        skipped. Input without any non-blank record yields an empty result.
        """
        content = stream_info.read_text()
        # A leading byte-order mark would otherwise land in the first header.
        if content.startswith(_BOM):
            content = content[len(_BOM):]

        # newline="" lets the csv module do its own line-ending handling, so
        # CR-only files and line breaks inside quoted fields parse correctly.
        reader = csv.reader(io.StringIO(content, newline=""))
        # csv.reader yields [] for blank lines; as a header that would
        # truncate every row to zero columns, so drop such records.
        rows = [row for row in reader if row]

        if not rows:
            return DocumentConverterResult("")

        header, *body = rows
        width = len(header)

        lines = [
            _format_row(header, width),
            "| " + " | ".join(["---"] * width) + " |",
        ]
        lines.extend(_format_row(row, width) for row in body)

        return DocumentConverterResult("\n".join(lines))