JsonConverter for Converting JSON Files into Structured Markdown Files

Converts JSON files to Markdown. The output preserves the structure of the JSON file as closely as possible, while using Markdown syntax for readability.
This commit is contained in:
ZeyuTeng96 2025-01-03 14:10:40 +08:00 committed by GitHub
parent 125e206047
commit 0f948ade40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1204,6 +1204,42 @@ class ZipConverter(DocumentConverter):
text_content=f"[ERROR] Failed to process zip file {local_path}: {str(e)}",
)
class JsonConverter(DocumentConverter):
    """
    Converts JSON files to Markdown. The output preserves the structure of the
    JSON file as closely as possible, while using Markdown syntax for readability.

    Rendering rules:
      * top-level object -> the object pretty-printed as JSON (4-space indent,
        non-ASCII characters kept verbatim);
      * top-level array  -> one ``# <list_heading> <n>`` heading per element
        (1-based), followed by the element pretty-printed as JSON when it is an
        object/array, or by its ``str()`` form otherwise;
      * any other top-level value -> its ``str()`` form.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        """
        Convert the JSON file at ``local_path`` to Markdown text.

        Keyword arguments read from ``kwargs``:
            file_extension: extension of the file; anything other than
                ``.json`` (case-insensitive) makes this converter decline.
            list_heading: heading label used for each element of a top-level
                array (default ``"Elem"``).

        Returns:
            ``None`` when the extension is not ``.json``; otherwise a
            ``DocumentConverterResult`` with ``title=None`` and the rendered
            Markdown, stripped of leading/trailing whitespace.

        Raises:
            OSError: the file cannot be opened.
            UnicodeDecodeError: the file is not valid UTF-8.
            json.JSONDecodeError: the file is not valid JSON.
        """
        # Bail if not a Json. `or ""` also covers an explicit file_extension=None.
        extension = kwargs.get("file_extension") or ""
        if extension.lower() != ".json":
            return None

        list_heading = kwargs.get("list_heading", "Elem")

        # JSON text is UTF-8; do not depend on the platform default encoding.
        # "utf-8-sig" additionally drops a leading BOM, which json.load rejects.
        with open(local_path, "r", encoding="utf-8-sig") as f:
            json_data = json.load(f)

        if isinstance(json_data, dict):
            md_content = json.dumps(json_data, indent=4, ensure_ascii=False)
        elif isinstance(json_data, list):
            sections = []
            for idx, item in enumerate(json_data, start=1):
                if isinstance(item, (dict, list)):
                    body = json.dumps(item, indent=4, ensure_ascii=False)
                else:
                    # Scalars use Python's str(): null -> "None", true -> "True".
                    body = f"{item}"
                sections.append(f"# {list_heading} {idx}\n{body}")
            # A blank line separates consecutive sections.
            md_content = "\n\n".join(sections)
        else:
            md_content = f"{json_data}"

        # Remove leading/trailing whitespace (e.g. an empty last element).
        md_content = md_content.strip()

        return DocumentConverterResult(
            title=None,
            text_content=md_content,
        )
class FileConversionException(BaseException):
    """
    Exception type for file conversion failures.

    Note that this derives from ``BaseException`` rather than ``Exception``,
    so a plain ``except Exception`` handler will not catch it.
    """
@ -1276,6 +1312,7 @@ class MarkItDown:
self.register_page_converter(WikipediaConverter())
self.register_page_converter(YouTubeConverter())
self.register_page_converter(BingSerpConverter())
self.register_page_converter(JsonConverter())
self.register_page_converter(DocxConverter())
self.register_page_converter(XlsxConverter())
self.register_page_converter(PptxConverter())