Added JSON converter

This commit is contained in:
Certainly Not a Genius 2024-12-25 21:37:08 +01:00
parent 125e206047
commit 23e1fbb064

View file

@ -1205,6 +1205,56 @@ class ZipConverter(DocumentConverter):
)
class JsonConverter(DocumentConverter):
    """Converts a ``.json`` file into a Markdown outline of its contents.

    Nested objects and arrays are introduced by a heading line labelled with
    their dotted path; scalar values are emitted as inline-code
    ``path:value`` entries.
    """

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        """Convert the JSON file at *local_path* to Markdown.

        Args:
            local_path: Path of the file to read.
            **kwargs: Only ``file_extension`` is consulted here.

        Returns:
            A ``DocumentConverterResult`` with ``title=None`` and the generated
            Markdown as ``text_content``, or ``None`` when the extension is not
            ``.json`` or the file cannot be decoded/parsed as JSON.
        """
        # Bail if not a JSON file. `or ""` guards against an explicit
        # file_extension=None, which would otherwise crash on .lower().
        extension = kwargs.get("file_extension") or ""
        if extension.lower() != ".json":
            return None

        # Decode explicitly instead of relying on the platform's locale
        # encoding. "utf-8-sig" reads plain UTF-8 and also drops a leading
        # BOM, which json.load would otherwise reject.
        try:
            with open(local_path, encoding="utf-8-sig") as json_file:
                json_data = json.load(json_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Not parseable as JSON text: report "cannot convert".
            return None

        # The file name without its extension is the root of every path label.
        root_prefix, _ = os.path.splitext(os.path.basename(local_path))
        return DocumentConverterResult(
            title=None,
            text_content=self._json_traversal(
                json_data, level=1, prefix=root_prefix
            ),
        )

    def _json_traversal(self, d: Union[dict, list], level: int, prefix: str) -> str:
        """Recursively render a parsed JSON value as Markdown text.

        Args:
            d: The value to render. Dicts and lists are walked recursively;
                any other value is treated as a scalar leaf.
            level: Number of ``#`` characters used for headings emitted at
                this depth.
            prefix: Path label accumulated so far (``name.key[index]...``).

        Returns:
            The Markdown fragment for *d*.
        """
        if isinstance(d, dict):
            parts = []
            for key, value in d.items():
                # Only containers get a heading; scalars are rendered by the
                # recursive call below as a single `path:value` entry.
                if isinstance(value, (dict, list)):
                    parts.append("%s %s.%s\n" % ("#" * level, prefix, str(key)))
                parts.append(
                    self._json_traversal(value, level + 1, prefix + "." + str(key))
                )
            return "".join(parts)

        if isinstance(d, list):
            # A list is fenced by horizontal rules, one numbered entry per item.
            parts = ["---\n"]
            for index, item in enumerate(d):
                # NOTE(review): items restart at level=0, so a container nested
                # directly inside a list item gets a heading with no '#'
                # marker. Kept as-is to preserve the output format; confirm
                # whether this is intended.
                parts.append(
                    str(index)
                    + ". "
                    + self._json_traversal(
                        item, level=0, prefix=prefix + "[" + str(index) + "]"
                    )
                )
            parts.append("---\n")
            return "".join(parts)

        # Scalar leaf: JSON null is spelled out, everything else uses repr().
        return "`null` \n" if d is None else "`%s:%s` \n" % (prefix, repr(d))
class FileConversionException(BaseException):
    """Exception type named for file-conversion failures.

    NOTE: this derives directly from ``BaseException``, so handlers written
    as ``except Exception`` do not catch it.
    """
@ -1285,6 +1335,7 @@ class MarkItDown:
self.register_page_converter(IpynbConverter())
self.register_page_converter(PdfConverter())
self.register_page_converter(ZipConverter())
self.register_page_converter(JsonConverter())
def convert(
self, source: Union[str, requests.Response, Path], **kwargs: Any