Don't have ZipConverter accept OOXML files. This will never yield a good result.

This commit is contained in:
Adam Fourney 2025-02-28 09:48:31 -08:00
parent 9a19fdd134
commit c0c48bdf80

View file

@@ -77,6 +77,10 @@ class ZipConverter(DocumentConverter):
try: try:
# Extract the zip file safely # Extract the zip file safely
with zipfile.ZipFile(local_path, "r") as zipObj: with zipfile.ZipFile(local_path, "r") as zipObj:
# Bail if we discover it's an Office OOXML file
if "[Content_Types].xml" in zipObj.namelist():
return None
# Safeguard against path traversal # Safeguard against path traversal
for member in zipObj.namelist(): for member in zipObj.namelist():
member_path = os.path.normpath(os.path.join(extraction_dir, member)) member_path = os.path.normpath(os.path.join(extraction_dir, member))