Sort PPTX shapes to be read in top-to-bottom, left-to-right order

Referenced from 39bef65b31/pptx2md/parser.py (L249)
This commit is contained in:
Richard Ye 2025-03-07 14:02:19 -05:00 committed by GitHub
parent 82d84e3edd
commit 288a44ecf7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -160,10 +160,12 @@ class PptxConverter(DocumentConverter):
 # Group Shapes
 if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.GROUP:
-for subshape in shape.shapes:
+sorted_shapes = sorted(shape.shapes, key=attrgetter('top', 'left'))
+for subshape in sorted_shapes:
 get_shape_content(subshape, **kwargs)
-for shape in slide.shapes:
+sorted_shapes = sorted(slide.shapes, key=attrgetter('top', 'left'))
+for shape in sorted_shapes:
 get_shape_content(shape, **kwargs)
 md_content = md_content.strip()