diff --git a/packages/markitdown/src/markitdown/converters/_pptx_converter.py b/packages/markitdown/src/markitdown/converters/_pptx_converter.py index afb37a0..76c481a 100644 --- a/packages/markitdown/src/markitdown/converters/_pptx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_pptx_converter.py @@ -64,7 +64,9 @@ class PptxConverter(HtmlConverter): md_content += f"\n\n\n" title = slide.shapes.title - for shape in slide.shapes: + + def get_shape_content(shape, **kwargs): + nonlocal md_content # Pictures if self._is_picture(shape): # https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069 @@ -134,6 +136,14 @@ class PptxConverter(HtmlConverter): else: md_content += shape.text + "\n" + # Group Shapes + if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.GROUP: + for subshape in shape.shapes: + get_shape_content(subshape, **kwargs) + + for shape in slide.shapes: + get_shape_content(shape, **kwargs) + md_content = md_content.strip() if slide.has_notes_slide: diff --git a/packages/markitdown/tests/test_files/test.pptx b/packages/markitdown/tests/test_files/test.pptx index ea1bbcb..e6d16f3 100644 Binary files a/packages/markitdown/tests/test_files/test.pptx and b/packages/markitdown/tests/test_files/test.pptx differ