From c782df2b20928b9cc9497e47ffce59dd97aa7912 Mon Sep 17 00:00:00 2001 From: Sathindu Ganhala Arachchige Date: Thu, 27 Mar 2025 14:58:50 -0400 Subject: [PATCH] test: add test cases for docx equation rendering --- .../markitdown/tests/test_files/equations.docx | Bin 0 -> 15235 bytes packages/markitdown/tests/test_module_misc.py | 13 +++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 packages/markitdown/tests/test_files/equations.docx diff --git a/packages/markitdown/tests/test_files/equations.docx b/packages/markitdown/tests/test_files/equations.docx new file mode 100644 index 0000000000000000000000000000000000000000..6a05cd77f62aeb61624be36c05e12773177a8071 GIT binary patch literal 15235 zcmeHuWpEuyvhEQxGfS3aF|#a|EM|+D$zo<^W(JE{7BgDR%*@QpPcwJ-&8&BCyomSr z-agS4r@QLQrp&CYsxM_FKtNFe-~dPf06+v_jImNS0s;W?!2kdh03@)wpp~V)p{2dH zf{V4GohGfbg*jmkC@}dC0Psiq|K0uvzk%BLVY6;}#IN@OZ+?^Y3*yb>`8|U+rMJj1Xq zp0&<>B+ii=;|rJk;2`K^LjuGWk5fI_NRVKwPnt~2YQ#Sa(5rAJZ#L+u#( z!cRGr5nSvutY15qffzCeU|xS)#8prG7Bj;U$wOG4@DNpX@0P2y&us0Td~>Kg|2g({ z@tVIa7PSau(Y?hWi1$OY-rqq0vj1Y9_%YZG=N~oFAJzx+VV>HyhURv3w11TUYnK0m zZSil9UKZQ+VV1Cb=YFq#lWlTKU6{Gjbovv^n2QjQ>SB^8%L``nZ?9Yn3&5I(x+23< z({U57cIm0rek6PX8&#lgYgn-t32Io2JE!ecZJEPYjVlj##zc5v_ z&~a?=n1|rx(N2_IN};>mLKxFe6yr1cBsIC2Hj4B2$(~F`^$1Yh;WtqqwEUHm9c}O^?wlgT}sSojh{Q?I`RG3803` zh6mavNR*PJ9Tq`DI~sn5PkbkS-w$qbkNm z#w|L`^O`igh5>6j@)9s;%gf71*y1jV;qGsOeZ90f+uYsVB`v>pnkSF0Ak5~0f3B`D zsRk8?ArFW{Vy=2VeJcYCmgQP$j}-3=#CNZHP>7KVd+> zRY>5(7xJbYaDpsw?m~qiMx#S-2bpF0Tw!L?jK2TL7f+NTi4A-c?T0P&z6P719hA!T z81RHV9s(lxPZ(GaD@66vk_^sh8vr6fcJ_SsNL#jAiH;&jH9YWEnVIOFuBbZrJSXg6 z5pf-YAmg!UMt8K?p8pYeBr)dI5ZGr(`Zw`JO5mMqO3v+0B%QYv1b4@7hB_s1ke7>V zpm67(xIigqYmgRbx)|UEY1td?t)F!1PQ4QEF=pJv-fv>=z4@7{G&Gb0=Ba;BA4{Yj z&>r%L;YpOSBy@ToBL&S|=dOJDAvo!rpX8%)D&L6)UF~=*YSlPnX=f2*4+8@?WH4qI1@VMG*q1a{(fchYv?d}nZ>Avjg8E$Y_D;egk?l&^%0XX41Ta`+d7 z){(0?m*LI*kIdL#gA)CCBgS z7^A`DwLz(?b63BhG`tl%k4BAgzA}ViAt7J`1w|#Z0mFcEqn%>05xLbVJlqvD85 zmQ~H2Nk+(fD>i|8l>xrflVXaUdwnCR$v+5P+@W0Gnanau-qOw=mBUQ6s0(s6MJ=z$ z$60M9ZyM_GFd6*G%q&q;R@^>5VcC-BMa1^24rfWE4WUAtm*5q%g$mM>qT$79>`<=m z1@xV`4QuZ5O`bdec)asUOV!?JaY(G!c>V36RJ;&rM%+tVDpP@%{Ql5+4kOsyEpG3y2sH#? zMf1KRx-+-Sa72tvi!QTS-6-vb%Ay$asy=Sit`czhK{4g#O{i)7L>ZV#Asy)H-l z^5eRV1-P07EU=*3!AcE?p!aSQESb#IthkIHh}d;u=icdgY-AxGIgnv>WCk=(@{1v}aR`u6*(P>hrY1o0EjEfcM8;-Zcm3p`ryo>dwPte*H zb@L_%!eIBJsI1pS-Zh4So<$<s6+V|bNjTWTKqMRcjV?ey&6VP2PLKkQd16B1JvqRi^_7^?)O>tk&i=Y8JNpEC zj`;9C)6@0*@KDw6cF8Z=8>NT6KyH{q(+1BwhaqEQz>32u$WNdfNTbsP7%D<`Se5Wh zWBK(auGCrtlw62!NW>kYKAkVvxlmoAbMh9uq?0r!Pf~2gem_Y-Z)QUyBot{bZ299o(N@A|aAewcgrFR3wSB%Qe(s6IJJ>>_ zBb(>D2R5KzziG-}0$sgiZY8jx$B1t*_p4Kl7G>H)_}f+en^F0;B5W%-X;i`~3w-LU z)@SwA;l%O3t3QZ$Xx=}8Yxx2D<}R`}D_Jp{N!(yF<{OMdTg{nS zwl(+{7U_L%Mt&6C=~aV_qQDXLcBDmR2i8N%rNEWY_<|nqhrh?*0|U`RpQGW}qE$=+ z@^j1eII5m~g`b`{!#?6sDQOGjA$e1!HXTR4G|s2h{c`K%^17zKBZJ$;_U-|rGU}C2 zW#>+3$9Pxk{o!_f-3#dL={9IyrlV@x1^xYKpHBPrt-qrf1H9ZStIhT8WYO+e zOzY?dISj!TqK!37hVK{Vc$ivAZ{d?ul0cvy)ey%WJ4}93eg6BeIzOyv2z}5sAW|G# z`wf~afqsrh!Tk~eN`o;6T7wk;apt2qlr_?vmSRz!gT5YUkQv)kUBQEtF}HJKYhB6e zXFRKtGGyZe9~Jzh3GabQ1T=%0L1ci`5>=%;55i>>KV1yCXE@+9XBk9{3O5lCxU(jI zt*%6he`7VbvqrEduPQ#zMnR#t2Pa`o2*xyOh2IAbRO+KAQQhG|^@-S-^o@Y?vm@d@ zB^KcK@?~!rTrq<2JCh>6PMkf3FhEKb{d_SJxDpg^6vNGV3{D?jo^FF`P?dF=0xJgz zUtAgcV-H@=;t&FJxA_@gIIv}Wec8znyBZ=GJB4XEk#A-t3WhOYprCbBfVj6Ft6o^d z_zWkzEeg@6(ApQV@i_%fM!$QH;tHE{)cpvr(S>$V@?9T;r&qzDJW90*Hx!f|;FLD{b;- z2iu~){N4ke!G!woK+juWJ(hr;WHk5)qF$wyM1y>fwHIYheH?n4`fkBqfz+z(3c0HQ zZpJsDSGI(xuUmR%_+KtON-|pgt5vsmHd(3CXbn+bE-_fS*`wfNxFo12hMV`QD<0S9 zSzMkUt5ciGjA%=-S_UABjHZBvwL!{h-D(r*@Ar&Dp*WCXeD9iQJsMY@<9tqL;Am>w zO+rB)8`H-+A7PJ-JBuE0!#2phtI`Ydj4vML>&E#do2aww8x$L`39m1_r4#R+OU9<0 z(&Pc5wd>^-&xXaHW7pKU+aNVd=V`RH>nkA17aNFTQbtJR>Z6N(a*h*aD?1WVr=p|7 z*JubRNd(ekgepE*MBE{j@n#<(iiTKac%O(Y(ulG0tlBa3+CaEj$g@jztBVRbBI$%ejP>b8WN z)hxem!^t*(B??xyEYWJzJfYPguenGP>QIgT;Ow%w40(b<=k^^DYtYzx9 zWYu@M=nf{qwwH#vvpiAa)ok$8p;csx{1c?atihSjOO*`zu+x3B*YT@IBW2ys za8s4MW_VgN-PgDFo!yQO2o<$G-BBFo;Zf=G3F-!j0P&yGP0geUppqLXQ(pgD`u7>Jo8^oOusi>&E5~Srp}-J=UJqh zxb>?apa8%L+}}pYc82!$rk2Kbe{_De%Ij9ktcacjmG4~_*SN`vp@+$u1K%~L=d{&OogfHsw_hgYb!B)ui6k1;qtXE2d%1A5RAvc zEtXAFKk*A#2`Gr3BDm=IjO`7u$-#TH2g?SNqRthEkt43F3r65`F#h<>!eRCTntn{` zaTiffEE)SPB2B&3gMD8P;(_BZa<28hY=WhiaEU$2b!}neag#EO7y&mD8MH-u^c%*$ zjqoZ8`k|U2J85^-sb46YF%HJ$+JUp*X5s8boSyNVP6d=6N$Dq7}@* z4utE00^x3D@WBeFC19rEViXk|;peOOQpvgsRgQRV;VrG3?--2s>F3$q@X*-;HHh&?K^xt!C zt2^X5!*y8=Dz}nfN7X5SVvd@;20(hE0%+^@Wn_u(`=Ng_$g|`fMx>|2seF^S2u9Rc zN#vO6OXDE^>I243DEl3UoZ!<#*Ulq|FvrTY)lNg=1A%NH`t5l(H9?{V#TXB6DfiCP zy!6z}^Fyoq=6B*G$IBEu2YH8B6Xd|@&+-wD!TT*SH3x#gWRP>GXPLy=Oes={ZK69; zuMPyrqrmfNx1YYV!CF2=7av1(l^~?hkm<(X5QP;p-MTq7i@MXDMCfq9z4V-aHIlQQ z##w$hW=?rL%VDcmM5cXyU7>!pW;B(l5z9O5NMn^ofmm_8t8pBA6P4t9wO-b-ZT$j;{%jQrl zsyEAbUeOO9v?TR&1EnQmZDO8+lfPrgw<)%ER-x1^c4Y_1y^9`;Zn3R6tE*|5k zvAqq)Q?*4}Txhu^bt}K(?2U1=-M*l>qucILqWnc-w@9svn3GnR3-~qmNb3*DbJ*JD zuVmL{Cz9h0j2&@cO6d006GOkG(N$IR4Be6kfGheTFwq=~cfktd39R3;yS0N;Nd~w} zE=roXvs;~sO@4mka1OsjSs7&~gMLd>ATd4=)1KAJ8&$UmjcipKw`VCxDBXvBk|@TD z+&1nj=;;Xix_*S^W4nT{x}-U$EVU6YidlxXavTa1_Nq(I7HpTfdPEoVBSyrs8dB+$ z%SP~;`^U&hfZcm|7-=lK6~lLI)`E51euRS36kc0oGHjROP8^yW8Rirkynd+ye)`u2 z3;AcZ)2ILJ&aOJzuvJECCYOi3;~00T8t+qy&rm)G>8M3Qpq|0OO;C*?SZul0jY%T%{iQPFce4x-n z#gtGTj7Yg1Pp`37Lyg|L_r)eD-@C>pK5sPw7;i62)DI)=S3qLE5N%OjIZ z3Y^8K3dD_Cs3K~6hIKcKNI)aU4$i^EhnTG&C3iMk1fcjSOlXA;*@_+W;*v)IiZMdW zp+BXEKZqCi%O!8pMsFKr3?{aFD-$M=v^5k&Po-6{7tb4px<*r&$pxqY%>;JrssRh% zM@dA-(w0Tn2U7;p0Yz$)ss`C*=4jpFY3R0S(ke*WZ^G25YXv!`CNF=cSx@{7`xY*W zO4wG_7!FE+D2j`2y6m7CBu&DvS3xi`i*#e4DDey0U61_p&$)Wj`fLbkn~v2l!Z_o+ zWJ`1Ys|vx=`k^rA7G^S-=^JYoWizm`-(}2sus6a1{F0wNshNq*vB=L#)orRn4D&fg zE6Ani$mTK^KzKQs03@@Dko~isMf}AGDu$opZaCA|i1X~LdMIdHa!iAtLn8Q4+JLG! zbmIFpdA~{8YEHetJL~#!Q^RXh&1i%D49!gRh`5_-IEicK*6cEa@m%m9`pI$a6bf^& zLmjCF&w=4A(nU?o@v1?V#x`;{x|YwTVFT_n$XtBb7-Y699YN@7TQ z2Yd7W?xYOw+p87t3O8Npb9hn==)my)@I&upPKnZZRMfC@S5lcfr4m;X@7UKWgM=5g z6?{6?OT#u-1)AzE0T+%u+z9IR^efU`A@C` z#)N*aXGXed;zt-`!`o^})#E6b+w9w<#cf_)pnB{MBle(fy1Z_Y`AP|$VyKPF?PkQLb#3^> zDutZ+<;z|e^JWK)^IOx-pwTm(l|A7yT$RfH)wK!dq+EQ$;IS2hc5y3}*J#StO)lfW z%-XTNowdc=I`+xT)SOYrhNJ_}e5#N3g z=fsXIL@NFfpW0Fodvu`707$;er)Q)#UM^U^)u3%TV(flmR^~_%2@~8Xg$Z_i(jPWl zK9PNwJq+0WCaMF$)Lqfp}M030!fGj4CI3t0Mq31jeN<55kK5g6h z6IIl{X^c_Y|2F1x5A)o`5??ng9USw>#n2yrU~$KaGz=z#xeO62A$i0OIm>7kW>PxZD76~ zxph^l3soY%MLuO2o-^Y6qG3^njjxLX)G8JhD+zvD35xV02WO0yftq49J+uk8tOJ)c zYpL|fA~S%*B)@q>JT7p3?WZ%*a5bG@%vvxtiW671U~2`muHI$B;_u^u^uNU~Ims5Sy>F+x4c0J+V^Bh(csGP%Ek;X`(^0 z|<}Q|t$M^sYzl$;Eb(RQxF&WeR>W{b(Tpf0CdS)6CeLZ0z&(7thZsF%v&L zC>xsy^0$P7=gpfK{osKu23wuUt`WLSM7Ljq(27$ra*$`?ppuy^R4R-+uAQArp21-xZ(U_pBWA z*}%Yrp7Jv@CK?=ecqj;*uz2Ui7kk{w&4xpEo$m8l1|1xt*x%0rMC9w0mAmj9bHLX{T3;-=L_-0@(Q*q;T#F`;^CcdpSCWB^ z&+bDiBN(3~oHsehI?BtEevay6wT#Y>9|Q5Np<&g9||TCMoFV4YN>}bfe;S%q(02{GUP~2eibWH&Yt^*QBhwVz8=*!9YK)ew00?YBhBX-td%nGJyKlEGk&dU5MlC* z*dx)zBJ>?(qdC~A%_8(tkO5UNa%?$xIPeQ%TM&pRWUVX_s9F(-FtjIRysVE)v@8*{ zS`i{JjRK?(Iwh|6ra$zljX=O1kw5_XAG=Amz&{}WXk)gqu=X9FL?PH0ozkBV22J2c zKuLcX$o~uW(H06V83GC|=SKiE(S$$%qDgkD2wZo-|KoE133n70@c(f=%}Y}{xhKmS z-rjLn*YkyQufrkMBjtNJOANCaf)dLFg3|XI$Rq*x5H$iW+(Xw~0#VLk`Ru|X?xa~w z4G#&>qhI^u%BwODjRzpw%?4Qk6Z4Wl4~;)blfPj(1+;#L3T?Gf&TC23nc+AaYOf$Y z0PoinQ$DV4r}*k9(%DEg%HCIqe_dgInk_ekUbT4F#kBQl%*DRxifU^tT?n=Zl=6g5i=ldH;;oVnYxh`uCy zq*5UWW;V(@ANv!i9GuRt;y~Fh1;LLRUA-Q8IHZqSGGkr=#96@SbTddaNfv+TTx>@} zl@cQ0M6+P;d{>>IlK`kfYyq%uaM{0#0{y;F&msazun2X2fPIU=={G-w>68#DV9x#! z7ykB#Kta%tCPcwz02JvDX$9B<4szwja`i>R3@6Fk;JT4S6@reiHiL@{utkZu$DL+_ z{E%gdgvu=Yjb?+Sqfrw{+`IzX*sqY}JZ<#Odatr3fv>W<3fm?5tR>w6Mg4b;(_bO^|J&*h>{1c_0+E~Uui`#MiHwhiq`V&@ ziX+T4>Sl!hDaYlXm499&HtSz1>*W6J4i5d8#h|o+9NG-NqD{8ZozYNXpy9mP{u+1b zl8Eux6%%qRIhQkbac`)5N%%D6bB?U0P@h2!Lo4o< z3!~R!Xk>GRWvWadon>slplEY-3WUWh^i+*Tv)z=-IOvuv*9v@ZH%n60HsQCrbTf7l!clgoH* zqiRwTh0B=R*Er5=#8Z87-=}++7O|17^$7JLBVfLW)FH(Jr2G309G#msjFfXkd)!5d z<{8va4}SY#S&kaZg`?e@dY^2@yTH)4n2^s3eNkKrv@4>;(>&p*Gz3qgVx#ZM1rCDS zMmj?2Zp6RVeh8x=?XQannIv$vuQ1SqkEk_RFz5_Bq+%v>U18Frg+~BcL7J%IGE=1U zetUNW5ul-~g9=JhpizI3zh@B`viu1E1-WlfqTrv zmtce8k?Y3=Y?La)!p{T<>Oq;Eg4KF^OYUXqC{=oe5FG??G1oeKE$&>B!OoT6?7g5I zCdLg2DH!6UdBh8Z;ul=1MJOY9GeDWHw~04>gF%mfb@h&uL8p{JA;*sPN6a~vn@N_`|0p84&NUN_@20! z02RKE-_?ef#x!O8n&%ki_P%IDt!c&4@LK+*nRWRfX5Z7pplvB5$%j zmJl&n*CIUD?m9zD38v^W3x<8>`4f496=$pFu2JjTOY*Z78Uz?7HTvcp zJaX?(mBO0fRvNS7{jv${=})Q)R55<872F21igBY)#4YVpGFIlEYZ%=VXvAN<0-*;K zv^VAF3ujtDEyx`sc3lG{?56I(+(y;k9c6G@(GfAoX^?!5$nQn^fvl#h+f8IFmP`9i zV4`>^#<(?<3vdEvkV=Pz-dX0q-g@STleD}&5FDX@x#jOm8X{BkSUUBY-Wo-lf#8Xa z_2I7Z7*{N^`do$^p($zq{RT|)TI!sU`SHDY`Uhbor=XLGQzdCEH`4Qg{dx*9+d1Ja zhjmT=1huO6k)YvV3P;JU5=ET2W3Li(JBZ)F)7vm!)LhmO!8^o%nrisG_9SZc!40AL zSOr1-=Tw7}q29N@ErO&^uUh^hLZFbIUJ zN_%#`=b+U61k#G&)l5R(XPCO5chf#Z?$8Q4x`IeWuUpMmnu4I&lAXGET7hjngLZ98 zlW+sUDLsv4iscsO`;~};i+J6XzJ0I|T>){jQC{gRZ??gNMNLk1XG23JN=Zb9$#iVQ z#1_nSkcN}`g`T>gyeM1XiQM8-`J1;XuZ}y|2>Z!1R#Z8^pwWCxP8QRbFY@;3?`YFN zJrx)Xp6s(oX~}C<=@&UC{TXAyeHweJgBmUr=p|X~hr#_Y1<_V5pKq3H53#rooC8yh zf8zKhmMcd8Dwq7#D~|oa&U@@BfYMzL@3%1Z6X+#8S^CtfUDi5t1)*e;MLh>k9nmdG zQMSIWB@m>=b=6Dv@%sOrqp=RB%_93ja^{2j+nlb^M=pYbuAaH!p9GEg<;W#^#K3c^ zTfCeVItDQv)!ND<{36#$Ern$W`bfdDh|>XmX{+@rJVL5w8r3={unWD%WWN|SQ5tX?AfK@UIqV2}sWC`+SYOw97 z)N)egxqY(0dC?hY%E?Zj`ZoXi*#eVrBV2%6qQiz52XOck&=gZ=@}(wkU!EaJ^5SlA4uP@zPH;}Qi#7uuU)5QFXpJD=+1At-YI1YA z+ituyxh>n`e6)Zgma4L6q-%HvSe5cciKM~j6jn~)`>8g;!2)e1ah&F|82@irn>t_d zPdqPQ8^aJD$BxGmS;@a+{P=$DXqS7cAQ$pOCSrDDH;@v}J(vt9bTn7&-ak5>z2z5M z6v{neX%WyF;wC@xPOP}xIRd3Q7$3Wdzu_$Zx-78<%qCYe?|F>~>XY^RXn+U7ib2a(fKw=84xbX0a1f)Jy zI+5`GJDZ(BtgG6K3{Vk#*{00!vvJ&}#~NZEyC&i+jyHbjr$Cf0pE#67UWZ>8&!vm( zk^)(agdWD-=b|^{KZ+0E-Ya?#6lkle52)&9t@?mS8jmYLs&gFyILS3vrpntkxgWNodi?dbHaZ2wfh54!#T^2Z$|pjMd9p$4{CNIYGj7*d z)tNx2tZc^DKhE3vhcjv35c-ad$95Ld_@{5aXbHYxrk$~duI};Hc-KRnZNA>hZ_{4y2~Dw zQ*iohphA=LXXkH{J zfZ8X=%z6gq(ch=d{`%n`|6OxsQ9jApe`xN|M=j#tHCNZ#`VYVPzl!^zwI5r${IK)~ zF|w8T5<%{)eEQRmlKKi+sve)DnTbpqw>TB{dP5wc_e{8KqF@}J(-Bt@rtjTPxh#uc z0*f*zj7{>tEzOm_Y=EkaiGLdi-8PGZTd(qYLITPGeJqi=tb*}@>j}Eec-1*M@u?C?8q4!9MfJ& z5|MGm6*q4~f_l_vGc6d(M_=d^IEr&Rv#W|!@F&nMz1z;Gek9flLn1;`wBQ)_umR6Dwg z@QBd;`qYxIGVa$#;MBEva&V_7djD)Ruk?1)~`@@t)%UAcPm(L>3}%GA^gJ2YQ6Oww4ddr9%Mq@vrmvpQ~(-(MI%kbS)G z|I$<-V49DB>py23{`pz{R{V$DLs^M`74Wb5P=CV$vOXO5-?F2A2mYQY@)xw@gBkj7 zX(PYG|CNdS7Z?D@fc*#jzu_hSF6nnN>R+;U;Qt$H>hB_cr}O+JV)ujI`scI!o!s*~ z{P*Sazu>jl|A7C?V*2m+-6D0{;%%#@9O+ M`p{5w_CJpP58I?#egFUf literal 0 HcmV?d00001 diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py index 4079107..ad0367b 100644 --- a/packages/markitdown/tests/test_module_misc.py +++ b/packages/markitdown/tests/test_module_misc.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 -m pytest import io import os +import re import shutil import openai import pytest @@ -187,6 +188,18 @@ def test_docx_comments() -> None: validate_strings(result, DOCX_COMMENT_TEST_STRINGS) +def test_docx_equations() -> None: + markitdown = MarkItDown() + docx_file = os.path.join(TEST_FILES_DIR, "equations.docx") + result = markitdown.convert(docx_file) + + # Check for inline equation m=1 (wrapped with single $) is present + assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found" + + # Find block equations wrapped with double $$ and check if they are present + block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content) + assert block_equations, "No block equations found in the document." + def test_input_as_strings() -> None: markitdown = MarkItDown()