refactor: reformatted with black

2025-03-28 16:24:27 -04:00 · 2025-03-28 16:24:27 -04:00 · 799a1caf97
commit 799a1caf97
parent 6a66b275bb
4 changed files with 559 additions and 519 deletions
--- a/packages/markitdown/src/markitdown/converter_utils/docx/math/latex_dict.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/latex_dict.py
@@ -7,269 +7,267 @@ On 25/03/2025
 from __future__ import unicode_literals
-CHARS = ('{','}', '_', '^', '#', '&', '$', '%', '~')
+CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~")
-BLANK = ''
+BLANK = ""
-BACKSLASH = '\\'
+BACKSLASH = "\\"
-ALN = '&'
+ALN = "&"
 CHR = {
-	#Unicode : Latex Math Symbols
+    # Unicode : Latex Math Symbols
-	#Top accents
+    # Top accents
-	'\u0300' : '\\grave{{{0}}}',
+    "\u0300": "\\grave{{{0}}}",
-	'\u0301' : '\\acute{{{0}}}',
+    "\u0301": "\\acute{{{0}}}",
-	'\u0302' : '\\hat{{{0}}}',
+    "\u0302": "\\hat{{{0}}}",
-	'\u0303' : '\\tilde{{{0}}}',
+    "\u0303": "\\tilde{{{0}}}",
-	'\u0304' : '\\bar{{{0}}}',
+    "\u0304": "\\bar{{{0}}}",
-	'\u0305' : '\\overbar{{{0}}}',
+    "\u0305": "\\overbar{{{0}}}",
-	'\u0306' : '\\breve{{{0}}}',
+    "\u0306": "\\breve{{{0}}}",
-	'\u0307' : '\\dot{{{0}}}',
+    "\u0307": "\\dot{{{0}}}",
-	'\u0308' : '\\ddot{{{0}}}',
+    "\u0308": "\\ddot{{{0}}}",
-	'\u0309' : '\\ovhook{{{0}}}',
+    "\u0309": "\\ovhook{{{0}}}",
-	'\u030a' : '\\ocirc{{{0}}}}',
+    "\u030a": "\\ocirc{{{0}}}}",
-	'\u030c' : '\\check{{{0}}}}',
+    "\u030c": "\\check{{{0}}}}",
-	'\u0310' : '\\candra{{{0}}}',
+    "\u0310": "\\candra{{{0}}}",
-	'\u0312' : '\\oturnedcomma{{{0}}}',
+    "\u0312": "\\oturnedcomma{{{0}}}",
-	'\u0315' : '\\ocommatopright{{{0}}}',
+    "\u0315": "\\ocommatopright{{{0}}}",
-	'\u031a' : '\\droang{{{0}}}',
+    "\u031a": "\\droang{{{0}}}",
-	'\u0338' : '\\not{{{0}}}',
+    "\u0338": "\\not{{{0}}}",
-	'\u20d0' : '\\leftharpoonaccent{{{0}}}',
+    "\u20d0": "\\leftharpoonaccent{{{0}}}",
-	'\u20d1' : '\\rightharpoonaccent{{{0}}}',
+    "\u20d1": "\\rightharpoonaccent{{{0}}}",
-	'\u20d2' : '\\vertoverlay{{{0}}}',
+    "\u20d2": "\\vertoverlay{{{0}}}",
-	'\u20d6' : '\\overleftarrow{{{0}}}',
+    "\u20d6": "\\overleftarrow{{{0}}}",
-	'\u20d7' : '\\vec{{{0}}}',
+    "\u20d7": "\\vec{{{0}}}",
-	'\u20db' : '\\dddot{{{0}}}',
+    "\u20db": "\\dddot{{{0}}}",
-	'\u20dc' : '\\ddddot{{{0}}}',
+    "\u20dc": "\\ddddot{{{0}}}",
-	'\u20e1' : '\\overleftrightarrow{{{0}}}',
+    "\u20e1": "\\overleftrightarrow{{{0}}}",
-	'\u20e7' : '\\annuity{{{0}}}',
+    "\u20e7": "\\annuity{{{0}}}",
-	'\u20e9' : '\\widebridgeabove{{{0}}}',
+    "\u20e9": "\\widebridgeabove{{{0}}}",
-	'\u20f0' : '\\asteraccent{{{0}}}',
+    "\u20f0": "\\asteraccent{{{0}}}",
-	 #Bottom accents
+    # Bottom accents
-	'\u0330' : '\\wideutilde{{{0}}}',
+    "\u0330": "\\wideutilde{{{0}}}",
-	'\u0331' : '\\underbar{{{0}}}',
+    "\u0331": "\\underbar{{{0}}}",
-	'\u20e8' : '\\threeunderdot{{{0}}}',
+    "\u20e8": "\\threeunderdot{{{0}}}",
-	'\u20ec' : '\\underrightharpoondown{{{0}}}',
+    "\u20ec": "\\underrightharpoondown{{{0}}}",
-	'\u20ed' : '\\underleftharpoondown{{{0}}}',
+    "\u20ed": "\\underleftharpoondown{{{0}}}",
-	'\u20ee' : '\\underledtarrow{{{0}}}',
+    "\u20ee": "\\underledtarrow{{{0}}}",
-	'\u20ef' : '\\underrightarrow{{{0}}}',
+    "\u20ef": "\\underrightarrow{{{0}}}",
-	#Over | group
+    # Over | group
-	'\u23b4' : '\\overbracket{{{0}}}',
+    "\u23b4": "\\overbracket{{{0}}}",
-	'\u23dc' : '\\overparen{{{0}}}',
+    "\u23dc": "\\overparen{{{0}}}",
-	'\u23de' : '\\overbrace{{{0}}}',
+    "\u23de": "\\overbrace{{{0}}}",
-	#Under| group
+    # Under| group
-	'\u23b5' : '\\underbracket{{{0}}}',
+    "\u23b5": "\\underbracket{{{0}}}",
-	'\u23dd' : '\\underparen{{{0}}}',
+    "\u23dd": "\\underparen{{{0}}}",
-	'\u23df' : '\\underbrace{{{0}}}',
+    "\u23df": "\\underbrace{{{0}}}",
 }
 CHR_BO = {
-	#Big operators,
+    # Big operators,
-	'\u2140' : '\\Bbbsum',
+    "\u2140": "\\Bbbsum",
-	'\u220f' : '\\prod',
+    "\u220f": "\\prod",
-	'\u2210' : '\\coprod',
+    "\u2210": "\\coprod",
-	'\u2211' : '\\sum',
+    "\u2211": "\\sum",
-	'\u222b' : '\\int',
+    "\u222b": "\\int",
-	'\u22c0' : '\\bigwedge',
+    "\u22c0": "\\bigwedge",
-	'\u22c1' : '\\bigvee',
+    "\u22c1": "\\bigvee",
-	'\u22c2' : '\\bigcap',
+    "\u22c2": "\\bigcap",
-	'\u22c3' : '\\bigcup',
+    "\u22c3": "\\bigcup",
-	'\u2a00' : '\\bigodot',
+    "\u2a00": "\\bigodot",
-	'\u2a01' : '\\bigoplus',
+    "\u2a01": "\\bigoplus",
-	'\u2a02' : '\\bigotimes',
+    "\u2a02": "\\bigotimes",
 }
 T = {
-
+    "\u2192": "\\rightarrow ",
-	'\u2192' : '\\rightarrow ',
+    # Greek letters
-	#Greek letters
+    "\U0001d6fc": "\\alpha ",
-	'\U0001d6fc' : '\\alpha ',
+    "\U0001d6fd": "\\beta ",
-	'\U0001d6fd' : '\\beta ',
+    "\U0001d6fe": "\\gamma ",
-	'\U0001d6fe' : '\\gamma ',
+    "\U0001d6ff": "\\theta ",
-	'\U0001d6ff' : '\\theta ',
+    "\U0001d700": "\\epsilon ",
-	'\U0001d700' : '\\epsilon ',
+    "\U0001d701": "\\zeta ",
-	'\U0001d701' : '\\zeta ',
+    "\U0001d702": "\\eta ",
-	'\U0001d702' : '\\eta ',
+    "\U0001d703": "\\theta ",
-	'\U0001d703' : '\\theta ',
+    "\U0001d704": "\\iota ",
-	'\U0001d704' : '\\iota ',
+    "\U0001d705": "\\kappa ",
-	'\U0001d705' : '\\kappa ',
+    "\U0001d706": "\\lambda ",
-	'\U0001d706' : '\\lambda ',
+    "\U0001d707": "\\m ",
-	'\U0001d707' : '\\m ',
+    "\U0001d708": "\\n ",
-	'\U0001d708' : '\\n ',
+    "\U0001d709": "\\xi ",
-	'\U0001d709' : '\\xi ',
+    "\U0001d70a": "\\omicron ",
-	'\U0001d70a' : '\\omicron ',
+    "\U0001d70b": "\\pi ",
-	'\U0001d70b' : '\\pi ',
+    "\U0001d70c": "\\rho ",
-	'\U0001d70c' : '\\rho ',
+    "\U0001d70d": "\\varsigma ",
-	'\U0001d70d' : '\\varsigma ',
+    "\U0001d70e": "\\sigma ",
-	'\U0001d70e' : '\\sigma ',
+    "\U0001d70f": "\\ta ",
-	'\U0001d70f' : '\\ta ',
+    "\U0001d710": "\\upsilon ",
-	'\U0001d710' : '\\upsilon ',
+    "\U0001d711": "\\phi ",
-	'\U0001d711' : '\\phi ',
+    "\U0001d712": "\\chi ",
-	'\U0001d712' : '\\chi ',
+    "\U0001d713": "\\psi ",
-	'\U0001d713' : '\\psi ',
+    "\U0001d714": "\\omega ",
-	'\U0001d714' : '\\omega ',
+    "\U0001d715": "\\partial ",
-	'\U0001d715' : '\\partial ',
+    "\U0001d716": "\\varepsilon ",
-	'\U0001d716' : '\\varepsilon ',
+    "\U0001d717": "\\vartheta ",
-	'\U0001d717' : '\\vartheta ',
+    "\U0001d718": "\\varkappa ",
-	'\U0001d718' : '\\varkappa ',
+    "\U0001d719": "\\varphi ",
-	'\U0001d719' : '\\varphi ',
+    "\U0001d71a": "\\varrho ",
-	'\U0001d71a' : '\\varrho ',
+    "\U0001d71b": "\\varpi ",
-	'\U0001d71b' : '\\varpi ',
+    # Relation symbols
-	#Relation symbols
+    "\u2190": "\\leftarrow ",
-	'\u2190' : '\\leftarrow ',
+    "\u2191": "\\uparrow ",
-	'\u2191' : '\\uparrow ',
+    "\u2192": "\\rightarrow ",
-	'\u2192' : '\\rightarrow ',
+    "\u2193": "\\downright ",
-	'\u2193' : '\\downright ',
+    "\u2194": "\\leftrightarrow ",
-	'\u2194' : '\\leftrightarrow ',
+    "\u2195": "\\updownarrow ",
-	'\u2195' : '\\updownarrow ',
+    "\u2196": "\\nwarrow ",
-	'\u2196' : '\\nwarrow ',
+    "\u2197": "\\nearrow ",
-	'\u2197' : '\\nearrow ',
+    "\u2198": "\\searrow ",
-	'\u2198' : '\\searrow ',
+    "\u2199": "\\swarrow ",
-	'\u2199' : '\\swarrow ',
+    "\u22ee": "\\vdots ",
-	'\u22ee' : '\\vdots ',
+    "\u22ef": "\\cdots ",
-	'\u22ef' : '\\cdots ',
+    "\u22f0": "\\adots ",
-	'\u22f0' : '\\adots ',
+    "\u22f1": "\\ddots ",
-	'\u22f1' : '\\ddots ',
+    "\u2260": "\\ne ",
-	'\u2260' : '\\ne ',
+    "\u2264": "\\leq ",
-	'\u2264' : '\\leq ',
+    "\u2265": "\\geq ",
-	'\u2265' : '\\geq ',
+    "\u2266": "\\leqq ",
-	'\u2266' : '\\leqq ',
+    "\u2267": "\\geqq ",
-	'\u2267' : '\\geqq ',
+    "\u2268": "\\lneqq ",
-	'\u2268' : '\\lneqq ',
+    "\u2269": "\\gneqq ",
-	'\u2269' : '\\gneqq ',
+    "\u226a": "\\ll ",
-	'\u226a' : '\\ll ',
+    "\u226b": "\\gg ",
-	'\u226b' : '\\gg ',
+    "\u2208": "\\in ",
-	'\u2208' : '\\in ',
+    "\u2209": "\\notin ",
-	'\u2209' : '\\notin ',
+    "\u220b": "\\ni ",
-	'\u220b' : '\\ni ',
+    "\u220c": "\\nni ",
-	'\u220c' : '\\nni ',
+    # Ordinary symbols
-
+    "\u221e": "\\infty ",
-	#Ordinary symbols
+    # Binary relations
-	'\u221e' : '\\infty ',
+    "\u00b1": "\\pm ",
-	#Binary relations
+    "\u2213": "\\mp ",
-	'\u00b1' : '\\pm ',
+    # Italic, Latin, uppercase
-	'\u2213' : '\\mp ',
+    "\U0001d434": "A",
-	#Italic, Latin, uppercase
+    "\U0001d435": "B",
-	'\U0001d434' : 'A',
+    "\U0001d436": "C",
-	'\U0001d435' : 'B',
+    "\U0001d437": "D",
-	'\U0001d436' : 'C',
+    "\U0001d438": "E",
-	'\U0001d437' : 'D',
+    "\U0001d439": "F",
-	'\U0001d438' : 'E',
+    "\U0001d43a": "G",
-	'\U0001d439' : 'F',
+    "\U0001d43b": "H",
-	'\U0001d43a' : 'G',
+    "\U0001d43c": "I",
-	'\U0001d43b' : 'H',
+    "\U0001d43d": "J",
-	'\U0001d43c'  : 'I',
+    "\U0001d43e": "K",
-	'\U0001d43d'  : 'J',
+    "\U0001d43f": "L",
-	'\U0001d43e'  : 'K',
+    "\U0001d440": "M",
-	'\U0001d43f'   : 'L',
+    "\U0001d441": "N",
-	'\U0001d440'  : 'M',
+    "\U0001d442": "O",
-	'\U0001d441'  : 'N',
+    "\U0001d443": "P",
-	'\U0001d442'  : 'O',
+    "\U0001d444": "Q",
-	'\U0001d443'  : 'P',
+    "\U0001d445": "R",
-	'\U0001d444'  : 'Q',
+    "\U0001d446": "S",
-	'\U0001d445'  : 'R',
+    "\U0001d447": "T",
-	'\U0001d446'  : 'S',
+    "\U0001d448": "U",
-	'\U0001d447'  : 'T',
+    "\U0001d449": "V",
-	'\U0001d448'  : 'U',
+    "\U0001d44a": "W",
-	'\U0001d449'  : 'V',
+    "\U0001d44b": "X",
-	'\U0001d44a'  : 'W',
+    "\U0001d44c": "Y",
-	'\U0001d44b'  : 'X',
+    "\U0001d44d": "Z",
-	'\U0001d44c'  : 'Y',
+    # Italic, Latin, lowercase
-	'\U0001d44d'  : 'Z',
+    "\U0001d44e": "a",
-	#Italic, Latin, lowercase
+    "\U0001d44f": "b",
-	'\U0001d44e'  : 'a',
+    "\U0001d450": "c",
-	'\U0001d44f'   : 'b',
+    "\U0001d451": "d",
-	'\U0001d450'  : 'c',
+    "\U0001d452": "e",
-	'\U0001d451'  : 'd',
+    "\U0001d453": "f",
-	'\U0001d452'  : 'e',
+    "\U0001d454": "g",
-	'\U0001d453'  : 'f',
+    "\U0001d456": "i",
-	'\U0001d454'  : 'g',
+    "\U0001d457": "j",
-	'\U0001d456'  : 'i',
+    "\U0001d458": "k",
-	'\U0001d457'  : 'j',
+    "\U0001d459": "l",
-	'\U0001d458'  : 'k',
+    "\U0001d45a": "m",
-	'\U0001d459'  : 'l',
+    "\U0001d45b": "n",
-	'\U0001d45a'  : 'm',
+    "\U0001d45c": "o",
-	'\U0001d45b'  : 'n',
+    "\U0001d45d": "p",
-	'\U0001d45c'   : 'o',
+    "\U0001d45e": "q",
-	'\U0001d45d'  : 'p',
+    "\U0001d45f": "r",
-	'\U0001d45e'  : 'q',
+    "\U0001d460": "s",
-	'\U0001d45f'   : 'r',
+    "\U0001d461": "t",
-	'\U0001d460'  : 's',
+    "\U0001d462": "u",
-	'\U0001d461'  : 't',
+    "\U0001d463": "v",
-	'\U0001d462'  : 'u',
+    "\U0001d464": "w",
-	'\U0001d463'  : 'v',
+    "\U0001d465": "x",
-	'\U0001d464'  : 'w',
+    "\U0001d466": "y",
-	'\U0001d465'  : 'x',
+    "\U0001d467": "z",
 	'\U0001d466'  : 'y',
 	'\U0001d467'  : 'z',
 }
-FUNC ={
+FUNC = {
-	'sin' : '\\sin({fe})',
+    "sin": "\\sin({fe})",
-	'cos' : '\\cos({fe})',
+    "cos": "\\cos({fe})",
-	'tan' : '\\tan({fe})',
+    "tan": "\\tan({fe})",
-	'arcsin' : '\\arcsin({fe})',
+    "arcsin": "\\arcsin({fe})",
-	'arccos' : '\\arccos({fe})',
+    "arccos": "\\arccos({fe})",
-	'arctan' : '\\arctan({fe})',
+    "arctan": "\\arctan({fe})",
-	'arccot' : '\\arccot({fe})',
+    "arccot": "\\arccot({fe})",
-	'sinh' : '\\sinh({fe})',
+    "sinh": "\\sinh({fe})",
-	'cosh' : '\\cosh({fe})',
+    "cosh": "\\cosh({fe})",
-	'tanh' : '\\tanh({fe})',
+    "tanh": "\\tanh({fe})",
-	'coth' : '\\coth({fe})',
+    "coth": "\\coth({fe})",
-	'sec'  : '\\sec({fe})',
+    "sec": "\\sec({fe})",
-	'csc'  : '\\csc({fe})',
+    "csc": "\\csc({fe})",
 }
-FUNC_PLACE = '{fe}'
+FUNC_PLACE = "{fe}"
-BRK = '\\\\'
+BRK = "\\\\"
 CHR_DEFAULT = {
-	'ACC_VAL':'\\hat{{{0}}}',
+    "ACC_VAL": "\\hat{{{0}}}",
 }
 POS = {
-	'top' : '\\overline{{{0}}}', # not sure
+    "top": "\\overline{{{0}}}",  # not sure
-	'bot' : '\\underline{{{0}}}',
+    "bot": "\\underline{{{0}}}",
 }
 POS_DEFAULT = {
-	'BAR_VAL': '\\overline{{{0}}}',
+    "BAR_VAL": "\\overline{{{0}}}",
 }
-SUB = '_{{{0}}}'
+SUB = "_{{{0}}}"
-SUP = '^{{{0}}}'
+SUP = "^{{{0}}}"
 F = {
-	'bar': '\\frac{{{num}}}{{{den}}}',
+    "bar": "\\frac{{{num}}}{{{den}}}",
-	'skw': r'^{{{num}}}/_{{{den}}}',
+    "skw": r"^{{{num}}}/_{{{den}}}",
-	'noBar': '\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}',
+    "noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
-	'lin' : '{{{num}}}/{{{den}}}',
+    "lin": "{{{num}}}/{{{den}}}",
 }
-F_DEFAULT = '\\frac{{{num}}}{{{den}}}'
+F_DEFAULT = "\\frac{{{num}}}{{{den}}}"
-D  = '\\left{left}{text}\\right{right}'
+D = "\\left{left}{text}\\right{right}"
 D_DEFAULT = {
-	'left':'(',
+    "left": "(",
-	'right':')',
+    "right": ")",
-	'null':'.',
+    "null": ".",
 }
-RAD = '\\sqrt[{deg}]{{{text}}}'
+RAD = "\\sqrt[{deg}]{{{text}}}"
-RAD_DEFAULT = '\\sqrt{{{text}}}'
+RAD_DEFAULT = "\\sqrt{{{text}}}"
-ARR = '\\begin{{array}}{{c}}{text}\end{{array}}'
+ARR = "\\begin{{array}}{{c}}{text}\end{{array}}"
 LIM_FUNC = {
-	'lim':'\\lim_{{{lim}}}',
+    "lim": "\\lim_{{{lim}}}",
-	'max':'\\max_{{{lim}}}',
+    "max": "\\max_{{{lim}}}",
-	'min':'\\min_{{{lim}}}',
+    "min": "\\min_{{{lim}}}",
 }
-LIM_TO  = ('\\rightarrow','\\to')
+LIM_TO = ("\\rightarrow", "\\to")
-LIM_UPP = '\\overset{{{lim}}}{{{text}}}'
+LIM_UPP = "\\overset{{{lim}}}{{{text}}}"
-M = '\\begin{{matrix}}{text}\end{{matrix}}'
+M = "\\begin{{matrix}}{text}\end{{matrix}}"
--- a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
@@ -8,355 +8,395 @@ On 25/03/2025
 import xml.etree.ElementTree as ET
-from .latex_dict import (CHARS, CHR, CHR_BO, CHR_DEFAULT, POS, POS_DEFAULT
+from .latex_dict import (
-	, SUB, SUP, F, F_DEFAULT, T, FUNC, D, D_DEFAULT, RAD, RAD_DEFAULT, ARR
+    CHARS,
-	, LIM_FUNC, LIM_TO, LIM_UPP, M, BRK, BLANK, BACKSLASH, ALN, FUNC_PLACE)
+    CHR,
    CHR_BO,
    CHR_DEFAULT,
    POS,
    POS_DEFAULT,
    SUB,
    SUP,
    F,
    F_DEFAULT,
    T,
    FUNC,
    D,
    D_DEFAULT,
    RAD,
    RAD_DEFAULT,
    ARR,
    LIM_FUNC,
    LIM_TO,
    LIM_UPP,
    M,
    BRK,
    BLANK,
    BACKSLASH,
    ALN,
    FUNC_PLACE,
 )
 OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
 def load(stream):
-	tree = ET.parse(stream)
+    tree = ET.parse(stream)
-	for omath in tree.findall(OMML_NS+'oMath'):
+    for omath in tree.findall(OMML_NS + "oMath"):
-		yield oMath2Latex(omath)
+        yield oMath2Latex(omath)
 def load_string(string):
-	root = ET.fromstring(string)
+    root = ET.fromstring(string)
-	for omath in root.findall(OMML_NS+'oMath'):
+    for omath in root.findall(OMML_NS + "oMath"):
-		yield oMath2Latex(omath)
+        yield oMath2Latex(omath)
 def escape_latex(strs):
-	last = None
+    last = None
-	new_chr = []
+    new_chr = []
-	strs = strs.replace(r'\\','\\')
+    strs = strs.replace(r"\\", "\\")
-	for c in strs :
+    for c in strs:
-		if (c in CHARS) and (last !=BACKSLASH):
+        if (c in CHARS) and (last != BACKSLASH):
-			new_chr.append(BACKSLASH+c)
+            new_chr.append(BACKSLASH + c)
-		else:
+        else:
-			new_chr.append(c)
+            new_chr.append(c)
-		last = c
+        last = c
-	return BLANK.join(new_chr)
+    return BLANK.join(new_chr)
-def get_val(key,default=None,store=CHR):
+
-	if key is not None:
+def get_val(key, default=None, store=CHR):
-		return key if not store else store.get(key,key)
+    if key is not None:
-	else:
+        return key if not store else store.get(key, key)
-		return default
+    else:
        return default
 class Tag2Method(object):
-	def call_method(self,elm,stag=None):
+    def call_method(self, elm, stag=None):
-		getmethod = self.tag2meth.get
+        getmethod = self.tag2meth.get
-		if stag is None:
+        if stag is None:
-			stag = elm.tag.replace(OMML_NS,'')
+            stag = elm.tag.replace(OMML_NS, "")
-		method = getmethod(stag)
+        method = getmethod(stag)
-		if method:
+        if method:
-			return method(self,elm)
+            return method(self, elm)
-		else:
+        else:
-			return None
+            return None
-	def process_children_list(self,elm,include=None):
+    def process_children_list(self, elm, include=None):
-		"""
+        """
-		process children of the elm,return iterable
+        process children of the elm,return iterable
-		"""		
+        """
-		for _e in list(elm):
+        for _e in list(elm):
-			if (OMML_NS not in _e.tag):
+            if OMML_NS not in _e.tag:
-				continue
+                continue
-			stag = _e.tag.replace(OMML_NS,'')			
+            stag = _e.tag.replace(OMML_NS, "")
-			if include and (stag not in include):
+            if include and (stag not in include):
-				continue
+                continue
-			t = self.call_method(_e,stag=stag)
+            t = self.call_method(_e, stag=stag)
-			if t is None:
+            if t is None:
-				t = self.process_unknow(_e,stag)
+                t = self.process_unknow(_e, stag)
-				if t is None:
+                if t is None:
-					continue
+                    continue
-			yield (stag,t,_e)
+            yield (stag, t, _e)
-	def process_children_dict(self,elm,include=None):
+    def process_children_dict(self, elm, include=None):
-		"""
+        """
-		process children of the elm,return dict
+        process children of the elm,return dict
-		"""
+        """
-		latex_chars = dict()
+        latex_chars = dict()
-		for stag,t,e in self.process_children_list(elm,include):
+        for stag, t, e in self.process_children_list(elm, include):
-			latex_chars[stag] = t
+            latex_chars[stag] = t
-		return latex_chars
+        return latex_chars
-	def process_children(self,elm,include=None):
+    def process_children(self, elm, include=None):
-		"""
+        """
-		process children of the elm,return string
+        process children of the elm,return string
-		"""
+        """
-		return BLANK.join(( t if not isinstance(t,Tag2Method) else str(t) 
+        return BLANK.join(
-			for stag,t,e in self.process_children_list(elm,include)))
+            (
                t if not isinstance(t, Tag2Method) else str(t)
                for stag, t, e in self.process_children_list(elm, include)
            )
        )
-	def process_unknow(self,elm,stag):
+    def process_unknow(self, elm, stag):
-		return None
+        return None
 class Pr(Tag2Method):
-	text = ''
+    text = ""
-	__val_tags = ('chr','pos','begChr','endChr','type')
+    __val_tags = ("chr", "pos", "begChr", "endChr", "type")
-	__innerdict= None #can't use the __dict__
+    __innerdict = None  # can't use the __dict__
-	""" common properties of element"""
+    """ common properties of element"""
 	def __init__(self, elm):
 		self.__innerdict={}
 		self.text=self.process_children(elm)
-	def __str__(self):
+    def __init__(self, elm):
-		return self.text
+        self.__innerdict = {}
        self.text = self.process_children(elm)
-	def __unicode__(self):
+    def __str__(self):
-		return self.__str__(self)
+        return self.text
-	def __getattr__(self,name):
+    def __unicode__(self):
-		return self.__innerdict.get(name,None)
+        return self.__str__(self)
-	def do_brk(self,elm):
+    def __getattr__(self, name):
-		self.__innerdict['brk'] = BRK 
+        return self.__innerdict.get(name, None)
 		return BRK
-	def do_common(self,elm):
+    def do_brk(self, elm):
-		stag = elm.tag.replace(OMML_NS,'')
+        self.__innerdict["brk"] = BRK
-		if stag in self.__val_tags:
+        return BRK
 			t = elm.get('{0}val'.format(OMML_NS))
 			self.__innerdict[stag] = t
 		return None
-	tag2meth = {
+    def do_common(self, elm):
-		'brk':do_brk,
+        stag = elm.tag.replace(OMML_NS, "")
-		'chr':do_common,
+        if stag in self.__val_tags:
-		'pos':do_common,
+            t = elm.get("{0}val".format(OMML_NS))
-		'begChr':do_common,
+            self.__innerdict[stag] = t
-		'endChr':do_common,
+        return None
-		'type':do_common,
+
-	}
+    tag2meth = {
        "brk": do_brk,
        "chr": do_common,
        "pos": do_common,
        "begChr": do_common,
        "endChr": do_common,
        "type": do_common,
    }
 class oMath2Latex(Tag2Method):
-	"""
+    """
-	Convert oMath element of omml to latex
+    Convert oMath element of omml to latex
-	"""
+    """
 	_t_dict = T
-	__direct_tags = ('box','sSub','sSup','sSubSup','num','den','deg','e')
+    _t_dict = T
-	def __init__(self, element):
+    __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
 		self._latex = self.process_children(element)		
-	def __str__(self):
+    def __init__(self, element):
-		return self.latex
+        self._latex = self.process_children(element)
-	def __unicode__(self):
+    def __str__(self):
-		return self.__str__(self)
+        return self.latex
-	def process_unknow(self,elm,stag):			
+    def __unicode__(self):
-		if stag in self.__direct_tags:
+        return self.__str__(self)
 			return self.process_children(elm)
 		elif stag[-2:] == 'Pr':
 			return Pr(elm)
 		else:
 			return None
-	@property
+    def process_unknow(self, elm, stag):
-	def latex(self):
+        if stag in self.__direct_tags:
-		return self._latex
+            return self.process_children(elm)
        elif stag[-2:] == "Pr":
            return Pr(elm)
        else:
            return None
-	def do_acc(self,elm):
+    @property
-		"""
+    def latex(self):
-		the accent function
+        return self._latex
 		"""
 		c_dict = self.process_children_dict(elm)
 		latex_s = get_val(c_dict['accPr'].chr,default=CHR_DEFAULT.get('ACC_VAL'),store=CHR)
 		return latex_s.format(c_dict['e'])		
-	def do_bar(self,elm):
+    def do_acc(self, elm):
-		"""
+        """
-		the bar function
+        the accent function
-		"""
+        """
-		c_dict = self.process_children_dict(elm)
+        c_dict = self.process_children_dict(elm)
-		pr = c_dict['barPr']
+        latex_s = get_val(
-		latex_s = get_val(pr.pos,default=POS_DEFAULT.get('BAR_VAL'),store=POS)
+            c_dict["accPr"].chr, default=CHR_DEFAULT.get("ACC_VAL"), store=CHR
-		return pr.text+latex_s.format(c_dict['e'])
+        )
        return latex_s.format(c_dict["e"])
-	def do_d(self,elm):
+    def do_bar(self, elm):
-		"""
+        """
-		the delimiter object
+        the bar function
-		"""
+        """
-		c_dict = self.process_children_dict(elm)
+        c_dict = self.process_children_dict(elm)
-		pr = c_dict['dPr']
+        pr = c_dict["barPr"]
-		null = D_DEFAULT.get('null')
+        latex_s = get_val(pr.pos, default=POS_DEFAULT.get("BAR_VAL"), store=POS)
-		s_val = get_val(pr.begChr,default=D_DEFAULT.get('left'),store=T)
+        return pr.text + latex_s.format(c_dict["e"])
 		e_val = get_val(pr.endChr,default=D_DEFAULT.get('right'),store=T)
 		return pr.text+D.format(left= null if not s_val else escape_latex(s_val),
 					text=c_dict['e'],
 					right= null if not e_val else  escape_latex(e_val))
    def do_d(self, elm):
        """
        the delimiter object
        """
        c_dict = self.process_children_dict(elm)
        pr = c_dict["dPr"]
        null = D_DEFAULT.get("null")
        s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
        e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
        return pr.text + D.format(
            left=null if not s_val else escape_latex(s_val),
            text=c_dict["e"],
            right=null if not e_val else escape_latex(e_val),
        )
-	def do_spre(self,elm):
+    def do_spre(self, elm):
-		"""
+        """
-		the Pre-Sub-Superscript object -- Not support yet
+        the Pre-Sub-Superscript object -- Not support yet
-		"""
+        """
-		pass
+        pass
-	def do_sub(self,elm):
+    def do_sub(self, elm):
-		text = self.process_children(elm)
+        text = self.process_children(elm)
-		return SUB.format(text)
+        return SUB.format(text)
-	def do_sup(self,elm):
+    def do_sup(self, elm):
-		text = self.process_children(elm)
+        text = self.process_children(elm)
-		return SUP.format(text)
+        return SUP.format(text)
-	def do_f(self,elm):
+    def do_f(self, elm):
-		"""
+        """
-		the fraction object
+        the fraction object
-		"""
+        """
-		c_dict = self.process_children_dict(elm)
+        c_dict = self.process_children_dict(elm)
-		pr = c_dict['fPr']
+        pr = c_dict["fPr"]
-		latex_s = get_val(pr.type,default=F_DEFAULT,store=F)
+        latex_s = get_val(pr.type, default=F_DEFAULT, store=F)
-		return pr.text+latex_s.format(num=c_dict.get('num'),den=c_dict.get('den'))
+        return pr.text + latex_s.format(num=c_dict.get("num"), den=c_dict.get("den"))
-	def do_func(self,elm):
+    def do_func(self, elm):
-		"""
+        """
-		the Function-Apply object (Examples:sin cos)
+        the Function-Apply object (Examples:sin cos)
-		"""
+        """
-		c_dict = self.process_children_dict(elm)
+        c_dict = self.process_children_dict(elm)
-		func_name = c_dict.get('fName')
+        func_name = c_dict.get("fName")
-		return func_name.replace(FUNC_PLACE,c_dict.get('e'))
+        return func_name.replace(FUNC_PLACE, c_dict.get("e"))
-	def do_fname(self,elm):
+    def do_fname(self, elm):
-		"""
+        """
-		the func name
+        the func name
-		"""
+        """
-		latex_chars = []
+        latex_chars = []
-		for stag,t,e in self.process_children_list(elm):
+        for stag, t, e in self.process_children_list(elm):
-			if stag == 'r':
+            if stag == "r":
-				if FUNC.get(t):
+                if FUNC.get(t):
-					latex_chars.append(FUNC[t])
+                    latex_chars.append(FUNC[t])
-				else :
+                else:
-					raise NotImplemented("Not support func %s" % t)
+                    raise NotImplemented("Not support func %s" % t)
-			else:
+            else:
-				latex_chars.append(t)
+                latex_chars.append(t)
-		t = BLANK.join(latex_chars)
+        t = BLANK.join(latex_chars)
-		return t if FUNC_PLACE in t else t+FUNC_PLACE #do_func will replace this
+        return t if FUNC_PLACE in t else t + FUNC_PLACE  # do_func will replace this
-	def do_groupchr(self,elm):
+    def do_groupchr(self, elm):
-		"""
+        """
-		the Group-Character object
+        the Group-Character object
-		"""
+        """
-		c_dict = self.process_children_dict(elm)
+        c_dict = self.process_children_dict(elm)
-		pr = c_dict['groupChrPr']
+        pr = c_dict["groupChrPr"]
-		latex_s = get_val(pr.chr)
+        latex_s = get_val(pr.chr)
-		return pr.text+latex_s.format(c_dict['e'])
+        return pr.text + latex_s.format(c_dict["e"])
-	def do_rad(self,elm):
+    def do_rad(self, elm):
-		"""
+        """
-		the radical object
+        the radical object
-		"""
+        """
-		c_dict = self.process_children_dict(elm)
+        c_dict = self.process_children_dict(elm)
-		text = c_dict.get('e')
+        text = c_dict.get("e")
-		deg_text = c_dict.get('deg')
+        deg_text = c_dict.get("deg")
-		if deg_text:
+        if deg_text:
-			return RAD.format(deg=deg_text,text=text)
+            return RAD.format(deg=deg_text, text=text)
-		else:
+        else:
-			return RAD_DEFAULT.format(text=text)
+            return RAD_DEFAULT.format(text=text)
-	def do_eqarr(self,elm):
+    def do_eqarr(self, elm):
-		"""
+        """
-		the Array object
+        the Array object
-		"""
+        """
-		return ARR.format(text=BRK.join(
+        return ARR.format(
-			[t for stag,t,e in self.process_children_list(elm,include=('e',))]))
+            text=BRK.join(
                [t for stag, t, e in self.process_children_list(elm, include=("e",))]
            )
        )
    def do_limlow(self, elm):
        """
        the Lower-Limit object
        """
        t_dict = self.process_children_dict(elm, include=("e", "lim"))
        latex_s = LIM_FUNC.get(t_dict["e"])
        if not latex_s:
            raise NotImplemented("Not support lim %s" % t_dict["e"])
        else:
            return latex_s.format(lim=t_dict.get("lim"))
-	def do_limlow(self,elm):
+    def do_limupp(self, elm):
-		"""
+        """
-		the Lower-Limit object
+        the Upper-Limit object
-		"""
+        """
-		t_dict = self.process_children_dict(elm,include=('e','lim'))
+        t_dict = self.process_children_dict(elm, include=("e", "lim"))
-		latex_s = LIM_FUNC.get(t_dict['e'])
+        return LIM_UPP.format(lim=t_dict.get("lim"), text=t_dict.get("e"))
 		if not latex_s :
 			raise NotImplemented("Not support lim %s" % t_dict['e'])
 		else:
 			return latex_s.format(lim=t_dict.get('lim'))
-	def do_limupp(self,elm):
+    def do_lim(self, elm):
-		"""
+        """
-		the Upper-Limit object
+        the lower limit of the limLow object and the upper limit of the limUpp function
-		"""
+        """
-		t_dict = self.process_children_dict(elm,include=('e','lim'))
+        return self.process_children(elm).replace(LIM_TO[0], LIM_TO[1])
 		return LIM_UPP.format(lim=t_dict.get('lim'),text=t_dict.get('e'))
-	def do_lim(self,elm):
+    def do_m(self, elm):
-		"""
+        """
-		the lower limit of the limLow object and the upper limit of the limUpp function
+        the Matrix object
-		"""
+        """
-		return self.process_children(elm).replace(LIM_TO[0],LIM_TO[1])
+        rows = []
        for stag, t, e in self.process_children_list(elm):
            if stag is "mPr":
                pass
            elif stag == "mr":
                rows.append(t)
        return M.format(text=BRK.join(rows))
-	def do_m(self,elm):
+    def do_mr(self, elm):
-		"""
+        """
-		the Matrix object
+        a single row of the matrix m
-		"""
+        """
-		rows = []
+        return ALN.join(
-		for stag,t,e in self.process_children_list(elm):
+            [t for stag, t, e in self.process_children_list(elm, include=("e",))]
-			if stag is 'mPr':
+        )
 				pass
 			elif stag == 'mr':
 				rows.append(t)
 		return M.format(text=BRK.join(rows))
-	def do_mr(self,elm):
+    def do_nary(self, elm):
-		"""
+        """
-		a single row of the matrix m
+        the n-ary object
-		"""
+        """
-		return ALN.join(
+        res = []
-			[t for stag,t,e in self.process_children_list(elm,include=('e',))])
+        bo = ""
        for stag, t, e in self.process_children_list(elm):
            if stag == "naryPr":
                bo = get_val(t.chr, store=CHR_BO)
            else:
                res.append(t)
        return bo + BLANK.join(res)
-	def do_nary(self,elm):
+    def do_r(self, elm):
-		"""
+        """
-		the n-ary object
+        Get text from 'r' element,And try convert them to latex symbols
-		"""
+        @todo text style support , (sty)
-		res = []
+        @todo \text (latex pure text support)
-		bo = ''
+        """
-		for stag,t,e in self.process_children_list(elm):
+        _str = []
-			if stag == 'naryPr':
+        for s in elm.findtext("./{0}t".format(OMML_NS)):
-				bo = get_val(t.chr,store=CHR_BO)
+            # s = s if isinstance(s,unicode) else unicode(s,'utf-8')
-			else :
+            _str.append(self._t_dict.get(s, s))
-				res.append(t)
+        return escape_latex(BLANK.join(_str))
 		return bo+BLANK.join(res)
-	def do_r(self,elm):
+    tag2meth = {
-		"""
+        "acc": do_acc,
-		Get text from 'r' element,And try convert them to latex symbols
+        "r": do_r,
-		@todo text style support , (sty)
+        "bar": do_bar,
-		@todo \text (latex pure text support)
+        "sub": do_sub,
-		"""
+        "sup": do_sup,
-		_str = []
+        "f": do_f,
-		for s in elm.findtext('./{0}t'.format(OMML_NS)):
+        "func": do_func,
-			#s = s if isinstance(s,unicode) else unicode(s,'utf-8')
+        "fName": do_fname,
-			_str.append(self._t_dict.get(s,s))
+        "groupChr": do_groupchr,
-		return escape_latex(BLANK.join(_str))
+        "d": do_d,
-
+        "rad": do_rad,
-	tag2meth={
+        "eqArr": do_eqarr,
-		'acc' : do_acc,
+        "limLow": do_limlow,
-		'r' : do_r,
+        "limUpp": do_limupp,
-		'bar' : do_bar,
+        "lim": do_lim,
-		'sub' : do_sub,
+        "m": do_m,
-		'sup' : do_sup,
+        "mr": do_mr,
-		'f'   : do_f,
+        "nary": do_nary,
-		'func': do_func,
+    }
 		'fName' : do_fname,
 		'groupChr' : do_groupchr,
 		'd' : do_d,
 		'rad' : do_rad,
 		'eqArr' : do_eqarr,
 		'limLow' : do_limlow,
 		'limUpp' : do_limupp,
 		'lim' : do_lim,
 		'm' : do_m,
 		'mr' : do_mr,
 		'nary' : do_nary,
 	}
--- a/packages/markitdown/src/markitdown/converters/_docx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py
@@ -75,5 +75,6 @@ class DocxConverter(HtmlConverter):
        style_map = kwargs.get("style_map", None)
        pre_process_stream = pre_process_docx(file_stream)
        return self._html_converter.convert_string(
-            mammoth.convert_to_html(pre_process_stream, style_map=style_map).value, **kwargs
+            mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
            **kwargs,
        )
--- a/packages/markitdown/tests/test_module_misc.py
+++ b/packages/markitdown/tests/test_module_misc.py
@@ -272,9 +272,10 @@ def test_docx_equations() -> None:
    assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found"
    # Find block equations wrapped with double $$ and check if they are present
-    block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content)
+    block_equations = re.findall(r"\$\$(.+?)\$\$", result.text_content)
    assert block_equations, "No block equations found in the document."
 def test_input_as_strings() -> None:
    markitdown = MarkItDown()