refactor: reformatted with black

2025-03-28 16:24:27 -04:00 · 2025-03-28 16:24:27 -04:00 · 799a1caf97
commit 799a1caf97
parent 6a66b275bb
4 changed files with 559 additions and 519 deletions
--- a/packages/markitdown/src/markitdown/converter_utils/docx/math/latex_dict.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/latex_dict.py
@ -7,269 +7,267 @@ On 25/03/2025

 from __future__ import unicode_literals

-CHARS = ('{','}', '_', '^', '#', '&', '$', '%', '~')
+CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~")

-BLANK = ''
-BACKSLASH = '\\'
-ALN = '&'
+BLANK = ""
+BACKSLASH = "\\"
+ALN = "&"

 CHR = {
-	#Unicode : Latex Math Symbols
-	#Top accents
-	'\u0300' : '\\grave{{{0}}}',
-	'\u0301' : '\\acute{{{0}}}',
-	'\u0302' : '\\hat{{{0}}}',
-	'\u0303' : '\\tilde{{{0}}}',
-	'\u0304' : '\\bar{{{0}}}',
-	'\u0305' : '\\overbar{{{0}}}',
-	'\u0306' : '\\breve{{{0}}}',
-	'\u0307' : '\\dot{{{0}}}',
-	'\u0308' : '\\ddot{{{0}}}',
-	'\u0309' : '\\ovhook{{{0}}}',
-	'\u030a' : '\\ocirc{{{0}}}}',
-	'\u030c' : '\\check{{{0}}}}',
-	'\u0310' : '\\candra{{{0}}}',
-	'\u0312' : '\\oturnedcomma{{{0}}}',
-	'\u0315' : '\\ocommatopright{{{0}}}',
-	'\u031a' : '\\droang{{{0}}}',
-	'\u0338' : '\\not{{{0}}}',
-	'\u20d0' : '\\leftharpoonaccent{{{0}}}',
-	'\u20d1' : '\\rightharpoonaccent{{{0}}}',
-	'\u20d2' : '\\vertoverlay{{{0}}}',
-	'\u20d6' : '\\overleftarrow{{{0}}}',
-	'\u20d7' : '\\vec{{{0}}}',
-	'\u20db' : '\\dddot{{{0}}}',
-	'\u20dc' : '\\ddddot{{{0}}}',
-	'\u20e1' : '\\overleftrightarrow{{{0}}}',
-	'\u20e7' : '\\annuity{{{0}}}',
-	'\u20e9' : '\\widebridgeabove{{{0}}}',
-	'\u20f0' : '\\asteraccent{{{0}}}',
-	 #Bottom accents
-	'\u0330' : '\\wideutilde{{{0}}}',
-	'\u0331' : '\\underbar{{{0}}}',
-	'\u20e8' : '\\threeunderdot{{{0}}}',
-	'\u20ec' : '\\underrightharpoondown{{{0}}}',
-	'\u20ed' : '\\underleftharpoondown{{{0}}}',
-	'\u20ee' : '\\underledtarrow{{{0}}}',
-	'\u20ef' : '\\underrightarrow{{{0}}}',
-	#Over | group
-	'\u23b4' : '\\overbracket{{{0}}}',
-	'\u23dc' : '\\overparen{{{0}}}',
-	'\u23de' : '\\overbrace{{{0}}}',
-	#Under| group
-	'\u23b5' : '\\underbracket{{{0}}}',
-	'\u23dd' : '\\underparen{{{0}}}',
-	'\u23df' : '\\underbrace{{{0}}}',
+    # Unicode character : LaTeX template.
+    # Every value is a str.format template: "{{" / "}}" are literal braces and
+    # "{0}" is replaced by the already converted base expression, so braces
+    # must stay balanced or str.format raises ValueError.
+    # Top accents
+    "\u0300": "\\grave{{{0}}}",
+    "\u0301": "\\acute{{{0}}}",
+    "\u0302": "\\hat{{{0}}}",
+    "\u0303": "\\tilde{{{0}}}",
+    "\u0304": "\\bar{{{0}}}",
+    "\u0305": "\\overbar{{{0}}}",
+    "\u0306": "\\breve{{{0}}}",
+    "\u0307": "\\dot{{{0}}}",
+    "\u0308": "\\ddot{{{0}}}",
+    "\u0309": "\\ovhook{{{0}}}",
+    "\u030a": "\\ocirc{{{0}}}",
+    "\u030c": "\\check{{{0}}}",
+    "\u0310": "\\candra{{{0}}}",
+    "\u0312": "\\oturnedcomma{{{0}}}",
+    "\u0315": "\\ocommatopright{{{0}}}",
+    "\u031a": "\\droang{{{0}}}",
+    "\u0338": "\\not{{{0}}}",
+    "\u20d0": "\\leftharpoonaccent{{{0}}}",
+    "\u20d1": "\\rightharpoonaccent{{{0}}}",
+    "\u20d2": "\\vertoverlay{{{0}}}",
+    "\u20d6": "\\overleftarrow{{{0}}}",
+    "\u20d7": "\\vec{{{0}}}",
+    "\u20db": "\\dddot{{{0}}}",
+    "\u20dc": "\\ddddot{{{0}}}",
+    "\u20e1": "\\overleftrightarrow{{{0}}}",
+    "\u20e7": "\\annuity{{{0}}}",
+    "\u20e9": "\\widebridgeabove{{{0}}}",
+    "\u20f0": "\\asteraccent{{{0}}}",
+    # Bottom accents
+    "\u0330": "\\wideutilde{{{0}}}",
+    "\u0331": "\\underbar{{{0}}}",
+    "\u20e8": "\\threeunderdot{{{0}}}",
+    "\u20ec": "\\underrightharpoondown{{{0}}}",
+    "\u20ed": "\\underleftharpoondown{{{0}}}",
+    "\u20ee": "\\underleftarrow{{{0}}}",
+    "\u20ef": "\\underrightarrow{{{0}}}",
+    # Over | group
+    "\u23b4": "\\overbracket{{{0}}}",
+    "\u23dc": "\\overparen{{{0}}}",
+    "\u23de": "\\overbrace{{{0}}}",
+    # Under | group
+    "\u23b5": "\\underbracket{{{0}}}",
+    "\u23dd": "\\underparen{{{0}}}",
+    "\u23df": "\\underbrace{{{0}}}",
 }

 CHR_BO = {
-	#Big operators,
-	'\u2140' : '\\Bbbsum',
-	'\u220f' : '\\prod',
-	'\u2210' : '\\coprod',
-	'\u2211' : '\\sum',
-	'\u222b' : '\\int',
-	'\u22c0' : '\\bigwedge',
-	'\u22c1' : '\\bigvee',
-	'\u22c2' : '\\bigcap',
-	'\u22c3' : '\\bigcup',
-	'\u2a00' : '\\bigodot',
-	'\u2a01' : '\\bigoplus',
-	'\u2a02' : '\\bigotimes',
+    # Big (n-ary) operators: Unicode character -> LaTeX command.
+    # Consulted by oMath2Latex.do_nary through get_val(..., store=CHR_BO);
+    # a character missing from this table is passed through unchanged.
+    "\u2140": "\\Bbbsum",
+    "\u220f": "\\prod",
+    "\u2210": "\\coprod",
+    "\u2211": "\\sum",
+    "\u222b": "\\int",
+    "\u22c0": "\\bigwedge",
+    "\u22c1": "\\bigvee",
+    "\u22c2": "\\bigcap",
+    "\u22c3": "\\bigcup",
+    "\u2a00": "\\bigodot",
+    "\u2a01": "\\bigoplus",
+    "\u2a02": "\\bigotimes",
 }

 T = {
-
-	'\u2192' : '\\rightarrow ',
-	#Greek letters
-	'\U0001d6fc' : '\\alpha ',
-	'\U0001d6fd' : '\\beta ',
-	'\U0001d6fe' : '\\gamma ',
-	'\U0001d6ff' : '\\theta ',
-	'\U0001d700' : '\\epsilon ',
-	'\U0001d701' : '\\zeta ',
-	'\U0001d702' : '\\eta ',
-	'\U0001d703' : '\\theta ',
-	'\U0001d704' : '\\iota ',
-	'\U0001d705' : '\\kappa ',
-	'\U0001d706' : '\\lambda ',
-	'\U0001d707' : '\\m ',
-	'\U0001d708' : '\\n ',
-	'\U0001d709' : '\\xi ',
-	'\U0001d70a' : '\\omicron ',
-	'\U0001d70b' : '\\pi ',
-	'\U0001d70c' : '\\rho ',
-	'\U0001d70d' : '\\varsigma ',
-	'\U0001d70e' : '\\sigma ',
-	'\U0001d70f' : '\\ta ',
-	'\U0001d710' : '\\upsilon ',
-	'\U0001d711' : '\\phi ',
-	'\U0001d712' : '\\chi ',
-	'\U0001d713' : '\\psi ',
-	'\U0001d714' : '\\omega ',
-	'\U0001d715' : '\\partial ',
-	'\U0001d716' : '\\varepsilon ',
-	'\U0001d717' : '\\vartheta ',
-	'\U0001d718' : '\\varkappa ',
-	'\U0001d719' : '\\varphi ',
-	'\U0001d71a' : '\\varrho ',
-	'\U0001d71b' : '\\varpi ',
-	#Relation symbols
-	'\u2190' : '\\leftarrow ',
-	'\u2191' : '\\uparrow ',
-	'\u2192' : '\\rightarrow ',
-	'\u2193' : '\\downright ',
-	'\u2194' : '\\leftrightarrow ',
-	'\u2195' : '\\updownarrow ',
-	'\u2196' : '\\nwarrow ',
-	'\u2197' : '\\nearrow ',
-	'\u2198' : '\\searrow ',
-	'\u2199' : '\\swarrow ',
-	'\u22ee' : '\\vdots ',
-	'\u22ef' : '\\cdots ',
-	'\u22f0' : '\\adots ',
-	'\u22f1' : '\\ddots ',
-	'\u2260' : '\\ne ',
-	'\u2264' : '\\leq ',
-	'\u2265' : '\\geq ',
-	'\u2266' : '\\leqq ',
-	'\u2267' : '\\geqq ',
-	'\u2268' : '\\lneqq ',
-	'\u2269' : '\\gneqq ',
-	'\u226a' : '\\ll ',
-	'\u226b' : '\\gg ',
-	'\u2208' : '\\in ',
-	'\u2209' : '\\notin ',
-	'\u220b' : '\\ni ',
-	'\u220c' : '\\nni ',
-
-	#Ordinary symbols
-	'\u221e' : '\\infty ',
-	#Binary relations
-	'\u00b1' : '\\pm ',
-	'\u2213' : '\\mp ',
-	#Italic, Latin, uppercase
-	'\U0001d434' : 'A',
-	'\U0001d435' : 'B',
-	'\U0001d436' : 'C',
-	'\U0001d437' : 'D',
-	'\U0001d438' : 'E',
-	'\U0001d439' : 'F',
-	'\U0001d43a' : 'G',
-	'\U0001d43b' : 'H',
-	'\U0001d43c'  : 'I',
-	'\U0001d43d'  : 'J',
-	'\U0001d43e'  : 'K',
-	'\U0001d43f'   : 'L',
-	'\U0001d440'  : 'M',
-	'\U0001d441'  : 'N',
-	'\U0001d442'  : 'O',
-	'\U0001d443'  : 'P',
-	'\U0001d444'  : 'Q',
-	'\U0001d445'  : 'R',
-	'\U0001d446'  : 'S',
-	'\U0001d447'  : 'T',
-	'\U0001d448'  : 'U',
-	'\U0001d449'  : 'V',
-	'\U0001d44a'  : 'W',
-	'\U0001d44b'  : 'X',
-	'\U0001d44c'  : 'Y',
-	'\U0001d44d'  : 'Z',
-	#Italic, Latin, lowercase
-	'\U0001d44e'  : 'a',
-	'\U0001d44f'   : 'b',
-	'\U0001d450'  : 'c',
-	'\U0001d451'  : 'd',
-	'\U0001d452'  : 'e',
-	'\U0001d453'  : 'f',
-	'\U0001d454'  : 'g',
-	'\U0001d456'  : 'i',
-	'\U0001d457'  : 'j',
-	'\U0001d458'  : 'k',
-	'\U0001d459'  : 'l',
-	'\U0001d45a'  : 'm',
-	'\U0001d45b'  : 'n',
-	'\U0001d45c'   : 'o',
-	'\U0001d45d'  : 'p',
-	'\U0001d45e'  : 'q',
-	'\U0001d45f'   : 'r',
-	'\U0001d460'  : 's',
-	'\U0001d461'  : 't',
-	'\U0001d462'  : 'u',
-	'\U0001d463'  : 'v',
-	'\U0001d464'  : 'w',
-	'\U0001d465'  : 'x',
-	'\U0001d466'  : 'y',
-	'\U0001d467'  : 'z',
+    # Unicode character : LaTeX replacement, applied per character by do_r.
+    # Commands keep their trailing space so that following text does not
+    # merge into the command name.
+    # Greek letters (Mathematical Italic Small block, U+1D6FC..U+1D71B)
+    "\U0001d6fc": "\\alpha ",
+    "\U0001d6fd": "\\beta ",
+    "\U0001d6fe": "\\gamma ",
+    "\U0001d6ff": "\\delta ",
+    "\U0001d700": "\\epsilon ",
+    "\U0001d701": "\\zeta ",
+    "\U0001d702": "\\eta ",
+    "\U0001d703": "\\theta ",
+    "\U0001d704": "\\iota ",
+    "\U0001d705": "\\kappa ",
+    "\U0001d706": "\\lambda ",
+    "\U0001d707": "\\mu ",
+    "\U0001d708": "\\nu ",
+    "\U0001d709": "\\xi ",
+    "\U0001d70a": "\\omicron ",
+    "\U0001d70b": "\\pi ",
+    "\U0001d70c": "\\rho ",
+    "\U0001d70d": "\\varsigma ",
+    "\U0001d70e": "\\sigma ",
+    "\U0001d70f": "\\tau ",
+    "\U0001d710": "\\upsilon ",
+    "\U0001d711": "\\phi ",
+    "\U0001d712": "\\chi ",
+    "\U0001d713": "\\psi ",
+    "\U0001d714": "\\omega ",
+    "\U0001d715": "\\partial ",
+    "\U0001d716": "\\varepsilon ",
+    "\U0001d717": "\\vartheta ",
+    "\U0001d718": "\\varkappa ",
+    "\U0001d719": "\\varphi ",
+    "\U0001d71a": "\\varrho ",
+    "\U0001d71b": "\\varpi ",
+    # Relation symbols
+    "\u2190": "\\leftarrow ",
+    "\u2191": "\\uparrow ",
+    "\u2192": "\\rightarrow ",
+    "\u2193": "\\downarrow ",
+    "\u2194": "\\leftrightarrow ",
+    "\u2195": "\\updownarrow ",
+    "\u2196": "\\nwarrow ",
+    "\u2197": "\\nearrow ",
+    "\u2198": "\\searrow ",
+    "\u2199": "\\swarrow ",
+    "\u22ee": "\\vdots ",
+    "\u22ef": "\\cdots ",
+    "\u22f0": "\\adots ",
+    "\u22f1": "\\ddots ",
+    "\u2260": "\\ne ",
+    "\u2264": "\\leq ",
+    "\u2265": "\\geq ",
+    "\u2266": "\\leqq ",
+    "\u2267": "\\geqq ",
+    "\u2268": "\\lneqq ",
+    "\u2269": "\\gneqq ",
+    "\u226a": "\\ll ",
+    "\u226b": "\\gg ",
+    "\u2208": "\\in ",
+    "\u2209": "\\notin ",
+    "\u220b": "\\ni ",
+    "\u220c": "\\nni ",
+    # Ordinary symbols
+    "\u221e": "\\infty ",
+    # Binary relations
+    "\u00b1": "\\pm ",
+    "\u2213": "\\mp ",
+    # Italic, Latin, uppercase
+    "\U0001d434": "A",
+    "\U0001d435": "B",
+    "\U0001d436": "C",
+    "\U0001d437": "D",
+    "\U0001d438": "E",
+    "\U0001d439": "F",
+    "\U0001d43a": "G",
+    "\U0001d43b": "H",
+    "\U0001d43c": "I",
+    "\U0001d43d": "J",
+    "\U0001d43e": "K",
+    "\U0001d43f": "L",
+    "\U0001d440": "M",
+    "\U0001d441": "N",
+    "\U0001d442": "O",
+    "\U0001d443": "P",
+    "\U0001d444": "Q",
+    "\U0001d445": "R",
+    "\U0001d446": "S",
+    "\U0001d447": "T",
+    "\U0001d448": "U",
+    "\U0001d449": "V",
+    "\U0001d44a": "W",
+    "\U0001d44b": "X",
+    "\U0001d44c": "Y",
+    "\U0001d44d": "Z",
+    # Italic, Latin, lowercase
+    "\U0001d44e": "a",
+    "\U0001d44f": "b",
+    "\U0001d450": "c",
+    "\U0001d451": "d",
+    "\U0001d452": "e",
+    "\U0001d453": "f",
+    "\U0001d454": "g",
+    # U+1D455 is reserved in Unicode; italic small h is U+210E (Planck constant)
+    "\u210e": "h",
+    "\U0001d456": "i",
+    "\U0001d457": "j",
+    "\U0001d458": "k",
+    "\U0001d459": "l",
+    "\U0001d45a": "m",
+    "\U0001d45b": "n",
+    "\U0001d45c": "o",
+    "\U0001d45d": "p",
+    "\U0001d45e": "q",
+    "\U0001d45f": "r",
+    "\U0001d460": "s",
+    "\U0001d461": "t",
+    "\U0001d462": "u",
+    "\U0001d463": "v",
+    "\U0001d464": "w",
+    "\U0001d465": "x",
+    "\U0001d466": "y",
+    "\U0001d467": "z",
 }

-FUNC ={
-	'sin' : '\\sin({fe})',
-	'cos' : '\\cos({fe})',
-	'tan' : '\\tan({fe})',
-	'arcsin' : '\\arcsin({fe})',
-	'arccos' : '\\arccos({fe})',
-	'arctan' : '\\arctan({fe})',
-	'arccot' : '\\arccot({fe})',
-	'sinh' : '\\sinh({fe})',
-	'cosh' : '\\cosh({fe})',
-	'tanh' : '\\tanh({fe})',
-	'coth' : '\\coth({fe})',
-	'sec'  : '\\sec({fe})',
-	'csc'  : '\\csc({fe})',
+# Function name (text of a run inside fName) -> LaTeX template.
+# Each template contains the FUNC_PLACE placeholder "{fe}", which
+# oMath2Latex.do_func substitutes with the converted argument via str.replace
+# (not str.format).
+FUNC = {
+    "sin": "\\sin({fe})",
+    "cos": "\\cos({fe})",
+    "tan": "\\tan({fe})",
+    "arcsin": "\\arcsin({fe})",
+    "arccos": "\\arccos({fe})",
+    "arctan": "\\arctan({fe})",
+    "arccot": "\\arccot({fe})",
+    "sinh": "\\sinh({fe})",
+    "cosh": "\\cosh({fe})",
+    "tanh": "\\tanh({fe})",
+    "coth": "\\coth({fe})",
+    "sec": "\\sec({fe})",
+    "csc": "\\csc({fe})",
 }

-FUNC_PLACE = '{fe}'
+FUNC_PLACE = "{fe}"

-BRK = '\\\\'
+BRK = "\\\\"

 CHR_DEFAULT = {
-	'ACC_VAL':'\\hat{{{0}}}',
+    "ACC_VAL": "\\hat{{{0}}}",
 }

 POS = {
-	'top' : '\\overline{{{0}}}', # not sure
-	'bot' : '\\underline{{{0}}}',
+    "top": "\\overline{{{0}}}",  # not sure
+    "bot": "\\underline{{{0}}}",
 }

 POS_DEFAULT = {
-	'BAR_VAL': '\\overline{{{0}}}',
+    "BAR_VAL": "\\overline{{{0}}}",
 }

-SUB = '_{{{0}}}'
+SUB = "_{{{0}}}"

-SUP = '^{{{0}}}'
+SUP = "^{{{0}}}"

 F = {
-	'bar': '\\frac{{{num}}}{{{den}}}',
-	'skw': r'^{{{num}}}/_{{{den}}}',
-	'noBar': '\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}',
-	'lin' : '{{{num}}}/{{{den}}}',
+    "bar": "\\frac{{{num}}}{{{den}}}",
+    "skw": r"^{{{num}}}/_{{{den}}}",
+    "noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
+    "lin": "{{{num}}}/{{{den}}}",
 }
-F_DEFAULT = '\\frac{{{num}}}{{{den}}}'
+F_DEFAULT = "\\frac{{{num}}}{{{den}}}"

-D  = '\\left{left}{text}\\right{right}'
+D = "\\left{left}{text}\\right{right}"

 D_DEFAULT = {
-	'left':'(',
-	'right':')',
-	'null':'.',
+    "left": "(",
+    "right": ")",
+    "null": ".",
 }

-RAD = '\\sqrt[{deg}]{{{text}}}'
+RAD = "\\sqrt[{deg}]{{{text}}}"

-RAD_DEFAULT = '\\sqrt{{{text}}}'
+RAD_DEFAULT = "\\sqrt{{{text}}}"

-ARR = '\\begin{{array}}{{c}}{text}\end{{array}}'
+# str.format template for an equation array; "{text}" receives the rows
+# already joined with BRK. "\\end" is spelled with an escaped backslash:
+# "\e" is an invalid escape sequence that newer Python versions warn about.
+ARR = "\\begin{{array}}{{c}}{text}\\end{{array}}"

 LIM_FUNC = {
-	'lim':'\\lim_{{{lim}}}',
-	'max':'\\max_{{{lim}}}',
-	'min':'\\min_{{{lim}}}',
+    "lim": "\\lim_{{{lim}}}",
+    "max": "\\max_{{{lim}}}",
+    "min": "\\min_{{{lim}}}",
 }

-LIM_TO  = ('\\rightarrow','\\to')
+LIM_TO = ("\\rightarrow", "\\to")

-LIM_UPP = '\\overset{{{lim}}}{{{text}}}'
+LIM_UPP = "\\overset{{{lim}}}{{{text}}}"

-M = '\\begin{{matrix}}{text}\end{{matrix}}'
+# str.format template for a matrix; "{text}" receives the rows already joined
+# with BRK. "\\end" is spelled with an escaped backslash: "\e" is an invalid
+# escape sequence that newer Python versions warn about.
+M = "\\begin{{matrix}}{text}\\end{{matrix}}"
--- a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
@ -8,355 +8,395 @@ On 25/03/2025

 import xml.etree.ElementTree as ET

-from .latex_dict import (CHARS, CHR, CHR_BO, CHR_DEFAULT, POS, POS_DEFAULT
-	, SUB, SUP, F, F_DEFAULT, T, FUNC, D, D_DEFAULT, RAD, RAD_DEFAULT, ARR
-	, LIM_FUNC, LIM_TO, LIM_UPP, M, BRK, BLANK, BACKSLASH, ALN, FUNC_PLACE)
+from .latex_dict import (
+    CHARS,
+    CHR,
+    CHR_BO,
+    CHR_DEFAULT,
+    POS,
+    POS_DEFAULT,
+    SUB,
+    SUP,
+    F,
+    F_DEFAULT,
+    T,
+    FUNC,
+    D,
+    D_DEFAULT,
+    RAD,
+    RAD_DEFAULT,
+    ARR,
+    LIM_FUNC,
+    LIM_TO,
+    LIM_UPP,
+    M,
+    BRK,
+    BLANK,
+    BACKSLASH,
+    ALN,
+    FUNC_PLACE,
+)

 OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"


 def load(stream):
-	tree = ET.parse(stream)
-	for omath in tree.findall(OMML_NS+'oMath'):
-		yield oMath2Latex(omath)
+    """
+    Parse an XML document from *stream* (anything ET.parse accepts) and
+    lazily yield one oMath2Latex converter per ``oMath`` element found by
+    ``findall`` with the bare namespaced tag, i.e. direct children of the root.
+    """
+    tree = ET.parse(stream)
+    for omath in tree.findall(OMML_NS + "oMath"):
+        yield oMath2Latex(omath)
+

 def load_string(string):
-	root = ET.fromstring(string)
-	for omath in root.findall(OMML_NS+'oMath'):
-		yield oMath2Latex(omath)
+    """
+    Parse the XML text *string* and lazily yield one oMath2Latex converter
+    per ``oMath`` element that is a direct child of the root element.
+    """
+    root = ET.fromstring(string)
+    for omath in root.findall(OMML_NS + "oMath"):
+        yield oMath2Latex(omath)
+

 def escape_latex(strs):
-	last = None
-	new_chr = []
-	strs = strs.replace(r'\\','\\')
-	for c in strs :
-		if (c in CHARS) and (last !=BACKSLASH):
-			new_chr.append(BACKSLASH+c)
-		else:
-			new_chr.append(c)
-		last = c
-	return BLANK.join(new_chr)
+    """
+    Backslash-escape the LaTeX special characters listed in CHARS.
+
+    Doubled backslashes are first collapsed to a single one; a special
+    character directly preceded by a backslash is left untouched.
+    """
+    collapsed = strs.replace(r"\\", "\\")
+    pieces = []
+    previous = None
+    for ch in collapsed:
+        needs_escape = ch in CHARS and previous != BACKSLASH
+        pieces.append(BACKSLASH + ch if needs_escape else ch)
+        previous = ch
+    return BLANK.join(pieces)

-def get_val(key,default=None,store=CHR):
-	if key is not None:
-		return key if not store else store.get(key,key)
-	else:
-		return default
+
+def get_val(key, default=None, store=CHR):
+    """
+    Translate *key* through the mapping *store*.
+
+    Returns *default* when *key* is None, *key* itself when *store* is
+    empty/None or has no entry for it, and the mapped value otherwise.
+    """
+    if key is None:
+        return default
+    if not store:
+        return key
+    return store.get(key, key)


 class Tag2Method(object):

-	def call_method(self,elm,stag=None):
-		getmethod = self.tag2meth.get
-		if stag is None:
-			stag = elm.tag.replace(OMML_NS,'')
-		method = getmethod(stag)
-		if method:
-			return method(self,elm)
-		else:
-			return None
+    def call_method(self, elm, stag=None):
+        """
+        Dispatch *elm* to the handler registered for its tag in ``tag2meth``.
+
+        *stag* is the tag without the OMML namespace; it is derived from
+        ``elm.tag`` when omitted. Returns the handler's result, or None when
+        no handler is registered for the tag.
+        """
+        lookup = self.tag2meth.get
+        tag = elm.tag.replace(OMML_NS, "") if stag is None else stag
+        handler = lookup(tag)
+        # tag2meth stores plain functions, hence the explicit self
+        return handler(self, elm) if handler else None

-	def process_children_list(self,elm,include=None):
-		"""
-		process children of the elm,return iterable
-		"""		
-		for _e in list(elm):
-			if (OMML_NS not in _e.tag):
-				continue
-			stag = _e.tag.replace(OMML_NS,'')			
-			if include and (stag not in include):
-				continue
-			t = self.call_method(_e,stag=stag)
-			if t is None:
-				t = self.process_unknow(_e,stag)
-				if t is None:
-					continue
-			yield (stag,t,_e)
+    def process_children_list(self, elm, include=None):
+        """
+        Convert the children of *elm* one by one.
+
+        Yields ``(stag, result, child)`` for every child in the OMML namespace
+        whose conversion is not None; *stag* is the tag without the namespace.
+        When *include* is non-empty only tags contained in it are processed.
+        Children without a registered handler go through ``process_unknow``.
+        """
+        for child in list(elm):
+            full_tag = child.tag
+            if OMML_NS not in full_tag:
+                continue
+            stag = full_tag.replace(OMML_NS, "")
+            if include and (stag not in include):
+                continue
+            converted = self.call_method(child, stag=stag)
+            if converted is None:
+                converted = self.process_unknow(child, stag)
+            if converted is not None:
+                yield (stag, converted, child)

-	def process_children_dict(self,elm,include=None):
-		"""
-		process children of the elm,return dict
-		"""
-		latex_chars = dict()
-		for stag,t,e in self.process_children_list(elm,include):
-			latex_chars[stag] = t
-		return latex_chars
+    def process_children_dict(self, elm, include=None):
+        """
+        Convert the children of *elm* and return them as ``{stag: result}``.
+
+        When several children share a tag, the last one wins.
+        """
+        return {
+            stag: converted
+            for stag, converted, _child in self.process_children_list(elm, include)
+        }

-	def process_children(self,elm,include=None):
-		"""
-		process children of the elm,return string
-		"""
-		return BLANK.join(( t if not isinstance(t,Tag2Method) else str(t) 
-			for stag,t,e in self.process_children_list(elm,include)))
+    def process_children(self, elm, include=None):
+        """
+        Convert the children of *elm* and return the results concatenated
+        into one string; Tag2Method results are rendered with ``str()``.
+        """
+        parts = []
+        for _stag, item, _child in self.process_children_list(elm, include):
+            parts.append(str(item) if isinstance(item, Tag2Method) else item)
+        return BLANK.join(parts)

-	def process_unknow(self,elm,stag):
-		return None
+    def process_unknow(self, elm, stag):
+        """
+        Fallback for children whose tag has no entry in ``tag2meth``.
+
+        The base implementation returns None, which makes
+        ``process_children_list`` skip the child; subclasses may override it.
+        """
+        return None


 class Pr(Tag2Method):

-	text = ''
+    text = ""

-	__val_tags = ('chr','pos','begChr','endChr','type')
+    __val_tags = ("chr", "pos", "begChr", "endChr", "type")

-	__innerdict= None #can't use the __dict__
+    __innerdict = None  # can't use the __dict__

-	""" common properties of element"""
-	def __init__(self, elm):
-		self.__innerdict={}
-		self.text=self.process_children(elm)
+    """ common properties of element"""

-	def __str__(self):
-		return self.text
+    def __init__(self, elm):
+        """
+        Collect the property values of the ``*Pr`` element *elm*.
+
+        Processing the children fills the private value dict as a side effect
+        of do_common / do_brk; ``text`` keeps the concatenated handler output
+        (do_common returns None, so only ``brk`` children contribute).
+        """
+        self.__innerdict = {}
+        self.text = self.process_children(elm)

-	def __unicode__(self):
-		return self.__str__(self)
+    def __str__(self):
+        """Return the text collected from the property element's children."""
+        return self.text

-	def __getattr__(self,name):
-		return self.__innerdict.get(name,None)
+    def __unicode__(self):
+        """Python 2 text hook; same result as ``__str__``."""
+        # __str__ is already bound, so passing self again raised TypeError
+        return self.__str__()

-	def do_brk(self,elm):
-		self.__innerdict['brk'] = BRK 
-		return BRK
+    def __getattr__(self, name):
+        """
+        Expose recorded property values (chr, pos, begChr, endChr, type, brk)
+        as attributes.
+
+        Python calls this only when normal attribute lookup fails; any name
+        that was not recorded yields None instead of raising AttributeError.
+        """
+        # NOTE(review): relies on __init__ having replaced the class-level
+        # None with a dict; an instance created without __init__ fails here.
+        return self.__innerdict.get(name, None)

-	def do_common(self,elm):
-		stag = elm.tag.replace(OMML_NS,'')
-		if stag in self.__val_tags:
-			t = elm.get('{0}val'.format(OMML_NS))
-			self.__innerdict[stag] = t
-		return None
+    def do_brk(self, elm):
+        """
+        Handle a ``brk`` child: record BRK under the key "brk" and return it
+        so that it also becomes part of ``text``.
+        """
+        self.__innerdict["brk"] = BRK
+        return BRK

-	tag2meth = {
-		'brk':do_brk,
-		'chr':do_common,
-		'pos':do_common,
-		'begChr':do_common,
-		'endChr':do_common,
-		'type':do_common,
-	}
+    def do_common(self, elm):
+        """
+        Handle a value-carrying child (chr, pos, begChr, endChr, type).
+
+        Stores the namespaced ``val`` attribute (None when absent) under the
+        child's tag name. Always returns None, so nothing is added to ``text``.
+        """
+        stag = elm.tag.replace(OMML_NS, "")
+        if stag not in self.__val_tags:
+            return None
+        self.__innerdict[stag] = elm.get("{0}val".format(OMML_NS))
+        return None
+
+    # Dispatch table read by Tag2Method.call_method: child tag (namespace
+    # stripped) -> handler. The values are the plain functions defined in this
+    # class body, which is why call_method passes the instance explicitly.
+    tag2meth = {
+        "brk": do_brk,
+        "chr": do_common,
+        "pos": do_common,
+        "begChr": do_common,
+        "endChr": do_common,
+        "type": do_common,
+    }


 class oMath2Latex(Tag2Method):
-	"""
-	Convert oMath element of omml to latex
-	"""
-	_t_dict = T
+    """
+    Convert oMath element of omml to latex
+    """

-	__direct_tags = ('box','sSub','sSup','sSubSup','num','den','deg','e')
+    _t_dict = T

-	def __init__(self, element):
-		self._latex = self.process_children(element)		
+    __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")

-	def __str__(self):
-		return self.latex
+    def __init__(self, element):
+        """Convert the children of *element* eagerly and cache the LaTeX string."""
+        self._latex = self.process_children(element)

-	def __unicode__(self):
-		return self.__str__(self)
+    def __str__(self):
+        """Return the LaTeX string computed in ``__init__``."""
+        return self.latex

-	def process_unknow(self,elm,stag):			
-		if stag in self.__direct_tags:
-			return self.process_children(elm)
-		elif stag[-2:] == 'Pr':
-			return Pr(elm)
-		else:
-			return None
+    def __unicode__(self):
+        """Python 2 text hook; same result as ``__str__``."""
+        # __str__ is already bound, so passing self again raised TypeError
+        return self.__str__()

-	@property
-	def latex(self):
-		return self._latex
+    def process_unknow(self, elm, stag):
+        """
+        Fallback for tags without a dedicated handler.
+
+        Pure container tags (``__direct_tags``) are converted by concatenating
+        their children, tags ending in "Pr" become a Pr object, and anything
+        else yields None (the child is skipped).
+        """
+        if stag in self.__direct_tags:
+            return self.process_children(elm)
+        if stag.endswith("Pr"):
+            return Pr(elm)
+        return None

-	def do_acc(self,elm):
-		"""
-		the accent function
-		"""
-		c_dict = self.process_children_dict(elm)
-		latex_s = get_val(c_dict['accPr'].chr,default=CHR_DEFAULT.get('ACC_VAL'),store=CHR)
-		return latex_s.format(c_dict['e'])		
+    @property
+    def latex(self):
+        """The LaTeX string computed in ``__init__`` (read-only)."""
+        return self._latex

-	def do_bar(self,elm):
-		"""
-		the bar function
-		"""
-		c_dict = self.process_children_dict(elm)
-		pr = c_dict['barPr']
-		latex_s = get_val(pr.pos,default=POS_DEFAULT.get('BAR_VAL'),store=POS)
-		return pr.text+latex_s.format(c_dict['e'])
+    def do_acc(self, elm):
+        """
+        the accent function
+
+        Wraps the base ``e`` in the CHR template of the accent character from
+        ``accPr``. Falls back to CHR_DEFAULT["ACC_VAL"] when ``accPr`` or its
+        ``chr`` is missing (the property element is optional in OMML).
+        """
+        c_dict = self.process_children_dict(elm)
+        pr = c_dict.get("accPr")
+        latex_s = get_val(
+            pr.chr if pr is not None else None,
+            default=CHR_DEFAULT.get("ACC_VAL"),
+            store=CHR,
+        )
+        return latex_s.format(c_dict["e"])

-	def do_d(self,elm):
-		"""
-		the delimiter object
-		"""
-		c_dict = self.process_children_dict(elm)
-		pr = c_dict['dPr']
-		null = D_DEFAULT.get('null')
-		s_val = get_val(pr.begChr,default=D_DEFAULT.get('left'),store=T)
-		e_val = get_val(pr.endChr,default=D_DEFAULT.get('right'),store=T)
-		return pr.text+D.format(left= null if not s_val else escape_latex(s_val),
-					text=c_dict['e'],
-					right= null if not e_val else  escape_latex(e_val))
+    def do_bar(self, elm):
+        """
+        the bar function
+
+        Wraps the base ``e`` in the POS template selected by ``barPr/pos``;
+        POS_DEFAULT["BAR_VAL"] is used when ``barPr`` or ``pos`` is missing
+        (the property element is optional in OMML).
+        """
+        c_dict = self.process_children_dict(elm)
+        pr = c_dict.get("barPr")
+        if pr is None:
+            position, prefix = None, BLANK
+        else:
+            position, prefix = pr.pos, pr.text
+        latex_s = get_val(position, default=POS_DEFAULT.get("BAR_VAL"), store=POS)
+        return prefix + latex_s.format(c_dict["e"])

+    def do_d(self, elm):
+        """
+        the delimiter object
+
+        Renders ``\\left<beg> e \\right<end>``. Missing ``dPr`` / ``begChr`` /
+        ``endChr`` fall back to D_DEFAULT "left" / "right"; an explicitly
+        empty delimiter becomes D_DEFAULT "null" (".").
+        """
+        c_dict = self.process_children_dict(elm)
+        pr = c_dict.get("dPr")  # optional in OMML
+        if pr is None:
+            beg_chr = end_chr = None
+            prefix = BLANK
+        else:
+            beg_chr, end_chr, prefix = pr.begChr, pr.endChr, pr.text
+        null = D_DEFAULT.get("null")
+        s_val = get_val(beg_chr, default=D_DEFAULT.get("left"), store=T)
+        e_val = get_val(end_chr, default=D_DEFAULT.get("right"), store=T)
+        return prefix + D.format(
+            left=null if not s_val else escape_latex(s_val),
+            text=c_dict["e"],
+            right=null if not e_val else escape_latex(e_val),
+        )

-	def do_spre(self,elm):
-		"""
-		the Pre-Sub-Superscript object -- Not support yet
-		"""
-		pass
+    def do_spre(self, elm):
+        """
+        the Pre-Sub-Superscript object -- not supported yet
+
+        Placeholder that does nothing and implicitly returns None.
+        """
+        pass

-	def do_sub(self,elm):
-		text = self.process_children(elm)
-		return SUB.format(text)
+    def do_sub(self, elm):
+        """Render the converted children of *elm* as a LaTeX subscript."""
+        return SUB.format(self.process_children(elm))

-	def do_sup(self,elm):
-		text = self.process_children(elm)
-		return SUP.format(text)
+    def do_sup(self, elm):
+        """Render the converted children of *elm* as a LaTeX superscript."""
+        return SUP.format(self.process_children(elm))

-	def do_f(self,elm):
-		"""
-		the fraction object
-		"""
-		c_dict = self.process_children_dict(elm)
-		pr = c_dict['fPr']
-		latex_s = get_val(pr.type,default=F_DEFAULT,store=F)
-		return pr.text+latex_s.format(num=c_dict.get('num'),den=c_dict.get('den'))
+    def do_f(self, elm):
+        """
+        the fraction object
+
+        Picks the template from F by ``fPr/type``; F_DEFAULT is used when
+        ``fPr`` or ``type`` is missing (the property element is optional in
+        OMML), then fills in the converted ``num`` and ``den``.
+        """
+        c_dict = self.process_children_dict(elm)
+        pr = c_dict.get("fPr")
+        if pr is None:
+            frac_type, prefix = None, BLANK
+        else:
+            frac_type, prefix = pr.type, pr.text
+        latex_s = get_val(frac_type, default=F_DEFAULT, store=F)
+        return prefix + latex_s.format(num=c_dict.get("num"), den=c_dict.get("den"))

-	def do_func(self,elm):
-		"""
-		the Function-Apply object (Examples:sin cos)
-		"""
-		c_dict = self.process_children_dict(elm)
-		func_name = c_dict.get('fName')
-		return func_name.replace(FUNC_PLACE,c_dict.get('e'))
+    def do_func(self, elm):
+        """
+        the Function-Apply object (Examples:sin cos)
+
+        Substitutes the converted argument ``e`` for the FUNC_PLACE
+        placeholder in the template produced for ``fName``.
+        """
+        parts = self.process_children_dict(elm)
+        template = parts.get("fName")
+        argument = parts.get("e")
+        return template.replace(FUNC_PLACE, argument)

-	def do_fname(self,elm):
-		"""
-		the func name
-		"""
-		latex_chars = []
-		for stag,t,e in self.process_children_list(elm):
-			if stag == 'r':
-				if FUNC.get(t):
-					latex_chars.append(FUNC[t])
-				else :
-					raise NotImplemented("Not support func %s" % t)
-			else:
-				latex_chars.append(t)
-		t = BLANK.join(latex_chars)
-		return t if FUNC_PLACE in t else t+FUNC_PLACE #do_func will replace this
+    def do_fname(self, elm):
+        """
+        the func name
+
+        Builds the template for a function name: plain runs are looked up in
+        FUNC, other children are used as converted. The result always
+        contains FUNC_PLACE so that do_func can insert the argument.
+
+        Raises NotImplementedError for a run whose text is not in FUNC.
+        """
+        latex_chars = []
+        for stag, t, e in self.process_children_list(elm):
+            if stag == "r":
+                template = FUNC.get(t)
+                if not template:
+                    # NotImplemented is a constant, not an exception class
+                    raise NotImplementedError("Not support func %s" % t)
+                latex_chars.append(template)
+            else:
+                latex_chars.append(t)
+        t = BLANK.join(latex_chars)
+        return t if FUNC_PLACE in t else t + FUNC_PLACE  # do_func will replace this

-	def do_groupchr(self,elm):
-		"""
-		the Group-Character object
-		"""
-		c_dict = self.process_children_dict(elm)
-		pr = c_dict['groupChrPr']
-		latex_s = get_val(pr.chr)
-		return pr.text+latex_s.format(c_dict['e'])
+    def do_groupchr(self, elm):
+        """
+        the Group-Character object
+
+        Wraps the base ``e`` in the CHR template of ``groupChrPr/chr``.
+        """
+        # NOTE(review): a missing groupChrPr raises KeyError and a missing chr
+        # raises AttributeError (get_val returns None) - confirm whether a
+        # default group character should be applied instead.
+        parts = self.process_children_dict(elm)
+        props = parts["groupChrPr"]
+        template = get_val(props.chr)
+        return props.text + template.format(parts["e"])

-	def do_rad(self,elm):
-		"""
-		the radical object
-		"""
-		c_dict = self.process_children_dict(elm)
-		text = c_dict.get('e')
-		deg_text = c_dict.get('deg')
-		if deg_text:
-			return RAD.format(deg=deg_text,text=text)
-		else:
-			return RAD_DEFAULT.format(text=text)
-			
-	def do_eqarr(self,elm):
-		"""
-		the Array object
-		"""
-		return ARR.format(text=BRK.join(
-			[t for stag,t,e in self.process_children_list(elm,include=('e',))]))
+    def do_rad(self, elm):
+        """
+        the radical object
+
+        Uses RAD when a non-empty degree is present, RAD_DEFAULT otherwise.
+        """
+        parts = self.process_children_dict(elm)
+        radicand = parts.get("e")
+        degree = parts.get("deg")
+        if not degree:
+            return RAD_DEFAULT.format(text=radicand)
+        return RAD.format(deg=degree, text=radicand)

+    def do_eqarr(self, elm):
+        """
+        the Array object
+
+        Each ``e`` child becomes one row; rows are joined with BRK.
+        """
+        rows = []
+        for _stag, row, _child in self.process_children_list(elm, include=("e",)):
+            rows.append(row)
+        return ARR.format(text=BRK.join(rows))

-	def do_limlow(self,elm):
-		"""
-		the Lower-Limit object
-		"""
-		t_dict = self.process_children_dict(elm,include=('e','lim'))
-		latex_s = LIM_FUNC.get(t_dict['e'])
-		if not latex_s :
-			raise NotImplemented("Not support lim %s" % t_dict['e'])
-		else:
-			return latex_s.format(lim=t_dict.get('lim'))
+    def do_limlow(self, elm):
+        """
+        the Lower-Limit object
+
+        The converted base ``e`` selects the template in LIM_FUNC
+        (lim / max / min); the converted ``lim`` child is placed below it.
+
+        Raises NotImplementedError when the base is missing or not in LIM_FUNC.
+        """
+        t_dict = self.process_children_dict(elm, include=("e", "lim"))
+        base = t_dict.get("e")
+        latex_s = LIM_FUNC.get(base)
+        if not latex_s:
+            # NotImplemented is a constant, not an exception class
+            raise NotImplementedError("Not support lim %s" % base)
+        return latex_s.format(lim=t_dict.get("lim"))

-	def do_limupp(self,elm):
-		"""
-		the Upper-Limit object
-		"""
-		t_dict = self.process_children_dict(elm,include=('e','lim'))
-		return LIM_UPP.format(lim=t_dict.get('lim'),text=t_dict.get('e'))
+    def do_limupp(self, elm):
+        """
+        the Upper-Limit object
+
+        Places the converted ``lim`` child above the converted base ``e``.
+        """
+        parts = self.process_children_dict(elm, include=("e", "lim"))
+        upper = parts.get("lim")
+        base = parts.get("e")
+        return LIM_UPP.format(lim=upper, text=base)

-	def do_lim(self,elm):
-		"""
-		the lower limit of the limLow object and the upper limit of the limUpp function
-		"""
-		return self.process_children(elm).replace(LIM_TO[0],LIM_TO[1])
-	
-	def do_m(self,elm):
-		"""
-		the Matrix object
-		"""
-		rows = []
-		for stag,t,e in self.process_children_list(elm):
-			if stag is 'mPr':
-				pass
-			elif stag == 'mr':
-				rows.append(t)
-		return M.format(text=BRK.join(rows))
+    def do_lim(self, elm):
+        """
+        the lower limit of the limLow object and the upper limit of the limUpp function
+
+        Converts the children and replaces LIM_TO[0] with LIM_TO[1].
+        """
+        converted = self.process_children(elm)
+        long_arrow, short_arrow = LIM_TO
+        return converted.replace(long_arrow, short_arrow)

-	def do_mr(self,elm):
-		"""
-		a single row of the matrix m
-		"""
-		return ALN.join(
-			[t for stag,t,e in self.process_children_list(elm,include=('e',))])
+    def do_m(self, elm):
+        """
+        the Matrix object
+
+        Only ``mr`` (row) children contribute; ``mPr`` and any other child
+        are ignored. Rows are joined with BRK.
+        """
+        # Compare tags with ==; "is" against a string literal tests identity
+        # and triggers a SyntaxWarning.
+        rows = [t for stag, t, e in self.process_children_list(elm) if stag == "mr"]
+        return M.format(text=BRK.join(rows))

-	def do_nary(self,elm):
-		"""
-		the n-ary object
-		"""
-		res = []
-		bo = ''
-		for stag,t,e in self.process_children_list(elm):
-			if stag == 'naryPr':
-				bo = get_val(t.chr,store=CHR_BO)
-			else :
-				res.append(t)
-		return bo+BLANK.join(res)
+    def do_mr(self, elm):
+        """
+        a single row of the matrix m
+
+        Each ``e`` child is one cell; cells are joined with ALN.
+        """
+        cells = []
+        for _stag, cell, _child in self.process_children_list(elm, include=("e",)):
+            cells.append(cell)
+        return ALN.join(cells)

-	def do_r(self,elm):
-		"""
-		Get text from 'r' element,And try convert them to latex symbols
-		@todo text style support , (sty)
-		@todo \text (latex pure text support)
-		"""
-		_str = []
-		for s in elm.findtext('./{0}t'.format(OMML_NS)):
-			#s = s if isinstance(s,unicode) else unicode(s,'utf-8')
-			_str.append(self._t_dict.get(s,s))
-		return escape_latex(BLANK.join(_str))
+    def do_nary(self, elm):
+        """
+        the n-ary object
+
+        Emits the operator from ``naryPr/chr`` (translated through CHR_BO)
+        followed by the converted remaining children in document order.
+        """
+        res = []
+        bo = ""
+        for stag, t, e in self.process_children_list(elm):
+            if stag == "naryPr":
+                # OMML omits chr for the default operator, the integral;
+                # without a default this was None and the join below failed.
+                bo = get_val(t.chr, default=CHR_BO.get("\u222b"), store=CHR_BO)
+            else:
+                res.append(t)
+        return bo + BLANK.join(res)

-	tag2meth={
-		'acc' : do_acc,
-		'r' : do_r,
-		'bar' : do_bar,
-		'sub' : do_sub,
-		'sup' : do_sup,
-		'f'   : do_f,
-		'func': do_func,
-		'fName' : do_fname,
-		'groupChr' : do_groupchr,
-		'd' : do_d,
-		'rad' : do_rad,
-		'eqArr' : do_eqarr,
-		'limLow' : do_limlow,
-		'limUpp' : do_limupp,
-		'lim' : do_lim,
-		'm' : do_m,
-		'mr' : do_mr,
-		'nary' : do_nary,
- 	}
+    def do_r(self, elm):
+        """
+        Get text from 'r' element and try to convert it to latex symbols.
+
+        Every character of the run's ``t`` child is translated through
+        ``_t_dict`` (unknown characters are kept) and the result is passed
+        through escape_latex. A run without a ``t`` child yields "".
+
+        @todo text style support , (sty)
+        @todo \\text (latex pure text support)
+        """
+        # findtext returns None when the run has no t child
+        text = elm.findtext("./{0}t".format(OMML_NS)) or ""
+        return escape_latex(BLANK.join(self._t_dict.get(s, s) for s in text))
+
+    # Dispatch table read by Tag2Method.call_method: OMML tag (namespace
+    # stripped) -> handler. Tags without an entry fall through to
+    # process_unknow. There is no entry for do_spre, so it is never
+    # dispatched from here.
+    tag2meth = {
+        "acc": do_acc,
+        "r": do_r,
+        "bar": do_bar,
+        "sub": do_sub,
+        "sup": do_sup,
+        "f": do_f,
+        "func": do_func,
+        "fName": do_fname,
+        "groupChr": do_groupchr,
+        "d": do_d,
+        "rad": do_rad,
+        "eqArr": do_eqarr,
+        "limLow": do_limlow,
+        "limUpp": do_limupp,
+        "lim": do_lim,
+        "m": do_m,
+        "mr": do_mr,
+        "nary": do_nary,
+    }
--- a/packages/markitdown/src/markitdown/converters/_docx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py
@ -75,5 +75,6 @@ class DocxConverter(HtmlConverter):
        style_map = kwargs.get("style_map", None)
        pre_process_stream = pre_process_docx(file_stream)
        return self._html_converter.convert_string(
-            mammoth.convert_to_html(pre_process_stream, style_map=style_map).value, **kwargs
+            mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
+            **kwargs,
        )
--- a/packages/markitdown/tests/test_module_misc.py
+++ b/packages/markitdown/tests/test_module_misc.py
@ -272,9 +272,10 @@ def test_docx_equations() -> None:
    assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found"

    # Find block equations wrapped with double $$ and check if they are present
-    block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content)
+    block_equations = re.findall(r"\$\$(.+?)\$\$", result.text_content)
    assert block_equations, "No block equations found in the document."

+
 def test_input_as_strings() -> None:
    markitdown = MarkItDown()