refactor: reformatted with black

This commit is contained in:
Sathindu Ganhala Arachchige 2025-03-28 16:24:27 -04:00
parent 6a66b275bb
commit 799a1caf97
4 changed files with 559 additions and 519 deletions

View file

@ -7,269 +7,267 @@ On 25/03/2025
from __future__ import unicode_literals from __future__ import unicode_literals
CHARS = ('{','}', '_', '^', '#', '&', '$', '%', '~') CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~")
BLANK = '' BLANK = ""
BACKSLASH = '\\' BACKSLASH = "\\"
ALN = '&' ALN = "&"
CHR = { CHR = {
# Unicode : Latex Math Symbols # Unicode : Latex Math Symbols
# Top accents # Top accents
'\u0300' : '\\grave{{{0}}}', "\u0300": "\\grave{{{0}}}",
'\u0301' : '\\acute{{{0}}}', "\u0301": "\\acute{{{0}}}",
'\u0302' : '\\hat{{{0}}}', "\u0302": "\\hat{{{0}}}",
'\u0303' : '\\tilde{{{0}}}', "\u0303": "\\tilde{{{0}}}",
'\u0304' : '\\bar{{{0}}}', "\u0304": "\\bar{{{0}}}",
'\u0305' : '\\overbar{{{0}}}', "\u0305": "\\overbar{{{0}}}",
'\u0306' : '\\breve{{{0}}}', "\u0306": "\\breve{{{0}}}",
'\u0307' : '\\dot{{{0}}}', "\u0307": "\\dot{{{0}}}",
'\u0308' : '\\ddot{{{0}}}', "\u0308": "\\ddot{{{0}}}",
'\u0309' : '\\ovhook{{{0}}}', "\u0309": "\\ovhook{{{0}}}",
'\u030a' : '\\ocirc{{{0}}}}', "\u030a": "\\ocirc{{{0}}}}",
'\u030c' : '\\check{{{0}}}}', "\u030c": "\\check{{{0}}}}",
'\u0310' : '\\candra{{{0}}}', "\u0310": "\\candra{{{0}}}",
'\u0312' : '\\oturnedcomma{{{0}}}', "\u0312": "\\oturnedcomma{{{0}}}",
'\u0315' : '\\ocommatopright{{{0}}}', "\u0315": "\\ocommatopright{{{0}}}",
'\u031a' : '\\droang{{{0}}}', "\u031a": "\\droang{{{0}}}",
'\u0338' : '\\not{{{0}}}', "\u0338": "\\not{{{0}}}",
'\u20d0' : '\\leftharpoonaccent{{{0}}}', "\u20d0": "\\leftharpoonaccent{{{0}}}",
'\u20d1' : '\\rightharpoonaccent{{{0}}}', "\u20d1": "\\rightharpoonaccent{{{0}}}",
'\u20d2' : '\\vertoverlay{{{0}}}', "\u20d2": "\\vertoverlay{{{0}}}",
'\u20d6' : '\\overleftarrow{{{0}}}', "\u20d6": "\\overleftarrow{{{0}}}",
'\u20d7' : '\\vec{{{0}}}', "\u20d7": "\\vec{{{0}}}",
'\u20db' : '\\dddot{{{0}}}', "\u20db": "\\dddot{{{0}}}",
'\u20dc' : '\\ddddot{{{0}}}', "\u20dc": "\\ddddot{{{0}}}",
'\u20e1' : '\\overleftrightarrow{{{0}}}', "\u20e1": "\\overleftrightarrow{{{0}}}",
'\u20e7' : '\\annuity{{{0}}}', "\u20e7": "\\annuity{{{0}}}",
'\u20e9' : '\\widebridgeabove{{{0}}}', "\u20e9": "\\widebridgeabove{{{0}}}",
'\u20f0' : '\\asteraccent{{{0}}}', "\u20f0": "\\asteraccent{{{0}}}",
# Bottom accents # Bottom accents
'\u0330' : '\\wideutilde{{{0}}}', "\u0330": "\\wideutilde{{{0}}}",
'\u0331' : '\\underbar{{{0}}}', "\u0331": "\\underbar{{{0}}}",
'\u20e8' : '\\threeunderdot{{{0}}}', "\u20e8": "\\threeunderdot{{{0}}}",
'\u20ec' : '\\underrightharpoondown{{{0}}}', "\u20ec": "\\underrightharpoondown{{{0}}}",
'\u20ed' : '\\underleftharpoondown{{{0}}}', "\u20ed": "\\underleftharpoondown{{{0}}}",
'\u20ee' : '\\underledtarrow{{{0}}}', "\u20ee": "\\underledtarrow{{{0}}}",
'\u20ef' : '\\underrightarrow{{{0}}}', "\u20ef": "\\underrightarrow{{{0}}}",
# Over | group # Over | group
'\u23b4' : '\\overbracket{{{0}}}', "\u23b4": "\\overbracket{{{0}}}",
'\u23dc' : '\\overparen{{{0}}}', "\u23dc": "\\overparen{{{0}}}",
'\u23de' : '\\overbrace{{{0}}}', "\u23de": "\\overbrace{{{0}}}",
# Under| group # Under| group
'\u23b5' : '\\underbracket{{{0}}}', "\u23b5": "\\underbracket{{{0}}}",
'\u23dd' : '\\underparen{{{0}}}', "\u23dd": "\\underparen{{{0}}}",
'\u23df' : '\\underbrace{{{0}}}', "\u23df": "\\underbrace{{{0}}}",
} }
CHR_BO = { CHR_BO = {
# Big operators, # Big operators,
'\u2140' : '\\Bbbsum', "\u2140": "\\Bbbsum",
'\u220f' : '\\prod', "\u220f": "\\prod",
'\u2210' : '\\coprod', "\u2210": "\\coprod",
'\u2211' : '\\sum', "\u2211": "\\sum",
'\u222b' : '\\int', "\u222b": "\\int",
'\u22c0' : '\\bigwedge', "\u22c0": "\\bigwedge",
'\u22c1' : '\\bigvee', "\u22c1": "\\bigvee",
'\u22c2' : '\\bigcap', "\u22c2": "\\bigcap",
'\u22c3' : '\\bigcup', "\u22c3": "\\bigcup",
'\u2a00' : '\\bigodot', "\u2a00": "\\bigodot",
'\u2a01' : '\\bigoplus', "\u2a01": "\\bigoplus",
'\u2a02' : '\\bigotimes', "\u2a02": "\\bigotimes",
} }
T = { T = {
"\u2192": "\\rightarrow ",
'\u2192' : '\\rightarrow ',
# Greek letters # Greek letters
'\U0001d6fc' : '\\alpha ', "\U0001d6fc": "\\alpha ",
'\U0001d6fd' : '\\beta ', "\U0001d6fd": "\\beta ",
'\U0001d6fe' : '\\gamma ', "\U0001d6fe": "\\gamma ",
'\U0001d6ff' : '\\theta ', "\U0001d6ff": "\\theta ",
'\U0001d700' : '\\epsilon ', "\U0001d700": "\\epsilon ",
'\U0001d701' : '\\zeta ', "\U0001d701": "\\zeta ",
'\U0001d702' : '\\eta ', "\U0001d702": "\\eta ",
'\U0001d703' : '\\theta ', "\U0001d703": "\\theta ",
'\U0001d704' : '\\iota ', "\U0001d704": "\\iota ",
'\U0001d705' : '\\kappa ', "\U0001d705": "\\kappa ",
'\U0001d706' : '\\lambda ', "\U0001d706": "\\lambda ",
'\U0001d707' : '\\m ', "\U0001d707": "\\m ",
'\U0001d708' : '\\n ', "\U0001d708": "\\n ",
'\U0001d709' : '\\xi ', "\U0001d709": "\\xi ",
'\U0001d70a' : '\\omicron ', "\U0001d70a": "\\omicron ",
'\U0001d70b' : '\\pi ', "\U0001d70b": "\\pi ",
'\U0001d70c' : '\\rho ', "\U0001d70c": "\\rho ",
'\U0001d70d' : '\\varsigma ', "\U0001d70d": "\\varsigma ",
'\U0001d70e' : '\\sigma ', "\U0001d70e": "\\sigma ",
'\U0001d70f' : '\\ta ', "\U0001d70f": "\\ta ",
'\U0001d710' : '\\upsilon ', "\U0001d710": "\\upsilon ",
'\U0001d711' : '\\phi ', "\U0001d711": "\\phi ",
'\U0001d712' : '\\chi ', "\U0001d712": "\\chi ",
'\U0001d713' : '\\psi ', "\U0001d713": "\\psi ",
'\U0001d714' : '\\omega ', "\U0001d714": "\\omega ",
'\U0001d715' : '\\partial ', "\U0001d715": "\\partial ",
'\U0001d716' : '\\varepsilon ', "\U0001d716": "\\varepsilon ",
'\U0001d717' : '\\vartheta ', "\U0001d717": "\\vartheta ",
'\U0001d718' : '\\varkappa ', "\U0001d718": "\\varkappa ",
'\U0001d719' : '\\varphi ', "\U0001d719": "\\varphi ",
'\U0001d71a' : '\\varrho ', "\U0001d71a": "\\varrho ",
'\U0001d71b' : '\\varpi ', "\U0001d71b": "\\varpi ",
# Relation symbols # Relation symbols
'\u2190' : '\\leftarrow ', "\u2190": "\\leftarrow ",
'\u2191' : '\\uparrow ', "\u2191": "\\uparrow ",
'\u2192' : '\\rightarrow ', "\u2192": "\\rightarrow ",
'\u2193' : '\\downright ', "\u2193": "\\downright ",
'\u2194' : '\\leftrightarrow ', "\u2194": "\\leftrightarrow ",
'\u2195' : '\\updownarrow ', "\u2195": "\\updownarrow ",
'\u2196' : '\\nwarrow ', "\u2196": "\\nwarrow ",
'\u2197' : '\\nearrow ', "\u2197": "\\nearrow ",
'\u2198' : '\\searrow ', "\u2198": "\\searrow ",
'\u2199' : '\\swarrow ', "\u2199": "\\swarrow ",
'\u22ee' : '\\vdots ', "\u22ee": "\\vdots ",
'\u22ef' : '\\cdots ', "\u22ef": "\\cdots ",
'\u22f0' : '\\adots ', "\u22f0": "\\adots ",
'\u22f1' : '\\ddots ', "\u22f1": "\\ddots ",
'\u2260' : '\\ne ', "\u2260": "\\ne ",
'\u2264' : '\\leq ', "\u2264": "\\leq ",
'\u2265' : '\\geq ', "\u2265": "\\geq ",
'\u2266' : '\\leqq ', "\u2266": "\\leqq ",
'\u2267' : '\\geqq ', "\u2267": "\\geqq ",
'\u2268' : '\\lneqq ', "\u2268": "\\lneqq ",
'\u2269' : '\\gneqq ', "\u2269": "\\gneqq ",
'\u226a' : '\\ll ', "\u226a": "\\ll ",
'\u226b' : '\\gg ', "\u226b": "\\gg ",
'\u2208' : '\\in ', "\u2208": "\\in ",
'\u2209' : '\\notin ', "\u2209": "\\notin ",
'\u220b' : '\\ni ', "\u220b": "\\ni ",
'\u220c' : '\\nni ', "\u220c": "\\nni ",
# Ordinary symbols # Ordinary symbols
'\u221e' : '\\infty ', "\u221e": "\\infty ",
# Binary relations # Binary relations
'\u00b1' : '\\pm ', "\u00b1": "\\pm ",
'\u2213' : '\\mp ', "\u2213": "\\mp ",
# Italic, Latin, uppercase # Italic, Latin, uppercase
'\U0001d434' : 'A', "\U0001d434": "A",
'\U0001d435' : 'B', "\U0001d435": "B",
'\U0001d436' : 'C', "\U0001d436": "C",
'\U0001d437' : 'D', "\U0001d437": "D",
'\U0001d438' : 'E', "\U0001d438": "E",
'\U0001d439' : 'F', "\U0001d439": "F",
'\U0001d43a' : 'G', "\U0001d43a": "G",
'\U0001d43b' : 'H', "\U0001d43b": "H",
'\U0001d43c' : 'I', "\U0001d43c": "I",
'\U0001d43d' : 'J', "\U0001d43d": "J",
'\U0001d43e' : 'K', "\U0001d43e": "K",
'\U0001d43f' : 'L', "\U0001d43f": "L",
'\U0001d440' : 'M', "\U0001d440": "M",
'\U0001d441' : 'N', "\U0001d441": "N",
'\U0001d442' : 'O', "\U0001d442": "O",
'\U0001d443' : 'P', "\U0001d443": "P",
'\U0001d444' : 'Q', "\U0001d444": "Q",
'\U0001d445' : 'R', "\U0001d445": "R",
'\U0001d446' : 'S', "\U0001d446": "S",
'\U0001d447' : 'T', "\U0001d447": "T",
'\U0001d448' : 'U', "\U0001d448": "U",
'\U0001d449' : 'V', "\U0001d449": "V",
'\U0001d44a' : 'W', "\U0001d44a": "W",
'\U0001d44b' : 'X', "\U0001d44b": "X",
'\U0001d44c' : 'Y', "\U0001d44c": "Y",
'\U0001d44d' : 'Z', "\U0001d44d": "Z",
# Italic, Latin, lowercase # Italic, Latin, lowercase
'\U0001d44e' : 'a', "\U0001d44e": "a",
'\U0001d44f' : 'b', "\U0001d44f": "b",
'\U0001d450' : 'c', "\U0001d450": "c",
'\U0001d451' : 'd', "\U0001d451": "d",
'\U0001d452' : 'e', "\U0001d452": "e",
'\U0001d453' : 'f', "\U0001d453": "f",
'\U0001d454' : 'g', "\U0001d454": "g",
'\U0001d456' : 'i', "\U0001d456": "i",
'\U0001d457' : 'j', "\U0001d457": "j",
'\U0001d458' : 'k', "\U0001d458": "k",
'\U0001d459' : 'l', "\U0001d459": "l",
'\U0001d45a' : 'm', "\U0001d45a": "m",
'\U0001d45b' : 'n', "\U0001d45b": "n",
'\U0001d45c' : 'o', "\U0001d45c": "o",
'\U0001d45d' : 'p', "\U0001d45d": "p",
'\U0001d45e' : 'q', "\U0001d45e": "q",
'\U0001d45f' : 'r', "\U0001d45f": "r",
'\U0001d460' : 's', "\U0001d460": "s",
'\U0001d461' : 't', "\U0001d461": "t",
'\U0001d462' : 'u', "\U0001d462": "u",
'\U0001d463' : 'v', "\U0001d463": "v",
'\U0001d464' : 'w', "\U0001d464": "w",
'\U0001d465' : 'x', "\U0001d465": "x",
'\U0001d466' : 'y', "\U0001d466": "y",
'\U0001d467' : 'z', "\U0001d467": "z",
} }
FUNC = { FUNC = {
'sin' : '\\sin({fe})', "sin": "\\sin({fe})",
'cos' : '\\cos({fe})', "cos": "\\cos({fe})",
'tan' : '\\tan({fe})', "tan": "\\tan({fe})",
'arcsin' : '\\arcsin({fe})', "arcsin": "\\arcsin({fe})",
'arccos' : '\\arccos({fe})', "arccos": "\\arccos({fe})",
'arctan' : '\\arctan({fe})', "arctan": "\\arctan({fe})",
'arccot' : '\\arccot({fe})', "arccot": "\\arccot({fe})",
'sinh' : '\\sinh({fe})', "sinh": "\\sinh({fe})",
'cosh' : '\\cosh({fe})', "cosh": "\\cosh({fe})",
'tanh' : '\\tanh({fe})', "tanh": "\\tanh({fe})",
'coth' : '\\coth({fe})', "coth": "\\coth({fe})",
'sec' : '\\sec({fe})', "sec": "\\sec({fe})",
'csc' : '\\csc({fe})', "csc": "\\csc({fe})",
} }
FUNC_PLACE = '{fe}' FUNC_PLACE = "{fe}"
BRK = '\\\\' BRK = "\\\\"
CHR_DEFAULT = { CHR_DEFAULT = {
'ACC_VAL':'\\hat{{{0}}}', "ACC_VAL": "\\hat{{{0}}}",
} }
POS = { POS = {
'top' : '\\overline{{{0}}}', # not sure "top": "\\overline{{{0}}}", # not sure
'bot' : '\\underline{{{0}}}', "bot": "\\underline{{{0}}}",
} }
POS_DEFAULT = { POS_DEFAULT = {
'BAR_VAL': '\\overline{{{0}}}', "BAR_VAL": "\\overline{{{0}}}",
} }
SUB = '_{{{0}}}' SUB = "_{{{0}}}"
SUP = '^{{{0}}}' SUP = "^{{{0}}}"
F = { F = {
'bar': '\\frac{{{num}}}{{{den}}}', "bar": "\\frac{{{num}}}{{{den}}}",
'skw': r'^{{{num}}}/_{{{den}}}', "skw": r"^{{{num}}}/_{{{den}}}",
'noBar': '\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}', "noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
'lin' : '{{{num}}}/{{{den}}}', "lin": "{{{num}}}/{{{den}}}",
} }
F_DEFAULT = '\\frac{{{num}}}{{{den}}}' F_DEFAULT = "\\frac{{{num}}}{{{den}}}"
D = '\\left{left}{text}\\right{right}' D = "\\left{left}{text}\\right{right}"
D_DEFAULT = { D_DEFAULT = {
'left':'(', "left": "(",
'right':')', "right": ")",
'null':'.', "null": ".",
} }
RAD = '\\sqrt[{deg}]{{{text}}}' RAD = "\\sqrt[{deg}]{{{text}}}"
RAD_DEFAULT = '\\sqrt{{{text}}}' RAD_DEFAULT = "\\sqrt{{{text}}}"
ARR = '\\begin{{array}}{{c}}{text}\end{{array}}' ARR = "\\begin{{array}}{{c}}{text}\end{{array}}"
LIM_FUNC = { LIM_FUNC = {
'lim':'\\lim_{{{lim}}}', "lim": "\\lim_{{{lim}}}",
'max':'\\max_{{{lim}}}', "max": "\\max_{{{lim}}}",
'min':'\\min_{{{lim}}}', "min": "\\min_{{{lim}}}",
} }
LIM_TO = ('\\rightarrow','\\to') LIM_TO = ("\\rightarrow", "\\to")
LIM_UPP = '\\overset{{{lim}}}{{{text}}}' LIM_UPP = "\\overset{{{lim}}}{{{text}}}"
M = '\\begin{{matrix}}{text}\end{{matrix}}' M = "\\begin{{matrix}}{text}\end{{matrix}}"

View file

@ -8,27 +8,54 @@ On 25/03/2025
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from .latex_dict import (CHARS, CHR, CHR_BO, CHR_DEFAULT, POS, POS_DEFAULT from .latex_dict import (
, SUB, SUP, F, F_DEFAULT, T, FUNC, D, D_DEFAULT, RAD, RAD_DEFAULT, ARR CHARS,
, LIM_FUNC, LIM_TO, LIM_UPP, M, BRK, BLANK, BACKSLASH, ALN, FUNC_PLACE) CHR,
CHR_BO,
CHR_DEFAULT,
POS,
POS_DEFAULT,
SUB,
SUP,
F,
F_DEFAULT,
T,
FUNC,
D,
D_DEFAULT,
RAD,
RAD_DEFAULT,
ARR,
LIM_FUNC,
LIM_TO,
LIM_UPP,
M,
BRK,
BLANK,
BACKSLASH,
ALN,
FUNC_PLACE,
)
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}" OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
def load(stream): def load(stream):
tree = ET.parse(stream) tree = ET.parse(stream)
for omath in tree.findall(OMML_NS+'oMath'): for omath in tree.findall(OMML_NS + "oMath"):
yield oMath2Latex(omath) yield oMath2Latex(omath)
def load_string(string): def load_string(string):
root = ET.fromstring(string) root = ET.fromstring(string)
for omath in root.findall(OMML_NS+'oMath'): for omath in root.findall(OMML_NS + "oMath"):
yield oMath2Latex(omath) yield oMath2Latex(omath)
def escape_latex(strs): def escape_latex(strs):
last = None last = None
new_chr = [] new_chr = []
strs = strs.replace(r'\\','\\') strs = strs.replace(r"\\", "\\")
for c in strs: for c in strs:
if (c in CHARS) and (last != BACKSLASH): if (c in CHARS) and (last != BACKSLASH):
new_chr.append(BACKSLASH + c) new_chr.append(BACKSLASH + c)
@ -37,6 +64,7 @@ def escape_latex(strs):
last = c last = c
return BLANK.join(new_chr) return BLANK.join(new_chr)
def get_val(key, default=None, store=CHR): def get_val(key, default=None, store=CHR):
if key is not None: if key is not None:
return key if not store else store.get(key, key) return key if not store else store.get(key, key)
@ -49,7 +77,7 @@ class Tag2Method(object):
def call_method(self, elm, stag=None): def call_method(self, elm, stag=None):
getmethod = self.tag2meth.get getmethod = self.tag2meth.get
if stag is None: if stag is None:
stag = elm.tag.replace(OMML_NS,'') stag = elm.tag.replace(OMML_NS, "")
method = getmethod(stag) method = getmethod(stag)
if method: if method:
return method(self, elm) return method(self, elm)
@ -61,9 +89,9 @@ class Tag2Method(object):
process children of the elm,return iterable process children of the elm,return iterable
""" """
for _e in list(elm): for _e in list(elm):
if (OMML_NS not in _e.tag): if OMML_NS not in _e.tag:
continue continue
stag = _e.tag.replace(OMML_NS,'') stag = _e.tag.replace(OMML_NS, "")
if include and (stag not in include): if include and (stag not in include):
continue continue
t = self.call_method(_e, stag=stag) t = self.call_method(_e, stag=stag)
@ -86,8 +114,12 @@ class Tag2Method(object):
""" """
process children of the elm,return string process children of the elm,return string
""" """
return BLANK.join(( t if not isinstance(t,Tag2Method) else str(t) return BLANK.join(
for stag,t,e in self.process_children_list(elm,include))) (
t if not isinstance(t, Tag2Method) else str(t)
for stag, t, e in self.process_children_list(elm, include)
)
)
def process_unknow(self, elm, stag): def process_unknow(self, elm, stag):
return None return None
@ -95,13 +127,14 @@ class Tag2Method(object):
class Pr(Tag2Method): class Pr(Tag2Method):
text = '' text = ""
__val_tags = ('chr','pos','begChr','endChr','type') __val_tags = ("chr", "pos", "begChr", "endChr", "type")
__innerdict = None # can't use the __dict__ __innerdict = None # can't use the __dict__
""" common properties of element""" """ common properties of element"""
def __init__(self, elm): def __init__(self, elm):
self.__innerdict = {} self.__innerdict = {}
self.text = self.process_children(elm) self.text = self.process_children(elm)
@ -116,23 +149,23 @@ class Pr(Tag2Method):
return self.__innerdict.get(name, None) return self.__innerdict.get(name, None)
def do_brk(self, elm): def do_brk(self, elm):
self.__innerdict['brk'] = BRK self.__innerdict["brk"] = BRK
return BRK return BRK
def do_common(self, elm): def do_common(self, elm):
stag = elm.tag.replace(OMML_NS,'') stag = elm.tag.replace(OMML_NS, "")
if stag in self.__val_tags: if stag in self.__val_tags:
t = elm.get('{0}val'.format(OMML_NS)) t = elm.get("{0}val".format(OMML_NS))
self.__innerdict[stag] = t self.__innerdict[stag] = t
return None return None
tag2meth = { tag2meth = {
'brk':do_brk, "brk": do_brk,
'chr':do_common, "chr": do_common,
'pos':do_common, "pos": do_common,
'begChr':do_common, "begChr": do_common,
'endChr':do_common, "endChr": do_common,
'type':do_common, "type": do_common,
} }
@ -140,9 +173,10 @@ class oMath2Latex(Tag2Method):
""" """
Convert oMath element of omml to latex Convert oMath element of omml to latex
""" """
_t_dict = T _t_dict = T
__direct_tags = ('box','sSub','sSup','sSubSup','num','den','deg','e') __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
def __init__(self, element): def __init__(self, element):
self._latex = self.process_children(element) self._latex = self.process_children(element)
@ -156,7 +190,7 @@ class oMath2Latex(Tag2Method):
def process_unknow(self, elm, stag): def process_unknow(self, elm, stag):
if stag in self.__direct_tags: if stag in self.__direct_tags:
return self.process_children(elm) return self.process_children(elm)
elif stag[-2:] == 'Pr': elif stag[-2:] == "Pr":
return Pr(elm) return Pr(elm)
else: else:
return None return None
@ -170,31 +204,34 @@ class oMath2Latex(Tag2Method):
the accent function the accent function
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
latex_s = get_val(c_dict['accPr'].chr,default=CHR_DEFAULT.get('ACC_VAL'),store=CHR) latex_s = get_val(
return latex_s.format(c_dict['e']) c_dict["accPr"].chr, default=CHR_DEFAULT.get("ACC_VAL"), store=CHR
)
return latex_s.format(c_dict["e"])
def do_bar(self, elm): def do_bar(self, elm):
""" """
the bar function the bar function
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['barPr'] pr = c_dict["barPr"]
latex_s = get_val(pr.pos,default=POS_DEFAULT.get('BAR_VAL'),store=POS) latex_s = get_val(pr.pos, default=POS_DEFAULT.get("BAR_VAL"), store=POS)
return pr.text+latex_s.format(c_dict['e']) return pr.text + latex_s.format(c_dict["e"])
def do_d(self, elm): def do_d(self, elm):
""" """
the delimiter object the delimiter object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['dPr'] pr = c_dict["dPr"]
null = D_DEFAULT.get('null') null = D_DEFAULT.get("null")
s_val = get_val(pr.begChr,default=D_DEFAULT.get('left'),store=T) s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
e_val = get_val(pr.endChr,default=D_DEFAULT.get('right'),store=T) e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
return pr.text+D.format(left= null if not s_val else escape_latex(s_val), return pr.text + D.format(
text=c_dict['e'], left=null if not s_val else escape_latex(s_val),
right= null if not e_val else escape_latex(e_val)) text=c_dict["e"],
right=null if not e_val else escape_latex(e_val),
)
def do_spre(self, elm): def do_spre(self, elm):
""" """
@ -215,17 +252,17 @@ class oMath2Latex(Tag2Method):
the fraction object the fraction object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['fPr'] pr = c_dict["fPr"]
latex_s = get_val(pr.type, default=F_DEFAULT, store=F) latex_s = get_val(pr.type, default=F_DEFAULT, store=F)
return pr.text+latex_s.format(num=c_dict.get('num'),den=c_dict.get('den')) return pr.text + latex_s.format(num=c_dict.get("num"), den=c_dict.get("den"))
def do_func(self, elm): def do_func(self, elm):
""" """
the Function-Apply object (Examples:sin cos) the Function-Apply object (Examples:sin cos)
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
func_name = c_dict.get('fName') func_name = c_dict.get("fName")
return func_name.replace(FUNC_PLACE,c_dict.get('e')) return func_name.replace(FUNC_PLACE, c_dict.get("e"))
def do_fname(self, elm): def do_fname(self, elm):
""" """
@ -233,7 +270,7 @@ class oMath2Latex(Tag2Method):
""" """
latex_chars = [] latex_chars = []
for stag, t, e in self.process_children_list(elm): for stag, t, e in self.process_children_list(elm):
if stag == 'r': if stag == "r":
if FUNC.get(t): if FUNC.get(t):
latex_chars.append(FUNC[t]) latex_chars.append(FUNC[t])
else: else:
@ -248,17 +285,17 @@ class oMath2Latex(Tag2Method):
the Group-Character object the Group-Character object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['groupChrPr'] pr = c_dict["groupChrPr"]
latex_s = get_val(pr.chr) latex_s = get_val(pr.chr)
return pr.text+latex_s.format(c_dict['e']) return pr.text + latex_s.format(c_dict["e"])
def do_rad(self, elm): def do_rad(self, elm):
""" """
the radical object the radical object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
text = c_dict.get('e') text = c_dict.get("e")
deg_text = c_dict.get('deg') deg_text = c_dict.get("deg")
if deg_text: if deg_text:
return RAD.format(deg=deg_text, text=text) return RAD.format(deg=deg_text, text=text)
else: else:
@ -268,27 +305,29 @@ class oMath2Latex(Tag2Method):
""" """
the Array object the Array object
""" """
return ARR.format(text=BRK.join( return ARR.format(
[t for stag,t,e in self.process_children_list(elm,include=('e',))])) text=BRK.join(
[t for stag, t, e in self.process_children_list(elm, include=("e",))]
)
)
def do_limlow(self, elm): def do_limlow(self, elm):
""" """
the Lower-Limit object the Lower-Limit object
""" """
t_dict = self.process_children_dict(elm,include=('e','lim')) t_dict = self.process_children_dict(elm, include=("e", "lim"))
latex_s = LIM_FUNC.get(t_dict['e']) latex_s = LIM_FUNC.get(t_dict["e"])
if not latex_s: if not latex_s:
raise NotImplemented("Not support lim %s" % t_dict['e']) raise NotImplemented("Not support lim %s" % t_dict["e"])
else: else:
return latex_s.format(lim=t_dict.get('lim')) return latex_s.format(lim=t_dict.get("lim"))
def do_limupp(self, elm): def do_limupp(self, elm):
""" """
the Upper-Limit object the Upper-Limit object
""" """
t_dict = self.process_children_dict(elm,include=('e','lim')) t_dict = self.process_children_dict(elm, include=("e", "lim"))
return LIM_UPP.format(lim=t_dict.get('lim'),text=t_dict.get('e')) return LIM_UPP.format(lim=t_dict.get("lim"), text=t_dict.get("e"))
def do_lim(self, elm): def do_lim(self, elm):
""" """
@ -302,9 +341,9 @@ class oMath2Latex(Tag2Method):
""" """
rows = [] rows = []
for stag, t, e in self.process_children_list(elm): for stag, t, e in self.process_children_list(elm):
if stag is 'mPr': if stag is "mPr":
pass pass
elif stag == 'mr': elif stag == "mr":
rows.append(t) rows.append(t)
return M.format(text=BRK.join(rows)) return M.format(text=BRK.join(rows))
@ -313,16 +352,17 @@ class oMath2Latex(Tag2Method):
a single row of the matrix m a single row of the matrix m
""" """
return ALN.join( return ALN.join(
[t for stag,t,e in self.process_children_list(elm,include=('e',))]) [t for stag, t, e in self.process_children_list(elm, include=("e",))]
)
def do_nary(self, elm): def do_nary(self, elm):
""" """
the n-ary object the n-ary object
""" """
res = [] res = []
bo = '' bo = ""
for stag, t, e in self.process_children_list(elm): for stag, t, e in self.process_children_list(elm):
if stag == 'naryPr': if stag == "naryPr":
bo = get_val(t.chr, store=CHR_BO) bo = get_val(t.chr, store=CHR_BO)
else: else:
res.append(t) res.append(t)
@ -335,28 +375,28 @@ class oMath2Latex(Tag2Method):
@todo \text (latex pure text support) @todo \text (latex pure text support)
""" """
_str = [] _str = []
for s in elm.findtext('./{0}t'.format(OMML_NS)): for s in elm.findtext("./{0}t".format(OMML_NS)):
# s = s if isinstance(s,unicode) else unicode(s,'utf-8') # s = s if isinstance(s,unicode) else unicode(s,'utf-8')
_str.append(self._t_dict.get(s, s)) _str.append(self._t_dict.get(s, s))
return escape_latex(BLANK.join(_str)) return escape_latex(BLANK.join(_str))
tag2meth = { tag2meth = {
'acc' : do_acc, "acc": do_acc,
'r' : do_r, "r": do_r,
'bar' : do_bar, "bar": do_bar,
'sub' : do_sub, "sub": do_sub,
'sup' : do_sup, "sup": do_sup,
'f' : do_f, "f": do_f,
'func': do_func, "func": do_func,
'fName' : do_fname, "fName": do_fname,
'groupChr' : do_groupchr, "groupChr": do_groupchr,
'd' : do_d, "d": do_d,
'rad' : do_rad, "rad": do_rad,
'eqArr' : do_eqarr, "eqArr": do_eqarr,
'limLow' : do_limlow, "limLow": do_limlow,
'limUpp' : do_limupp, "limUpp": do_limupp,
'lim' : do_lim, "lim": do_lim,
'm' : do_m, "m": do_m,
'mr' : do_mr, "mr": do_mr,
'nary' : do_nary, "nary": do_nary,
} }

View file

@ -75,5 +75,6 @@ class DocxConverter(HtmlConverter):
style_map = kwargs.get("style_map", None) style_map = kwargs.get("style_map", None)
pre_process_stream = pre_process_docx(file_stream) pre_process_stream = pre_process_docx(file_stream)
return self._html_converter.convert_string( return self._html_converter.convert_string(
mammoth.convert_to_html(pre_process_stream, style_map=style_map).value, **kwargs mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
**kwargs,
) )

View file

@ -272,9 +272,10 @@ def test_docx_equations() -> None:
assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found" assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found"
# Find block equations wrapped with double $$ and check if they are present # Find block equations wrapped with double $$ and check if they are present
block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content) block_equations = re.findall(r"\$\$(.+?)\$\$", result.text_content)
assert block_equations, "No block equations found in the document." assert block_equations, "No block equations found in the document."
def test_input_as_strings() -> None: def test_input_as_strings() -> None:
markitdown = MarkItDown() markitdown = MarkItDown()