refactor: reformatted with black

This commit is contained in:
Sathindu Ganhala Arachchige 2025-03-28 16:24:27 -04:00
parent 6a66b275bb
commit 799a1caf97
4 changed files with 559 additions and 519 deletions

View file

@ -7,269 +7,267 @@ On 25/03/2025
from __future__ import unicode_literals from __future__ import unicode_literals
CHARS = ('{','}', '_', '^', '#', '&', '$', '%', '~') CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~")
BLANK = '' BLANK = ""
BACKSLASH = '\\' BACKSLASH = "\\"
ALN = '&' ALN = "&"
CHR = { CHR = {
#Unicode : Latex Math Symbols # Unicode : Latex Math Symbols
#Top accents # Top accents
'\u0300' : '\\grave{{{0}}}', "\u0300": "\\grave{{{0}}}",
'\u0301' : '\\acute{{{0}}}', "\u0301": "\\acute{{{0}}}",
'\u0302' : '\\hat{{{0}}}', "\u0302": "\\hat{{{0}}}",
'\u0303' : '\\tilde{{{0}}}', "\u0303": "\\tilde{{{0}}}",
'\u0304' : '\\bar{{{0}}}', "\u0304": "\\bar{{{0}}}",
'\u0305' : '\\overbar{{{0}}}', "\u0305": "\\overbar{{{0}}}",
'\u0306' : '\\breve{{{0}}}', "\u0306": "\\breve{{{0}}}",
'\u0307' : '\\dot{{{0}}}', "\u0307": "\\dot{{{0}}}",
'\u0308' : '\\ddot{{{0}}}', "\u0308": "\\ddot{{{0}}}",
'\u0309' : '\\ovhook{{{0}}}', "\u0309": "\\ovhook{{{0}}}",
'\u030a' : '\\ocirc{{{0}}}}', "\u030a": "\\ocirc{{{0}}}}",
'\u030c' : '\\check{{{0}}}}', "\u030c": "\\check{{{0}}}}",
'\u0310' : '\\candra{{{0}}}', "\u0310": "\\candra{{{0}}}",
'\u0312' : '\\oturnedcomma{{{0}}}', "\u0312": "\\oturnedcomma{{{0}}}",
'\u0315' : '\\ocommatopright{{{0}}}', "\u0315": "\\ocommatopright{{{0}}}",
'\u031a' : '\\droang{{{0}}}', "\u031a": "\\droang{{{0}}}",
'\u0338' : '\\not{{{0}}}', "\u0338": "\\not{{{0}}}",
'\u20d0' : '\\leftharpoonaccent{{{0}}}', "\u20d0": "\\leftharpoonaccent{{{0}}}",
'\u20d1' : '\\rightharpoonaccent{{{0}}}', "\u20d1": "\\rightharpoonaccent{{{0}}}",
'\u20d2' : '\\vertoverlay{{{0}}}', "\u20d2": "\\vertoverlay{{{0}}}",
'\u20d6' : '\\overleftarrow{{{0}}}', "\u20d6": "\\overleftarrow{{{0}}}",
'\u20d7' : '\\vec{{{0}}}', "\u20d7": "\\vec{{{0}}}",
'\u20db' : '\\dddot{{{0}}}', "\u20db": "\\dddot{{{0}}}",
'\u20dc' : '\\ddddot{{{0}}}', "\u20dc": "\\ddddot{{{0}}}",
'\u20e1' : '\\overleftrightarrow{{{0}}}', "\u20e1": "\\overleftrightarrow{{{0}}}",
'\u20e7' : '\\annuity{{{0}}}', "\u20e7": "\\annuity{{{0}}}",
'\u20e9' : '\\widebridgeabove{{{0}}}', "\u20e9": "\\widebridgeabove{{{0}}}",
'\u20f0' : '\\asteraccent{{{0}}}', "\u20f0": "\\asteraccent{{{0}}}",
#Bottom accents # Bottom accents
'\u0330' : '\\wideutilde{{{0}}}', "\u0330": "\\wideutilde{{{0}}}",
'\u0331' : '\\underbar{{{0}}}', "\u0331": "\\underbar{{{0}}}",
'\u20e8' : '\\threeunderdot{{{0}}}', "\u20e8": "\\threeunderdot{{{0}}}",
'\u20ec' : '\\underrightharpoondown{{{0}}}', "\u20ec": "\\underrightharpoondown{{{0}}}",
'\u20ed' : '\\underleftharpoondown{{{0}}}', "\u20ed": "\\underleftharpoondown{{{0}}}",
'\u20ee' : '\\underledtarrow{{{0}}}', "\u20ee": "\\underledtarrow{{{0}}}",
'\u20ef' : '\\underrightarrow{{{0}}}', "\u20ef": "\\underrightarrow{{{0}}}",
#Over | group # Over | group
'\u23b4' : '\\overbracket{{{0}}}', "\u23b4": "\\overbracket{{{0}}}",
'\u23dc' : '\\overparen{{{0}}}', "\u23dc": "\\overparen{{{0}}}",
'\u23de' : '\\overbrace{{{0}}}', "\u23de": "\\overbrace{{{0}}}",
#Under| group # Under| group
'\u23b5' : '\\underbracket{{{0}}}', "\u23b5": "\\underbracket{{{0}}}",
'\u23dd' : '\\underparen{{{0}}}', "\u23dd": "\\underparen{{{0}}}",
'\u23df' : '\\underbrace{{{0}}}', "\u23df": "\\underbrace{{{0}}}",
} }
CHR_BO = { CHR_BO = {
#Big operators, # Big operators,
'\u2140' : '\\Bbbsum', "\u2140": "\\Bbbsum",
'\u220f' : '\\prod', "\u220f": "\\prod",
'\u2210' : '\\coprod', "\u2210": "\\coprod",
'\u2211' : '\\sum', "\u2211": "\\sum",
'\u222b' : '\\int', "\u222b": "\\int",
'\u22c0' : '\\bigwedge', "\u22c0": "\\bigwedge",
'\u22c1' : '\\bigvee', "\u22c1": "\\bigvee",
'\u22c2' : '\\bigcap', "\u22c2": "\\bigcap",
'\u22c3' : '\\bigcup', "\u22c3": "\\bigcup",
'\u2a00' : '\\bigodot', "\u2a00": "\\bigodot",
'\u2a01' : '\\bigoplus', "\u2a01": "\\bigoplus",
'\u2a02' : '\\bigotimes', "\u2a02": "\\bigotimes",
} }
T = { T = {
"\u2192": "\\rightarrow ",
'\u2192' : '\\rightarrow ', # Greek letters
#Greek letters "\U0001d6fc": "\\alpha ",
'\U0001d6fc' : '\\alpha ', "\U0001d6fd": "\\beta ",
'\U0001d6fd' : '\\beta ', "\U0001d6fe": "\\gamma ",
'\U0001d6fe' : '\\gamma ', "\U0001d6ff": "\\theta ",
'\U0001d6ff' : '\\theta ', "\U0001d700": "\\epsilon ",
'\U0001d700' : '\\epsilon ', "\U0001d701": "\\zeta ",
'\U0001d701' : '\\zeta ', "\U0001d702": "\\eta ",
'\U0001d702' : '\\eta ', "\U0001d703": "\\theta ",
'\U0001d703' : '\\theta ', "\U0001d704": "\\iota ",
'\U0001d704' : '\\iota ', "\U0001d705": "\\kappa ",
'\U0001d705' : '\\kappa ', "\U0001d706": "\\lambda ",
'\U0001d706' : '\\lambda ', "\U0001d707": "\\m ",
'\U0001d707' : '\\m ', "\U0001d708": "\\n ",
'\U0001d708' : '\\n ', "\U0001d709": "\\xi ",
'\U0001d709' : '\\xi ', "\U0001d70a": "\\omicron ",
'\U0001d70a' : '\\omicron ', "\U0001d70b": "\\pi ",
'\U0001d70b' : '\\pi ', "\U0001d70c": "\\rho ",
'\U0001d70c' : '\\rho ', "\U0001d70d": "\\varsigma ",
'\U0001d70d' : '\\varsigma ', "\U0001d70e": "\\sigma ",
'\U0001d70e' : '\\sigma ', "\U0001d70f": "\\ta ",
'\U0001d70f' : '\\ta ', "\U0001d710": "\\upsilon ",
'\U0001d710' : '\\upsilon ', "\U0001d711": "\\phi ",
'\U0001d711' : '\\phi ', "\U0001d712": "\\chi ",
'\U0001d712' : '\\chi ', "\U0001d713": "\\psi ",
'\U0001d713' : '\\psi ', "\U0001d714": "\\omega ",
'\U0001d714' : '\\omega ', "\U0001d715": "\\partial ",
'\U0001d715' : '\\partial ', "\U0001d716": "\\varepsilon ",
'\U0001d716' : '\\varepsilon ', "\U0001d717": "\\vartheta ",
'\U0001d717' : '\\vartheta ', "\U0001d718": "\\varkappa ",
'\U0001d718' : '\\varkappa ', "\U0001d719": "\\varphi ",
'\U0001d719' : '\\varphi ', "\U0001d71a": "\\varrho ",
'\U0001d71a' : '\\varrho ', "\U0001d71b": "\\varpi ",
'\U0001d71b' : '\\varpi ', # Relation symbols
#Relation symbols "\u2190": "\\leftarrow ",
'\u2190' : '\\leftarrow ', "\u2191": "\\uparrow ",
'\u2191' : '\\uparrow ', "\u2192": "\\rightarrow ",
'\u2192' : '\\rightarrow ', "\u2193": "\\downright ",
'\u2193' : '\\downright ', "\u2194": "\\leftrightarrow ",
'\u2194' : '\\leftrightarrow ', "\u2195": "\\updownarrow ",
'\u2195' : '\\updownarrow ', "\u2196": "\\nwarrow ",
'\u2196' : '\\nwarrow ', "\u2197": "\\nearrow ",
'\u2197' : '\\nearrow ', "\u2198": "\\searrow ",
'\u2198' : '\\searrow ', "\u2199": "\\swarrow ",
'\u2199' : '\\swarrow ', "\u22ee": "\\vdots ",
'\u22ee' : '\\vdots ', "\u22ef": "\\cdots ",
'\u22ef' : '\\cdots ', "\u22f0": "\\adots ",
'\u22f0' : '\\adots ', "\u22f1": "\\ddots ",
'\u22f1' : '\\ddots ', "\u2260": "\\ne ",
'\u2260' : '\\ne ', "\u2264": "\\leq ",
'\u2264' : '\\leq ', "\u2265": "\\geq ",
'\u2265' : '\\geq ', "\u2266": "\\leqq ",
'\u2266' : '\\leqq ', "\u2267": "\\geqq ",
'\u2267' : '\\geqq ', "\u2268": "\\lneqq ",
'\u2268' : '\\lneqq ', "\u2269": "\\gneqq ",
'\u2269' : '\\gneqq ', "\u226a": "\\ll ",
'\u226a' : '\\ll ', "\u226b": "\\gg ",
'\u226b' : '\\gg ', "\u2208": "\\in ",
'\u2208' : '\\in ', "\u2209": "\\notin ",
'\u2209' : '\\notin ', "\u220b": "\\ni ",
'\u220b' : '\\ni ', "\u220c": "\\nni ",
'\u220c' : '\\nni ', # Ordinary symbols
"\u221e": "\\infty ",
#Ordinary symbols # Binary relations
'\u221e' : '\\infty ', "\u00b1": "\\pm ",
#Binary relations "\u2213": "\\mp ",
'\u00b1' : '\\pm ', # Italic, Latin, uppercase
'\u2213' : '\\mp ', "\U0001d434": "A",
#Italic, Latin, uppercase "\U0001d435": "B",
'\U0001d434' : 'A', "\U0001d436": "C",
'\U0001d435' : 'B', "\U0001d437": "D",
'\U0001d436' : 'C', "\U0001d438": "E",
'\U0001d437' : 'D', "\U0001d439": "F",
'\U0001d438' : 'E', "\U0001d43a": "G",
'\U0001d439' : 'F', "\U0001d43b": "H",
'\U0001d43a' : 'G', "\U0001d43c": "I",
'\U0001d43b' : 'H', "\U0001d43d": "J",
'\U0001d43c' : 'I', "\U0001d43e": "K",
'\U0001d43d' : 'J', "\U0001d43f": "L",
'\U0001d43e' : 'K', "\U0001d440": "M",
'\U0001d43f' : 'L', "\U0001d441": "N",
'\U0001d440' : 'M', "\U0001d442": "O",
'\U0001d441' : 'N', "\U0001d443": "P",
'\U0001d442' : 'O', "\U0001d444": "Q",
'\U0001d443' : 'P', "\U0001d445": "R",
'\U0001d444' : 'Q', "\U0001d446": "S",
'\U0001d445' : 'R', "\U0001d447": "T",
'\U0001d446' : 'S', "\U0001d448": "U",
'\U0001d447' : 'T', "\U0001d449": "V",
'\U0001d448' : 'U', "\U0001d44a": "W",
'\U0001d449' : 'V', "\U0001d44b": "X",
'\U0001d44a' : 'W', "\U0001d44c": "Y",
'\U0001d44b' : 'X', "\U0001d44d": "Z",
'\U0001d44c' : 'Y', # Italic, Latin, lowercase
'\U0001d44d' : 'Z', "\U0001d44e": "a",
#Italic, Latin, lowercase "\U0001d44f": "b",
'\U0001d44e' : 'a', "\U0001d450": "c",
'\U0001d44f' : 'b', "\U0001d451": "d",
'\U0001d450' : 'c', "\U0001d452": "e",
'\U0001d451' : 'd', "\U0001d453": "f",
'\U0001d452' : 'e', "\U0001d454": "g",
'\U0001d453' : 'f', "\U0001d456": "i",
'\U0001d454' : 'g', "\U0001d457": "j",
'\U0001d456' : 'i', "\U0001d458": "k",
'\U0001d457' : 'j', "\U0001d459": "l",
'\U0001d458' : 'k', "\U0001d45a": "m",
'\U0001d459' : 'l', "\U0001d45b": "n",
'\U0001d45a' : 'm', "\U0001d45c": "o",
'\U0001d45b' : 'n', "\U0001d45d": "p",
'\U0001d45c' : 'o', "\U0001d45e": "q",
'\U0001d45d' : 'p', "\U0001d45f": "r",
'\U0001d45e' : 'q', "\U0001d460": "s",
'\U0001d45f' : 'r', "\U0001d461": "t",
'\U0001d460' : 's', "\U0001d462": "u",
'\U0001d461' : 't', "\U0001d463": "v",
'\U0001d462' : 'u', "\U0001d464": "w",
'\U0001d463' : 'v', "\U0001d465": "x",
'\U0001d464' : 'w', "\U0001d466": "y",
'\U0001d465' : 'x', "\U0001d467": "z",
'\U0001d466' : 'y',
'\U0001d467' : 'z',
} }
FUNC ={ FUNC = {
'sin' : '\\sin({fe})', "sin": "\\sin({fe})",
'cos' : '\\cos({fe})', "cos": "\\cos({fe})",
'tan' : '\\tan({fe})', "tan": "\\tan({fe})",
'arcsin' : '\\arcsin({fe})', "arcsin": "\\arcsin({fe})",
'arccos' : '\\arccos({fe})', "arccos": "\\arccos({fe})",
'arctan' : '\\arctan({fe})', "arctan": "\\arctan({fe})",
'arccot' : '\\arccot({fe})', "arccot": "\\arccot({fe})",
'sinh' : '\\sinh({fe})', "sinh": "\\sinh({fe})",
'cosh' : '\\cosh({fe})', "cosh": "\\cosh({fe})",
'tanh' : '\\tanh({fe})', "tanh": "\\tanh({fe})",
'coth' : '\\coth({fe})', "coth": "\\coth({fe})",
'sec' : '\\sec({fe})', "sec": "\\sec({fe})",
'csc' : '\\csc({fe})', "csc": "\\csc({fe})",
} }
FUNC_PLACE = '{fe}' FUNC_PLACE = "{fe}"
BRK = '\\\\' BRK = "\\\\"
CHR_DEFAULT = { CHR_DEFAULT = {
'ACC_VAL':'\\hat{{{0}}}', "ACC_VAL": "\\hat{{{0}}}",
} }
POS = { POS = {
'top' : '\\overline{{{0}}}', # not sure "top": "\\overline{{{0}}}", # not sure
'bot' : '\\underline{{{0}}}', "bot": "\\underline{{{0}}}",
} }
POS_DEFAULT = { POS_DEFAULT = {
'BAR_VAL': '\\overline{{{0}}}', "BAR_VAL": "\\overline{{{0}}}",
} }
SUB = '_{{{0}}}' SUB = "_{{{0}}}"
SUP = '^{{{0}}}' SUP = "^{{{0}}}"
F = { F = {
'bar': '\\frac{{{num}}}{{{den}}}', "bar": "\\frac{{{num}}}{{{den}}}",
'skw': r'^{{{num}}}/_{{{den}}}', "skw": r"^{{{num}}}/_{{{den}}}",
'noBar': '\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}', "noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
'lin' : '{{{num}}}/{{{den}}}', "lin": "{{{num}}}/{{{den}}}",
} }
F_DEFAULT = '\\frac{{{num}}}{{{den}}}' F_DEFAULT = "\\frac{{{num}}}{{{den}}}"
D = '\\left{left}{text}\\right{right}' D = "\\left{left}{text}\\right{right}"
D_DEFAULT = { D_DEFAULT = {
'left':'(', "left": "(",
'right':')', "right": ")",
'null':'.', "null": ".",
} }
RAD = '\\sqrt[{deg}]{{{text}}}' RAD = "\\sqrt[{deg}]{{{text}}}"
RAD_DEFAULT = '\\sqrt{{{text}}}' RAD_DEFAULT = "\\sqrt{{{text}}}"
ARR = '\\begin{{array}}{{c}}{text}\end{{array}}' ARR = "\\begin{{array}}{{c}}{text}\end{{array}}"
LIM_FUNC = { LIM_FUNC = {
'lim':'\\lim_{{{lim}}}', "lim": "\\lim_{{{lim}}}",
'max':'\\max_{{{lim}}}', "max": "\\max_{{{lim}}}",
'min':'\\min_{{{lim}}}', "min": "\\min_{{{lim}}}",
} }
LIM_TO = ('\\rightarrow','\\to') LIM_TO = ("\\rightarrow", "\\to")
LIM_UPP = '\\overset{{{lim}}}{{{text}}}' LIM_UPP = "\\overset{{{lim}}}{{{text}}}"
M = '\\begin{{matrix}}{text}\end{{matrix}}' M = "\\begin{{matrix}}{text}\end{{matrix}}"

View file

@ -8,355 +8,395 @@ On 25/03/2025
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from .latex_dict import (CHARS, CHR, CHR_BO, CHR_DEFAULT, POS, POS_DEFAULT from .latex_dict import (
, SUB, SUP, F, F_DEFAULT, T, FUNC, D, D_DEFAULT, RAD, RAD_DEFAULT, ARR CHARS,
, LIM_FUNC, LIM_TO, LIM_UPP, M, BRK, BLANK, BACKSLASH, ALN, FUNC_PLACE) CHR,
CHR_BO,
CHR_DEFAULT,
POS,
POS_DEFAULT,
SUB,
SUP,
F,
F_DEFAULT,
T,
FUNC,
D,
D_DEFAULT,
RAD,
RAD_DEFAULT,
ARR,
LIM_FUNC,
LIM_TO,
LIM_UPP,
M,
BRK,
BLANK,
BACKSLASH,
ALN,
FUNC_PLACE,
)
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}" OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
def load(stream): def load(stream):
tree = ET.parse(stream) tree = ET.parse(stream)
for omath in tree.findall(OMML_NS+'oMath'): for omath in tree.findall(OMML_NS + "oMath"):
yield oMath2Latex(omath) yield oMath2Latex(omath)
def load_string(string): def load_string(string):
root = ET.fromstring(string) root = ET.fromstring(string)
for omath in root.findall(OMML_NS+'oMath'): for omath in root.findall(OMML_NS + "oMath"):
yield oMath2Latex(omath) yield oMath2Latex(omath)
def escape_latex(strs): def escape_latex(strs):
last = None last = None
new_chr = [] new_chr = []
strs = strs.replace(r'\\','\\') strs = strs.replace(r"\\", "\\")
for c in strs : for c in strs:
if (c in CHARS) and (last !=BACKSLASH): if (c in CHARS) and (last != BACKSLASH):
new_chr.append(BACKSLASH+c) new_chr.append(BACKSLASH + c)
else: else:
new_chr.append(c) new_chr.append(c)
last = c last = c
return BLANK.join(new_chr) return BLANK.join(new_chr)
def get_val(key,default=None,store=CHR):
if key is not None: def get_val(key, default=None, store=CHR):
return key if not store else store.get(key,key) if key is not None:
else: return key if not store else store.get(key, key)
return default else:
return default
class Tag2Method(object): class Tag2Method(object):
def call_method(self,elm,stag=None): def call_method(self, elm, stag=None):
getmethod = self.tag2meth.get getmethod = self.tag2meth.get
if stag is None: if stag is None:
stag = elm.tag.replace(OMML_NS,'') stag = elm.tag.replace(OMML_NS, "")
method = getmethod(stag) method = getmethod(stag)
if method: if method:
return method(self,elm) return method(self, elm)
else: else:
return None return None
def process_children_list(self,elm,include=None): def process_children_list(self, elm, include=None):
""" """
process children of the elm,return iterable process children of the elm,return iterable
""" """
for _e in list(elm): for _e in list(elm):
if (OMML_NS not in _e.tag): if OMML_NS not in _e.tag:
continue continue
stag = _e.tag.replace(OMML_NS,'') stag = _e.tag.replace(OMML_NS, "")
if include and (stag not in include): if include and (stag not in include):
continue continue
t = self.call_method(_e,stag=stag) t = self.call_method(_e, stag=stag)
if t is None: if t is None:
t = self.process_unknow(_e,stag) t = self.process_unknow(_e, stag)
if t is None: if t is None:
continue continue
yield (stag,t,_e) yield (stag, t, _e)
def process_children_dict(self,elm,include=None): def process_children_dict(self, elm, include=None):
""" """
process children of the elm,return dict process children of the elm,return dict
""" """
latex_chars = dict() latex_chars = dict()
for stag,t,e in self.process_children_list(elm,include): for stag, t, e in self.process_children_list(elm, include):
latex_chars[stag] = t latex_chars[stag] = t
return latex_chars return latex_chars
def process_children(self,elm,include=None): def process_children(self, elm, include=None):
""" """
process children of the elm,return string process children of the elm,return string
""" """
return BLANK.join(( t if not isinstance(t,Tag2Method) else str(t) return BLANK.join(
for stag,t,e in self.process_children_list(elm,include))) (
t if not isinstance(t, Tag2Method) else str(t)
for stag, t, e in self.process_children_list(elm, include)
)
)
def process_unknow(self,elm,stag): def process_unknow(self, elm, stag):
return None return None
class Pr(Tag2Method): class Pr(Tag2Method):
text = '' text = ""
__val_tags = ('chr','pos','begChr','endChr','type') __val_tags = ("chr", "pos", "begChr", "endChr", "type")
__innerdict= None #can't use the __dict__ __innerdict = None # can't use the __dict__
""" common properties of element""" """ common properties of element"""
def __init__(self, elm):
self.__innerdict={}
self.text=self.process_children(elm)
def __str__(self): def __init__(self, elm):
return self.text self.__innerdict = {}
self.text = self.process_children(elm)
def __unicode__(self): def __str__(self):
return self.__str__(self) return self.text
def __getattr__(self,name): def __unicode__(self):
return self.__innerdict.get(name,None) return self.__str__(self)
def do_brk(self,elm): def __getattr__(self, name):
self.__innerdict['brk'] = BRK return self.__innerdict.get(name, None)
return BRK
def do_common(self,elm): def do_brk(self, elm):
stag = elm.tag.replace(OMML_NS,'') self.__innerdict["brk"] = BRK
if stag in self.__val_tags: return BRK
t = elm.get('{0}val'.format(OMML_NS))
self.__innerdict[stag] = t
return None
tag2meth = { def do_common(self, elm):
'brk':do_brk, stag = elm.tag.replace(OMML_NS, "")
'chr':do_common, if stag in self.__val_tags:
'pos':do_common, t = elm.get("{0}val".format(OMML_NS))
'begChr':do_common, self.__innerdict[stag] = t
'endChr':do_common, return None
'type':do_common,
} tag2meth = {
"brk": do_brk,
"chr": do_common,
"pos": do_common,
"begChr": do_common,
"endChr": do_common,
"type": do_common,
}
class oMath2Latex(Tag2Method): class oMath2Latex(Tag2Method):
""" """
Convert oMath element of omml to latex Convert oMath element of omml to latex
""" """
_t_dict = T
__direct_tags = ('box','sSub','sSup','sSubSup','num','den','deg','e') _t_dict = T
def __init__(self, element): __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
self._latex = self.process_children(element)
def __str__(self): def __init__(self, element):
return self.latex self._latex = self.process_children(element)
def __unicode__(self): def __str__(self):
return self.__str__(self) return self.latex
def process_unknow(self,elm,stag): def __unicode__(self):
if stag in self.__direct_tags: return self.__str__(self)
return self.process_children(elm)
elif stag[-2:] == 'Pr':
return Pr(elm)
else:
return None
@property def process_unknow(self, elm, stag):
def latex(self): if stag in self.__direct_tags:
return self._latex return self.process_children(elm)
elif stag[-2:] == "Pr":
return Pr(elm)
else:
return None
def do_acc(self,elm): @property
""" def latex(self):
the accent function return self._latex
"""
c_dict = self.process_children_dict(elm)
latex_s = get_val(c_dict['accPr'].chr,default=CHR_DEFAULT.get('ACC_VAL'),store=CHR)
return latex_s.format(c_dict['e'])
def do_bar(self,elm): def do_acc(self, elm):
""" """
the bar function the accent function
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['barPr'] latex_s = get_val(
latex_s = get_val(pr.pos,default=POS_DEFAULT.get('BAR_VAL'),store=POS) c_dict["accPr"].chr, default=CHR_DEFAULT.get("ACC_VAL"), store=CHR
return pr.text+latex_s.format(c_dict['e']) )
return latex_s.format(c_dict["e"])
def do_d(self,elm): def do_bar(self, elm):
""" """
the delimiter object the bar function
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['dPr'] pr = c_dict["barPr"]
null = D_DEFAULT.get('null') latex_s = get_val(pr.pos, default=POS_DEFAULT.get("BAR_VAL"), store=POS)
s_val = get_val(pr.begChr,default=D_DEFAULT.get('left'),store=T) return pr.text + latex_s.format(c_dict["e"])
e_val = get_val(pr.endChr,default=D_DEFAULT.get('right'),store=T)
return pr.text+D.format(left= null if not s_val else escape_latex(s_val),
text=c_dict['e'],
right= null if not e_val else escape_latex(e_val))
def do_d(self, elm):
"""
the delimiter object
"""
c_dict = self.process_children_dict(elm)
pr = c_dict["dPr"]
null = D_DEFAULT.get("null")
s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
return pr.text + D.format(
left=null if not s_val else escape_latex(s_val),
text=c_dict["e"],
right=null if not e_val else escape_latex(e_val),
)
def do_spre(self,elm): def do_spre(self, elm):
""" """
the Pre-Sub-Superscript object -- Not support yet the Pre-Sub-Superscript object -- Not support yet
""" """
pass pass
def do_sub(self,elm): def do_sub(self, elm):
text = self.process_children(elm) text = self.process_children(elm)
return SUB.format(text) return SUB.format(text)
def do_sup(self,elm): def do_sup(self, elm):
text = self.process_children(elm) text = self.process_children(elm)
return SUP.format(text) return SUP.format(text)
def do_f(self,elm): def do_f(self, elm):
""" """
the fraction object the fraction object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['fPr'] pr = c_dict["fPr"]
latex_s = get_val(pr.type,default=F_DEFAULT,store=F) latex_s = get_val(pr.type, default=F_DEFAULT, store=F)
return pr.text+latex_s.format(num=c_dict.get('num'),den=c_dict.get('den')) return pr.text + latex_s.format(num=c_dict.get("num"), den=c_dict.get("den"))
def do_func(self,elm): def do_func(self, elm):
""" """
the Function-Apply object (Examples:sin cos) the Function-Apply object (Examples:sin cos)
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
func_name = c_dict.get('fName') func_name = c_dict.get("fName")
return func_name.replace(FUNC_PLACE,c_dict.get('e')) return func_name.replace(FUNC_PLACE, c_dict.get("e"))
def do_fname(self,elm): def do_fname(self, elm):
""" """
the func name the func name
""" """
latex_chars = [] latex_chars = []
for stag,t,e in self.process_children_list(elm): for stag, t, e in self.process_children_list(elm):
if stag == 'r': if stag == "r":
if FUNC.get(t): if FUNC.get(t):
latex_chars.append(FUNC[t]) latex_chars.append(FUNC[t])
else : else:
raise NotImplemented("Not support func %s" % t) raise NotImplemented("Not support func %s" % t)
else: else:
latex_chars.append(t) latex_chars.append(t)
t = BLANK.join(latex_chars) t = BLANK.join(latex_chars)
return t if FUNC_PLACE in t else t+FUNC_PLACE #do_func will replace this return t if FUNC_PLACE in t else t + FUNC_PLACE # do_func will replace this
def do_groupchr(self,elm): def do_groupchr(self, elm):
""" """
the Group-Character object the Group-Character object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict['groupChrPr'] pr = c_dict["groupChrPr"]
latex_s = get_val(pr.chr) latex_s = get_val(pr.chr)
return pr.text+latex_s.format(c_dict['e']) return pr.text + latex_s.format(c_dict["e"])
def do_rad(self,elm): def do_rad(self, elm):
""" """
the radical object the radical object
""" """
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
text = c_dict.get('e') text = c_dict.get("e")
deg_text = c_dict.get('deg') deg_text = c_dict.get("deg")
if deg_text: if deg_text:
return RAD.format(deg=deg_text,text=text) return RAD.format(deg=deg_text, text=text)
else: else:
return RAD_DEFAULT.format(text=text) return RAD_DEFAULT.format(text=text)
def do_eqarr(self,elm): def do_eqarr(self, elm):
""" """
the Array object the Array object
""" """
return ARR.format(text=BRK.join( return ARR.format(
[t for stag,t,e in self.process_children_list(elm,include=('e',))])) text=BRK.join(
[t for stag, t, e in self.process_children_list(elm, include=("e",))]
)
)
def do_limlow(self, elm):
"""
the Lower-Limit object
"""
t_dict = self.process_children_dict(elm, include=("e", "lim"))
latex_s = LIM_FUNC.get(t_dict["e"])
if not latex_s:
raise NotImplemented("Not support lim %s" % t_dict["e"])
else:
return latex_s.format(lim=t_dict.get("lim"))
def do_limlow(self,elm): def do_limupp(self, elm):
""" """
the Lower-Limit object the Upper-Limit object
""" """
t_dict = self.process_children_dict(elm,include=('e','lim')) t_dict = self.process_children_dict(elm, include=("e", "lim"))
latex_s = LIM_FUNC.get(t_dict['e']) return LIM_UPP.format(lim=t_dict.get("lim"), text=t_dict.get("e"))
if not latex_s :
raise NotImplemented("Not support lim %s" % t_dict['e'])
else:
return latex_s.format(lim=t_dict.get('lim'))
def do_limupp(self,elm): def do_lim(self, elm):
""" """
the Upper-Limit object the lower limit of the limLow object and the upper limit of the limUpp function
""" """
t_dict = self.process_children_dict(elm,include=('e','lim')) return self.process_children(elm).replace(LIM_TO[0], LIM_TO[1])
return LIM_UPP.format(lim=t_dict.get('lim'),text=t_dict.get('e'))
def do_lim(self,elm): def do_m(self, elm):
""" """
the lower limit of the limLow object and the upper limit of the limUpp function the Matrix object
""" """
return self.process_children(elm).replace(LIM_TO[0],LIM_TO[1]) rows = []
for stag, t, e in self.process_children_list(elm):
if stag is "mPr":
pass
elif stag == "mr":
rows.append(t)
return M.format(text=BRK.join(rows))
def do_m(self,elm): def do_mr(self, elm):
""" """
the Matrix object a single row of the matrix m
""" """
rows = [] return ALN.join(
for stag,t,e in self.process_children_list(elm): [t for stag, t, e in self.process_children_list(elm, include=("e",))]
if stag is 'mPr': )
pass
elif stag == 'mr':
rows.append(t)
return M.format(text=BRK.join(rows))
def do_mr(self,elm): def do_nary(self, elm):
""" """
a single row of the matrix m the n-ary object
""" """
return ALN.join( res = []
[t for stag,t,e in self.process_children_list(elm,include=('e',))]) bo = ""
for stag, t, e in self.process_children_list(elm):
if stag == "naryPr":
bo = get_val(t.chr, store=CHR_BO)
else:
res.append(t)
return bo + BLANK.join(res)
def do_nary(self,elm): def do_r(self, elm):
""" """
the n-ary object Get text from 'r' element,And try convert them to latex symbols
""" @todo text style support , (sty)
res = [] @todo \text (latex pure text support)
bo = '' """
for stag,t,e in self.process_children_list(elm): _str = []
if stag == 'naryPr': for s in elm.findtext("./{0}t".format(OMML_NS)):
bo = get_val(t.chr,store=CHR_BO) # s = s if isinstance(s,unicode) else unicode(s,'utf-8')
else : _str.append(self._t_dict.get(s, s))
res.append(t) return escape_latex(BLANK.join(_str))
return bo+BLANK.join(res)
def do_r(self,elm): tag2meth = {
""" "acc": do_acc,
Get text from 'r' element,And try convert them to latex symbols "r": do_r,
@todo text style support , (sty) "bar": do_bar,
@todo \text (latex pure text support) "sub": do_sub,
""" "sup": do_sup,
_str = [] "f": do_f,
for s in elm.findtext('./{0}t'.format(OMML_NS)): "func": do_func,
#s = s if isinstance(s,unicode) else unicode(s,'utf-8') "fName": do_fname,
_str.append(self._t_dict.get(s,s)) "groupChr": do_groupchr,
return escape_latex(BLANK.join(_str)) "d": do_d,
"rad": do_rad,
tag2meth={ "eqArr": do_eqarr,
'acc' : do_acc, "limLow": do_limlow,
'r' : do_r, "limUpp": do_limupp,
'bar' : do_bar, "lim": do_lim,
'sub' : do_sub, "m": do_m,
'sup' : do_sup, "mr": do_mr,
'f' : do_f, "nary": do_nary,
'func': do_func, }
'fName' : do_fname,
'groupChr' : do_groupchr,
'd' : do_d,
'rad' : do_rad,
'eqArr' : do_eqarr,
'limLow' : do_limlow,
'limUpp' : do_limupp,
'lim' : do_lim,
'm' : do_m,
'mr' : do_mr,
'nary' : do_nary,
}

View file

@ -75,5 +75,6 @@ class DocxConverter(HtmlConverter):
style_map = kwargs.get("style_map", None) style_map = kwargs.get("style_map", None)
pre_process_stream = pre_process_docx(file_stream) pre_process_stream = pre_process_docx(file_stream)
return self._html_converter.convert_string( return self._html_converter.convert_string(
mammoth.convert_to_html(pre_process_stream, style_map=style_map).value, **kwargs mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
**kwargs,
) )

View file

@ -272,9 +272,10 @@ def test_docx_equations() -> None:
assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found" assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found"
# Find block equations wrapped with double $$ and check if they are present # Find block equations wrapped with double $$ and check if they are present
block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content) block_equations = re.findall(r"\$\$(.+?)\$\$", result.text_content)
assert block_equations, "No block equations found in the document." assert block_equations, "No block equations found in the document."
def test_input_as_strings() -> None: def test_input_as_strings() -> None:
markitdown = MarkItDown() markitdown = MarkItDown()