refactor: reformatted with black

This commit is contained in:
Sathindu Ganhala Arachchige 2025-03-28 16:24:27 -04:00
parent 6a66b275bb
commit 799a1caf97
4 changed files with 559 additions and 519 deletions

View file

@ -7,269 +7,267 @@ On 25/03/2025
from __future__ import unicode_literals from __future__ import unicode_literals
# Characters that escape_latex prefixes with a backslash.
CHARS = tuple("{}_^#&$%~")
# Empty separator used when joining converted fragments.
BLANK = str()
# One literal backslash character.
BACKSLASH = "\x5c"
# Cell separator used when joining the cells of a matrix row.
ALN = CHARS[5]
# Combining / grouping character -> str.format template for the LaTeX command.
# Each template takes exactly one positional argument (the accented expression)
# and must contain balanced braces: "{{" and "}}" are literal braces, "{0}" is
# the placeholder.
CHR = {
    # Unicode : Latex Math Symbols
    # Top accents
    "\u0300": "\\grave{{{0}}}",
    "\u0301": "\\acute{{{0}}}",
    "\u0302": "\\hat{{{0}}}",
    "\u0303": "\\tilde{{{0}}}",
    "\u0304": "\\bar{{{0}}}",
    "\u0305": "\\overbar{{{0}}}",
    "\u0306": "\\breve{{{0}}}",
    "\u0307": "\\dot{{{0}}}",
    "\u0308": "\\ddot{{{0}}}",
    "\u0309": "\\ovhook{{{0}}}",
    # The next two templates previously ended in "}}}}"; the stray "}" made
    # str.format raise ValueError ("Single '}' encountered in format string").
    "\u030a": "\\ocirc{{{0}}}",
    "\u030c": "\\check{{{0}}}",
    "\u0310": "\\candra{{{0}}}",
    "\u0312": "\\oturnedcomma{{{0}}}",
    "\u0315": "\\ocommatopright{{{0}}}",
    "\u031a": "\\droang{{{0}}}",
    "\u0338": "\\not{{{0}}}",
    "\u20d0": "\\leftharpoonaccent{{{0}}}",
    "\u20d1": "\\rightharpoonaccent{{{0}}}",
    "\u20d2": "\\vertoverlay{{{0}}}",
    "\u20d6": "\\overleftarrow{{{0}}}",
    "\u20d7": "\\vec{{{0}}}",
    "\u20db": "\\dddot{{{0}}}",
    "\u20dc": "\\ddddot{{{0}}}",
    "\u20e1": "\\overleftrightarrow{{{0}}}",
    "\u20e7": "\\annuity{{{0}}}",
    "\u20e9": "\\widebridgeabove{{{0}}}",
    "\u20f0": "\\asteraccent{{{0}}}",
    # Bottom accents
    "\u0330": "\\wideutilde{{{0}}}",
    "\u0331": "\\underbar{{{0}}}",
    "\u20e8": "\\threeunderdot{{{0}}}",
    "\u20ec": "\\underrightharpoondown{{{0}}}",
    "\u20ed": "\\underleftharpoondown{{{0}}}",
    # Was misspelled "\\underledtarrow".
    "\u20ee": "\\underleftarrow{{{0}}}",
    "\u20ef": "\\underrightarrow{{{0}}}",
    # Over | group
    "\u23b4": "\\overbracket{{{0}}}",
    "\u23dc": "\\overparen{{{0}}}",
    "\u23de": "\\overbrace{{{0}}}",
    # Under| group
    "\u23b5": "\\underbracket{{{0}}}",
    "\u23dd": "\\underparen{{{0}}}",
    "\u23df": "\\underbrace{{{0}}}",
}
# Big (n-ary) operators: Unicode character -> LaTeX command.
# Built from (code point, command name) pairs; insertion order follows the
# table below.
CHR_BO = {
    chr(code_point): "\\" + command
    for code_point, command in (
        (0x2140, "Bbbsum"),
        (0x220F, "prod"),
        (0x2210, "coprod"),
        (0x2211, "sum"),
        (0x222B, "int"),
        (0x22C0, "bigwedge"),
        (0x22C1, "bigvee"),
        (0x22C2, "bigcap"),
        (0x22C3, "bigcup"),
        (0x2A00, "bigodot"),
        (0x2A01, "bigoplus"),
        (0x2A02, "bigotimes"),
    )
}
# Unicode character -> LaTeX replacement, looked up per character for the
# text of math runs and for delimiter characters.  Command replacements keep
# a trailing space so the command name is terminated before following text.
T = {
    # Greek letters (Mathematical Italic Small Alpha .. Pi Symbol)
    "\U0001d6fc": "\\alpha ",
    "\U0001d6fd": "\\beta ",
    "\U0001d6fe": "\\gamma ",
    "\U0001d6ff": "\\delta ",  # was "\\theta " (copy/paste slip)
    "\U0001d700": "\\epsilon ",
    "\U0001d701": "\\zeta ",
    "\U0001d702": "\\eta ",
    "\U0001d703": "\\theta ",
    "\U0001d704": "\\iota ",
    "\U0001d705": "\\kappa ",
    "\U0001d706": "\\lambda ",
    "\U0001d707": "\\mu ",  # was "\\m "
    "\U0001d708": "\\nu ",  # was "\\n "
    "\U0001d709": "\\xi ",
    "\U0001d70a": "\\omicron ",
    "\U0001d70b": "\\pi ",
    "\U0001d70c": "\\rho ",
    "\U0001d70d": "\\varsigma ",
    "\U0001d70e": "\\sigma ",
    "\U0001d70f": "\\tau ",  # was "\\ta "
    "\U0001d710": "\\upsilon ",
    "\U0001d711": "\\phi ",
    "\U0001d712": "\\chi ",
    "\U0001d713": "\\psi ",
    "\U0001d714": "\\omega ",
    "\U0001d715": "\\partial ",
    "\U0001d716": "\\varepsilon ",
    "\U0001d717": "\\vartheta ",
    "\U0001d718": "\\varkappa ",
    "\U0001d719": "\\varphi ",
    "\U0001d71a": "\\varrho ",
    "\U0001d71b": "\\varpi ",
    # Relation symbols
    "\u2190": "\\leftarrow ",
    "\u2191": "\\uparrow ",
    # "\u2192" used to be listed twice (also at the top of the dict) with the
    # same value; only this entry is kept.
    "\u2192": "\\rightarrow ",
    "\u2193": "\\downarrow ",  # was "\\downright ", which is not a command
    "\u2194": "\\leftrightarrow ",
    "\u2195": "\\updownarrow ",
    "\u2196": "\\nwarrow ",
    "\u2197": "\\nearrow ",
    "\u2198": "\\searrow ",
    "\u2199": "\\swarrow ",
    "\u22ee": "\\vdots ",
    "\u22ef": "\\cdots ",
    "\u22f0": "\\adots ",
    "\u22f1": "\\ddots ",
    "\u2260": "\\ne ",
    "\u2264": "\\leq ",
    "\u2265": "\\geq ",
    "\u2266": "\\leqq ",
    "\u2267": "\\geqq ",
    "\u2268": "\\lneqq ",
    "\u2269": "\\gneqq ",
    "\u226a": "\\ll ",
    "\u226b": "\\gg ",
    "\u2208": "\\in ",
    "\u2209": "\\notin ",
    "\u220b": "\\ni ",
    "\u220c": "\\nni ",
    # Ordinary symbols
    "\u221e": "\\infty ",
    # Binary relations
    "\u00b1": "\\pm ",
    "\u2213": "\\mp ",
}
# Italic, Latin, uppercase: U+1D434 (A) .. U+1D44D (Z) are contiguous.
T.update(
    (chr(0x1D434 + offset), letter)
    for offset, letter in enumerate("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
)
# Italic, Latin, lowercase: U+1D44E (a) .. U+1D467 (z).  U+1D455 is a reserved
# code point; italic h is encoded as U+210E (PLANCK CONSTANT) instead, which
# the original table did not cover.
T.update(
    ("\u210e" if letter == "h" else chr(0x1D44E + offset), letter)
    for offset, letter in enumerate("abcdefghijklmnopqrstuvwxyz")
)
# Function name -> LaTeX template.  "{fe}" is a textual placeholder for the
# function argument (it is the same text as FUNC_PLACE).
FUNC = {
    func_name: "\\" + func_name + "({fe})"
    for func_name in (
        "sin",
        "cos",
        "tan",
        "arcsin",
        "arccos",
        "arctan",
        "arccot",
        "sinh",
        "cosh",
        "tanh",
        "coth",
        "sec",
        "csc",
    )
}
# Placeholder inside FUNC templates that is replaced by the function argument.
FUNC_PLACE = "{fe}"

# LaTeX line break (two backslashes).
BRK = "\\\\"

# Accent template used when an accent element names no character.
CHR_DEFAULT = {
    "ACC_VAL": "\\hat{{{0}}}",
}

# Bar position -> str.format template.
POS = {
    "top": "\\overline{{{0}}}",  # not sure
    "bot": "\\underline{{{0}}}",
}

# Bar template used when no position is given.
POS_DEFAULT = {
    "BAR_VAL": "\\overline{{{0}}}",
}

# Subscript / superscript templates.
SUB = "_{{{0}}}"
SUP = "^{{{0}}}"

# Fraction type -> template taking ``num`` and ``den``.
F = {
    "bar": "\\frac{{{num}}}{{{den}}}",
    "skw": r"^{{{num}}}/_{{{den}}}",
    "noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
    "lin": "{{{num}}}/{{{den}}}",
}
F_DEFAULT = "\\frac{{{num}}}{{{den}}}"

# Delimiter template and the characters used when none are specified
# ("null" is LaTeX's invisible delimiter).
D = "\\left{left}{text}\\right{right}"
D_DEFAULT = {
    "left": "(",
    "right": ")",
    "null": ".",
}

# Radical templates, with and without an explicit degree.
RAD = "\\sqrt[{deg}]{{{text}}}"
RAD_DEFAULT = "\\sqrt{{{text}}}"

# "\end" was written with a single backslash in a non-raw string.  "\e" is an
# invalid escape sequence (a warning today, an error in a future Python), so
# the backslash is now escaped explicitly; the resulting value is unchanged.
ARR = "\\begin{{array}}{{c}}{text}\\end{{array}}"

# Limit-like function name -> template taking ``lim`` (the lower limit).
LIM_FUNC = {
    "lim": "\\lim_{{{lim}}}",
    "max": "\\max_{{{lim}}}",
    "min": "\\min_{{{lim}}}",
}

# (text to search for, replacement) applied to the text of a limit.
LIM_TO = ("\\rightarrow", "\\to")
LIM_UPP = "\\overset{{{lim}}}{{{text}}}"

# Matrix template; same "\end" fix as ARR.
M = "\\begin{{matrix}}{text}\\end{{matrix}}"

View file

@ -8,103 +8,136 @@ On 25/03/2025
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from .latex_dict import (CHARS, CHR, CHR_BO, CHR_DEFAULT, POS, POS_DEFAULT from .latex_dict import (
, SUB, SUP, F, F_DEFAULT, T, FUNC, D, D_DEFAULT, RAD, RAD_DEFAULT, ARR CHARS,
, LIM_FUNC, LIM_TO, LIM_UPP, M, BRK, BLANK, BACKSLASH, ALN, FUNC_PLACE) CHR,
CHR_BO,
CHR_DEFAULT,
POS,
POS_DEFAULT,
SUB,
SUP,
F,
F_DEFAULT,
T,
FUNC,
D,
D_DEFAULT,
RAD,
RAD_DEFAULT,
ARR,
LIM_FUNC,
LIM_TO,
LIM_UPP,
M,
BRK,
BLANK,
BACKSLASH,
ALN,
FUNC_PLACE,
)
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}" OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
def load(stream):
    """Parse XML from *stream* and lazily yield an oMath2Latex per equation.

    Only ``oMath`` elements found by ``findall`` on the document root (its
    direct children) are converted.
    """
    document_root = ET.parse(stream).getroot()
    omath_tag = "{0}oMath".format(OMML_NS)
    for equation in document_root.findall(omath_tag):
        yield oMath2Latex(equation)
def load_string(string):
    """Parse XML from *string* and lazily yield an oMath2Latex per equation.

    Only ``oMath`` elements that are direct children of the root element are
    converted.
    """
    omath_tag = "{0}oMath".format(OMML_NS)
    for equation in ET.fromstring(string).findall(omath_tag):
        yield oMath2Latex(equation)
def escape_latex(strs):
    """Return *strs* with every character listed in CHARS backslash-escaped.

    Doubled backslashes are first collapsed to a single one.  A character that
    directly follows a backslash is treated as already escaped and left alone.
    """
    collapsed = strs.replace(r"\\", "\\")
    # Pair every character with its predecessor (None for the first one).
    predecessors = [None]
    predecessors.extend(collapsed)
    return BLANK.join(
        BACKSLASH + current
        if (current in CHARS and previous != BACKSLASH)
        else current
        for previous, current in zip(predecessors, collapsed)
    )
def get_val(key, default=None, store=CHR):
    """Translate *key* through *store*.

    Returns *default* when *key* is None, *key* itself when *store* is empty
    or has no entry for it, and the stored value otherwise.
    """
    if key is None:
        return default
    if not store:
        return key
    return store.get(key, key)
class Tag2Method(object):
    """Dispatch XML elements to handler functions chosen by tag name.

    Handlers are looked up in ``self.tag2meth``, which this class does not
    define itself; each handler is called as ``handler(self, elm)``.
    """

    def call_method(self, elm, stag=None):
        """Run the handler registered for *elm*; return None if there is none.

        *stag* is the tag without the OMML namespace; it is derived from
        ``elm.tag`` when not supplied.
        """
        lookup = self.tag2meth.get
        if stag is None:
            stag = elm.tag.replace(OMML_NS, "")
        handler = lookup(stag)
        return handler(self, elm) if handler else None

    def process_children_list(self, elm, include=None):
        """
        Convert the children of *elm*, yielding ``(stag, result, child)``.

        Children outside the OMML namespace, children whose tag is not in a
        non-empty *include*, and children that convert to None are skipped.
        """
        for child in list(elm):
            qualified = child.tag
            if OMML_NS not in qualified:
                continue
            stag = qualified.replace(OMML_NS, "")
            if include and (stag not in include):
                continue
            result = self.call_method(child, stag=stag)
            if result is None:
                # No handler (or it returned None): try the fallback hook.
                result = self.process_unknow(child, stag)
            if result is not None:
                yield (stag, result, child)

    def process_children_dict(self, elm, include=None):
        """
        Convert the children of *elm* into a ``{stag: result}`` dict.

        When a tag occurs more than once, the last result wins.
        """
        return {
            stag: result
            for stag, result, _child in self.process_children_list(elm, include)
        }

    def process_children(self, elm, include=None):
        """
        Convert the children of *elm* and join the results into one string.

        Results that are Tag2Method instances are joined via ``str()``.
        """
        return BLANK.join(
            str(result) if isinstance(result, Tag2Method) else result
            for _stag, result, _child in self.process_children_list(elm, include)
        )

    def process_unknow(self, elm, stag):
        """Fallback for tags without a handler; this base version returns None."""
        return None
class Pr(Tag2Method): class Pr(Tag2Method):
text = '' text = ""
__val_tags = ('chr','pos','begChr','endChr','type') __val_tags = ("chr", "pos", "begChr", "endChr", "type")
__innerdict= None #can't use the __dict__ __innerdict = None # can't use the __dict__
""" common properties of element""" """ common properties of element"""
def __init__(self, elm): def __init__(self, elm):
self.__innerdict={} self.__innerdict = {}
self.text=self.process_children(elm) self.text = self.process_children(elm)
def __str__(self): def __str__(self):
return self.text return self.text
@ -112,27 +145,27 @@ class Pr(Tag2Method):
def __unicode__(self):
    """Return the same text as ``__str__``."""
    # ``self.__str__`` is already bound; the old ``self.__str__(self)``
    # passed an extra argument and raised TypeError.
    return self.__str__()
def __getattr__(self, name):
    """Return the recorded property *name*, or None if it was never set."""
    recorded = self.__innerdict
    return recorded.get(name)
def do_brk(self, elm):
    """Record a line break under ``brk`` and return the break text."""
    self.__innerdict.update(brk=BRK)
    return BRK
def do_common(self, elm):
    """Record the ``val`` attribute of a known property element.

    The value is stored under the element's namespace-stripped tag.  Always
    returns None.
    """
    stag = elm.tag.replace(OMML_NS, "")
    if stag not in self.__val_tags:
        return None
    self.__innerdict[stag] = elm.get("{0}val".format(OMML_NS))
    return None
# Namespace-stripped tag -> handler for the property elements handled here.
tag2meth = dict(
    brk=do_brk,
    chr=do_common,
    pos=do_common,
    begChr=do_common,
    endChr=do_common,
    type=do_common,
)
@ -140,9 +173,10 @@ class oMath2Latex(Tag2Method):
""" """
Convert oMath element of omml to latex Convert oMath element of omml to latex
""" """
_t_dict = T _t_dict = T
__direct_tags = ('box','sSub','sSup','sSubSup','num','den','deg','e') __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
def __init__(self, element): def __init__(self, element):
self._latex = self.process_children(element) self._latex = self.process_children(element)
@ -153,10 +187,10 @@ class oMath2Latex(Tag2Method):
def __unicode__(self):
    """Return the same text as ``__str__``."""
    # ``self.__str__`` is already bound; the old ``self.__str__(self)``
    # passed an extra argument and raised TypeError.
    return self.__str__()
def process_unknow(self, elm, stag):
    """Fallback conversion for tags that have no dedicated handler.

    Tags in ``__direct_tags`` convert to the joined text of their children,
    tags ending in ``Pr`` become a ``Pr`` object, anything else gives None.
    """
    if stag in self.__direct_tags:
        return self.process_children(elm)
    if stag.endswith("Pr"):
        return Pr(elm)
    return None
@ -165,198 +199,204 @@ class oMath2Latex(Tag2Method):
def latex(self): def latex(self):
return self._latex return self._latex
def do_acc(self, elm):
    """
    the accent function

    Uses the template for the character recorded in ``accPr``, falling back
    to CHR_DEFAULT["ACC_VAL"] when no character was recorded.
    """
    children = self.process_children_dict(elm)
    accent_char = children["accPr"].chr
    fallback = CHR_DEFAULT.get("ACC_VAL")
    template = get_val(accent_char, default=fallback, store=CHR)
    return template.format(children["e"])
def do_bar(self, elm):
    """
    the bar function

    The template is chosen by the position recorded in ``barPr``, falling
    back to POS_DEFAULT["BAR_VAL"] when none was recorded.
    """
    children = self.process_children_dict(elm)
    props = children["barPr"]
    fallback = POS_DEFAULT.get("BAR_VAL")
    template = get_val(props.pos, default=fallback, store=POS)
    return props.text + template.format(children["e"])
def do_d(self, elm):
    """
    the delimiter object

    Opening and closing characters come from ``dPr`` (translated through T),
    defaulting to D_DEFAULT's "left"/"right"; an empty character is rendered
    as D_DEFAULT's "null" delimiter.
    """
    children = self.process_children_dict(elm)
    props = children["dPr"]
    invisible = D_DEFAULT.get("null")
    opening = get_val(props.begChr, default=D_DEFAULT.get("left"), store=T)
    closing = get_val(props.endChr, default=D_DEFAULT.get("right"), store=T)
    left = escape_latex(opening) if opening else invisible
    right = escape_latex(closing) if closing else invisible
    return props.text + D.format(left=left, text=children["e"], right=right)
def do_spre(self, elm):
    """
    the Pre-Sub-Superscript object -- Not support yet

    Always returns None.
    """
    return None
def do_sub(self, elm):
    """Wrap the converted children of *elm* in the subscript template."""
    return SUB.format(self.process_children(elm))
def do_sup(self, elm):
    """Wrap the converted children of *elm* in the superscript template."""
    return SUP.format(self.process_children(elm))
def do_f(self, elm):
    """
    the fraction object

    The template is chosen by the type recorded in ``fPr``, falling back to
    F_DEFAULT when none was recorded.
    """
    children = self.process_children_dict(elm)
    props = children["fPr"]
    template = get_val(props.type, default=F_DEFAULT, store=F)
    numerator = children.get("num")
    denominator = children.get("den")
    return props.text + template.format(num=numerator, den=denominator)
def do_func(self, elm):
    """
    the Function-Apply object (Examples:sin cos)

    Substitutes the converted argument for FUNC_PLACE in the converted name.
    """
    children = self.process_children_dict(elm)
    name_template = children.get("fName")
    argument = children.get("e")
    return name_template.replace(FUNC_PLACE, argument)
def do_fname(self, elm):
    """
    the func name

    Text runs are looked up in FUNC; other children are used as converted.
    The result always contains FUNC_PLACE, which do_func later replaces with
    the function argument.

    Raises:
        NotImplementedError: a text run names a function missing from FUNC.
    """
    latex_chars = []
    for stag, t, e in self.process_children_list(elm):
        if stag == "r":
            template = FUNC.get(t)
            if not template:
                # ``NotImplemented`` is a constant, not an exception class;
                # calling it raised an unrelated TypeError.
                raise NotImplementedError("Not support func %s" % t)
            latex_chars.append(template)
        else:
            latex_chars.append(t)
    t = BLANK.join(latex_chars)
    return t if FUNC_PLACE in t else t + FUNC_PLACE  # do_func will replace this
def do_groupchr(self, elm):
    """
    the Group-Character object

    The template is looked up in CHR by the character recorded in
    ``groupChrPr``.
    """
    children = self.process_children_dict(elm)
    props = children["groupChrPr"]
    template = get_val(props.chr)
    body = children["e"]
    return props.text + template.format(body)
def do_rad(self, elm):
    """
    the radical object

    Uses RAD when a non-empty degree is present, RAD_DEFAULT otherwise.
    """
    children = self.process_children_dict(elm)
    radicand = children.get("e")
    degree = children.get("deg")
    if not degree:
        return RAD_DEFAULT.format(text=radicand)
    return RAD.format(deg=degree, text=radicand)
def do_eqarr(self, elm):
    """
    the Array object

    Each ``e`` child becomes one line; lines are joined with BRK.
    """
    lines = []
    for _stag, converted, _child in self.process_children_list(elm, include=("e",)):
        lines.append(converted)
    return ARR.format(text=BRK.join(lines))
def do_limlow(self, elm):
    """
    the Lower-Limit object

    The converted ``e`` child selects a template from LIM_FUNC; the converted
    ``lim`` child is inserted as the limit.

    Raises:
        NotImplementedError: the ``e`` child is not a key of LIM_FUNC.
    """
    t_dict = self.process_children_dict(elm, include=("e", "lim"))
    latex_s = LIM_FUNC.get(t_dict["e"])
    if not latex_s:
        # ``NotImplemented`` is a constant, not an exception class; calling
        # it raised an unrelated TypeError.
        raise NotImplementedError("Not support lim %s" % t_dict["e"])
    return latex_s.format(lim=t_dict.get("lim"))
def do_limupp(self, elm):
    """
    the Upper-Limit object

    Places the converted ``lim`` child over the converted ``e`` child.
    """
    children = self.process_children_dict(elm, include=("e", "lim"))
    upper = children.get("lim")
    base = children.get("e")
    return LIM_UPP.format(lim=upper, text=base)
def do_lim(self, elm):
    """
    the lower limit of the limLow object and the upper limit of the limUpp function

    Occurrences of LIM_TO[0] in the converted text are replaced by LIM_TO[1].
    """
    long_arrow, short_arrow = LIM_TO
    converted = self.process_children(elm)
    return converted.replace(long_arrow, short_arrow)
def do_m(self, elm):
    """
    the Matrix object

    Only ``mr`` (row) children contribute; rows are joined with BRK.
    """
    # The old code tested ``stag is "mPr"``, an identity comparison against a
    # string literal (unreliable, and a SyntaxWarning on current Python).
    # That branch did nothing, so selecting rows by equality is sufficient.
    rows = [t for stag, t, e in self.process_children_list(elm) if stag == "mr"]
    return M.format(text=BRK.join(rows))
def do_mr(self, elm):
    """
    a single row of the matrix m

    The converted ``e`` children are the cells, joined with ALN.
    """
    cells = []
    for _stag, converted, _child in self.process_children_list(elm, include=("e",)):
        cells.append(converted)
    return ALN.join(cells)
def do_nary(self, elm):
    """
    the n-ary object

    The operator comes from the character recorded in ``naryPr`` (translated
    through CHR_BO); all other converted children are appended after it.
    """
    operator = ""
    operands = []
    for stag, converted, _child in self.process_children_list(elm):
        if stag != "naryPr":
            operands.append(converted)
            continue
        operator = get_val(converted.chr, store=CHR_BO)
    return operator + BLANK.join(operands)
def do_r(self, elm):
    r"""
    Get text from 'r' element,And try convert them to latex symbols

    Each character of the first ``t`` child's text is translated through
    ``self._t_dict`` and the joined result is passed to escape_latex.  A run
    without a ``t`` child converts to an empty string.

    @todo text style support , (sty)
    @todo \text (latex pure text support)
    """
    # findtext() returns None when there is no ``t`` child; iterating over
    # None used to raise TypeError.
    text = elm.findtext("./{0}t".format(OMML_NS)) or BLANK
    _str = [self._t_dict.get(s, s) for s in text]
    return escape_latex(BLANK.join(_str))
# Namespace-stripped tag -> handler function for the elements handled here.
tag2meth = dict(
    acc=do_acc,
    r=do_r,
    bar=do_bar,
    sub=do_sub,
    sup=do_sup,
    f=do_f,
    func=do_func,
    fName=do_fname,
    groupChr=do_groupchr,
    d=do_d,
    rad=do_rad,
    eqArr=do_eqarr,
    limLow=do_limlow,
    limUpp=do_limupp,
    lim=do_lim,
    m=do_m,
    mr=do_mr,
    nary=do_nary,
)

View file

@ -75,5 +75,6 @@ class DocxConverter(HtmlConverter):
style_map = kwargs.get("style_map", None) style_map = kwargs.get("style_map", None)
pre_process_stream = pre_process_docx(file_stream) pre_process_stream = pre_process_docx(file_stream)
return self._html_converter.convert_string( return self._html_converter.convert_string(
mammoth.convert_to_html(pre_process_stream, style_map=style_map).value, **kwargs mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
**kwargs,
) )

View file

@ -272,9 +272,10 @@ def test_docx_equations() -> None:
assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found" assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found"
# Find block equations wrapped with double $$ and check if they are present # Find block equations wrapped with double $$ and check if they are present
block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content) block_equations = re.findall(r"\$\$(.+?)\$\$", result.text_content)
assert block_equations, "No block equations found in the document." assert block_equations, "No block equations found in the document."
def test_input_as_strings() -> None: def test_input_as_strings() -> None:
markitdown = MarkItDown() markitdown = MarkItDown()