refactor: reformatted with black

This commit is contained in:
Sathindu Ganhala Arachchige 2025-03-28 16:24:27 -04:00
parent 6a66b275bb
commit 799a1caf97
4 changed files with 559 additions and 519 deletions

View file

@ -7,269 +7,267 @@ On 25/03/2025
from __future__ import unicode_literals
# Characters that get a backslash prefix when text is escaped for LaTeX.
CHARS = tuple("{}_^#&$%~")

# Empty string, used as the joiner for converted fragments.
BLANK = ""

# A single backslash character.
BACKSLASH = "\\"

# Column separator placed between the cells of a matrix row.
ALN = "&"
# Unicode combining / grouping character -> LaTeX template.
#
# Every value is a ``str.format`` template with one positional field: the
# triple braces ``{{{0}}}`` render as ``{<argument>}``.  Braces must therefore
# stay balanced; an extra ``}`` makes ``str.format`` raise ``ValueError``.
CHR = {
    # Top accents
    "\u0300": "\\grave{{{0}}}",
    "\u0301": "\\acute{{{0}}}",
    "\u0302": "\\hat{{{0}}}",
    "\u0303": "\\tilde{{{0}}}",
    "\u0304": "\\bar{{{0}}}",
    "\u0305": "\\overbar{{{0}}}",
    "\u0306": "\\breve{{{0}}}",
    "\u0307": "\\dot{{{0}}}",
    "\u0308": "\\ddot{{{0}}}",
    "\u0309": "\\ovhook{{{0}}}",
    "\u030a": "\\ocirc{{{0}}}",  # was "...}}}}": unbalanced brace
    "\u030c": "\\check{{{0}}}",  # was "...}}}}": unbalanced brace
    "\u0310": "\\candra{{{0}}}",
    "\u0312": "\\oturnedcomma{{{0}}}",
    "\u0315": "\\ocommatopright{{{0}}}",
    "\u031a": "\\droang{{{0}}}",
    "\u0338": "\\not{{{0}}}",
    "\u20d0": "\\leftharpoonaccent{{{0}}}",
    "\u20d1": "\\rightharpoonaccent{{{0}}}",
    "\u20d2": "\\vertoverlay{{{0}}}",
    "\u20d6": "\\overleftarrow{{{0}}}",
    "\u20d7": "\\vec{{{0}}}",
    "\u20db": "\\dddot{{{0}}}",
    "\u20dc": "\\ddddot{{{0}}}",
    "\u20e1": "\\overleftrightarrow{{{0}}}",
    "\u20e7": "\\annuity{{{0}}}",
    "\u20e9": "\\widebridgeabove{{{0}}}",
    "\u20f0": "\\asteraccent{{{0}}}",
    # Bottom accents
    "\u0330": "\\wideutilde{{{0}}}",
    "\u0331": "\\underbar{{{0}}}",
    "\u20e8": "\\threeunderdot{{{0}}}",
    "\u20ec": "\\underrightharpoondown{{{0}}}",
    "\u20ed": "\\underleftharpoondown{{{0}}}",
    "\u20ee": "\\underleftarrow{{{0}}}",  # was misspelled "underledtarrow"
    "\u20ef": "\\underrightarrow{{{0}}}",
    # Over | group
    "\u23b4": "\\overbracket{{{0}}}",
    "\u23dc": "\\overparen{{{0}}}",
    "\u23de": "\\overbrace{{{0}}}",
    # Under | group
    "\u23b5": "\\underbracket{{{0}}}",
    "\u23dd": "\\underparen{{{0}}}",
    "\u23df": "\\underbrace{{{0}}}",
}
# Big (n-ary) operators: Unicode character -> LaTeX command (no trailing space).
CHR_BO = dict(
    [
        ("\u2140", "\\Bbbsum"),
        ("\u220f", "\\prod"),
        ("\u2210", "\\coprod"),
        ("\u2211", "\\sum"),
        ("\u222b", "\\int"),
        ("\u22c0", "\\bigwedge"),
        ("\u22c1", "\\bigvee"),
        ("\u22c2", "\\bigcap"),
        ("\u22c3", "\\bigcup"),
        ("\u2a00", "\\bigodot"),
        ("\u2a01", "\\bigoplus"),
        ("\u2a02", "\\bigotimes"),
    ]
)
# Single-character translation table: Unicode math character -> LaTeX text.
#
# LaTeX commands carry a trailing space so that they do not run into the
# character that follows them once fragments are concatenated.
T = {
    # Greek letters (mathematical italic small alpha .. omega, plus variants)
    "\U0001d6fc": "\\alpha ",
    "\U0001d6fd": "\\beta ",
    "\U0001d6fe": "\\gamma ",
    "\U0001d6ff": "\\delta ",  # was "\\theta " (copy/paste slip)
    "\U0001d700": "\\epsilon ",
    "\U0001d701": "\\zeta ",
    "\U0001d702": "\\eta ",
    "\U0001d703": "\\theta ",
    "\U0001d704": "\\iota ",
    "\U0001d705": "\\kappa ",
    "\U0001d706": "\\lambda ",
    "\U0001d707": "\\mu ",  # was truncated to "\\m "
    "\U0001d708": "\\nu ",  # was truncated to "\\n "
    "\U0001d709": "\\xi ",
    "\U0001d70a": "\\omicron ",
    "\U0001d70b": "\\pi ",
    "\U0001d70c": "\\rho ",
    "\U0001d70d": "\\varsigma ",
    "\U0001d70e": "\\sigma ",
    "\U0001d70f": "\\tau ",  # was truncated to "\\ta "
    "\U0001d710": "\\upsilon ",
    "\U0001d711": "\\phi ",
    "\U0001d712": "\\chi ",
    "\U0001d713": "\\psi ",
    "\U0001d714": "\\omega ",
    "\U0001d715": "\\partial ",
    "\U0001d716": "\\varepsilon ",
    "\U0001d717": "\\vartheta ",
    "\U0001d718": "\\varkappa ",
    "\U0001d719": "\\varphi ",
    "\U0001d71a": "\\varrho ",
    "\U0001d71b": "\\varpi ",
    # Relation symbols
    "\u2190": "\\leftarrow ",
    "\u2191": "\\uparrow ",
    "\u2192": "\\rightarrow ",  # previously listed twice in this table
    "\u2193": "\\downarrow ",  # was "\\downright ", which is not a command
    "\u2194": "\\leftrightarrow ",
    "\u2195": "\\updownarrow ",
    "\u2196": "\\nwarrow ",
    "\u2197": "\\nearrow ",
    "\u2198": "\\searrow ",
    "\u2199": "\\swarrow ",
    "\u22ee": "\\vdots ",
    "\u22ef": "\\cdots ",
    "\u22f0": "\\adots ",
    "\u22f1": "\\ddots ",
    "\u2260": "\\ne ",
    "\u2264": "\\leq ",
    "\u2265": "\\geq ",
    "\u2266": "\\leqq ",
    "\u2267": "\\geqq ",
    "\u2268": "\\lneqq ",
    "\u2269": "\\gneqq ",
    "\u226a": "\\ll ",
    "\u226b": "\\gg ",
    "\u2208": "\\in ",
    "\u2209": "\\notin ",
    "\u220b": "\\ni ",
    "\u220c": "\\nni ",
    # Ordinary symbols
    "\u221e": "\\infty ",
    # Binary operators
    "\u00b1": "\\pm ",
    "\u2213": "\\mp ",
    # Italic, Latin, uppercase
    "\U0001d434": "A",
    "\U0001d435": "B",
    "\U0001d436": "C",
    "\U0001d437": "D",
    "\U0001d438": "E",
    "\U0001d439": "F",
    "\U0001d43a": "G",
    "\U0001d43b": "H",
    "\U0001d43c": "I",
    "\U0001d43d": "J",
    "\U0001d43e": "K",
    "\U0001d43f": "L",
    "\U0001d440": "M",
    "\U0001d441": "N",
    "\U0001d442": "O",
    "\U0001d443": "P",
    "\U0001d444": "Q",
    "\U0001d445": "R",
    "\U0001d446": "S",
    "\U0001d447": "T",
    "\U0001d448": "U",
    "\U0001d449": "V",
    "\U0001d44a": "W",
    "\U0001d44b": "X",
    "\U0001d44c": "Y",
    "\U0001d44d": "Z",
    # Italic, Latin, lowercase
    "\U0001d44e": "a",
    "\U0001d44f": "b",
    "\U0001d450": "c",
    "\U0001d451": "d",
    "\U0001d452": "e",
    "\U0001d453": "f",
    "\U0001d454": "g",
    # U+1D455 is unassigned in Unicode; italic h is U+210E (PLANCK CONSTANT).
    "\u210e": "h",
    "\U0001d456": "i",
    "\U0001d457": "j",
    "\U0001d458": "k",
    "\U0001d459": "l",
    "\U0001d45a": "m",
    "\U0001d45b": "n",
    "\U0001d45c": "o",
    "\U0001d45d": "p",
    "\U0001d45e": "q",
    "\U0001d45f": "r",
    "\U0001d460": "s",
    "\U0001d461": "t",
    "\U0001d462": "u",
    "\U0001d463": "v",
    "\U0001d464": "w",
    "\U0001d465": "x",
    "\U0001d466": "y",
    "\U0001d467": "z",
}
# Supported function names -> LaTeX template.  ``{fe}`` is kept as a literal
# placeholder in each value (these templates are not ``str.format`` strings).
FUNC = {
    name: "\\" + name + "({fe})"
    for name in (
        "sin",
        "cos",
        "tan",
        "arcsin",
        "arccos",
        "arctan",
        "arccot",
        "sinh",
        "cosh",
        "tanh",
        "coth",
        "sec",
        "csc",
    )
}
# Literal placeholder inside FUNC templates that marks where the function
# argument goes.
FUNC_PLACE = "{fe}"

# LaTeX line break (two backslashes).
BRK = "\\\\"

# Fallback accent template.
CHR_DEFAULT = {
    "ACC_VAL": "\\hat{{{0}}}",
}

# Bar position -> template.
POS = {
    "top": "\\overline{{{0}}}",  # not sure
    "bot": "\\underline{{{0}}}",
}

# Fallback bar template.
POS_DEFAULT = {
    "BAR_VAL": "\\overline{{{0}}}",
}

# Subscript / superscript templates (one positional field each).
SUB = "_{{{0}}}"

SUP = "^{{{0}}}"

# Fraction type -> template with ``num`` and ``den`` fields.
F = {
    "bar": "\\frac{{{num}}}{{{den}}}",
    "skw": r"^{{{num}}}/_{{{den}}}",
    "noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
    "lin": "{{{num}}}/{{{den}}}",
}

# Fallback fraction template.
F_DEFAULT = "\\frac{{{num}}}{{{den}}}"

# Delimiter template; ``left`` / ``right`` are the delimiter characters.
D = "\\left{left}{text}\\right{right}"

# Default delimiters; "null" is the LaTeX invisible delimiter ".".
D_DEFAULT = {
    "left": "(",
    "right": ")",
    "null": ".",
}

# Radical with an explicit degree, and the plain square root.
RAD = "\\sqrt[{deg}]{{{text}}}"

RAD_DEFAULT = "\\sqrt{{{text}}}"

# Single-column array.  "\\end" is spelled with an escaped backslash: the
# previous "\end" relied on the invalid escape sequence "\e", which newer
# Python versions warn about.  The resulting string value is unchanged.
ARR = "\\begin{{array}}{{c}}{text}\\end{{array}}"

# Limit-like function name -> template with a ``lim`` field.
LIM_FUNC = {
    "lim": "\\lim_{{{lim}}}",
    "max": "\\max_{{{lim}}}",
    "min": "\\min_{{{lim}}}",
}

# (search, replacement) pair of LaTeX arrow commands.
LIM_TO = ("\\rightarrow", "\\to")

# Upper-limit template.
LIM_UPP = "\\overset{{{lim}}}{{{text}}}"

# Matrix environment; same "\\end" escaping fix as ARR.
M = "\\begin{{matrix}}{text}\\end{{matrix}}"

View file

@ -8,355 +8,395 @@ On 25/03/2025
import xml.etree.ElementTree as ET
from .latex_dict import (CHARS, CHR, CHR_BO, CHR_DEFAULT, POS, POS_DEFAULT
, SUB, SUP, F, F_DEFAULT, T, FUNC, D, D_DEFAULT, RAD, RAD_DEFAULT, ARR
, LIM_FUNC, LIM_TO, LIM_UPP, M, BRK, BLANK, BACKSLASH, ALN, FUNC_PLACE)
from .latex_dict import (
CHARS,
CHR,
CHR_BO,
CHR_DEFAULT,
POS,
POS_DEFAULT,
SUB,
SUP,
F,
F_DEFAULT,
T,
FUNC,
D,
D_DEFAULT,
RAD,
RAD_DEFAULT,
ARR,
LIM_FUNC,
LIM_TO,
LIM_UPP,
M,
BRK,
BLANK,
BACKSLASH,
ALN,
FUNC_PLACE,
)
# Namespace prefix, in ElementTree's "{uri}" tag notation, that is prepended
# to local names (e.g. "oMath") when matching elements and stripped from
# element tags to obtain the short tag name.
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
def load(stream):
    """
    Parse ``stream`` with ``ET.parse`` and lazily yield one ``oMath2Latex``
    object for every ``oMath`` element that ``findall`` returns.
    """
    document = ET.parse(stream)
    math_tag = OMML_NS + "oMath"
    yield from (oMath2Latex(node) for node in document.findall(math_tag))
def load_string(string):
    """
    Parse the XML text ``string`` with ``ET.fromstring`` and lazily yield one
    ``oMath2Latex`` object for every ``oMath`` element that ``findall`` returns.
    """
    document_root = ET.fromstring(string)
    math_tag = OMML_NS + "oMath"
    yield from (oMath2Latex(node) for node in document_root.findall(math_tag))
def escape_latex(strs):
    """
    Return ``strs`` with every character from ``CHARS`` prefixed by a
    backslash, unless the character directly before it is already a backslash.

    Doubled backslashes in the input are collapsed to a single one first.
    """
    text = strs.replace(r"\\", "\\")
    # Pair each character with its predecessor (None for the first one).
    predecessors = (None,) + tuple(text)
    return BLANK.join(
        ch if (ch not in CHARS or prev == BACKSLASH) else BACKSLASH + ch
        for prev, ch in zip(predecessors, text)
    )
def get_val(key, default=None, store=CHR):
    """
    Look ``key`` up in ``store``.

    Returns ``default`` when ``key`` is None, ``key`` itself when ``store`` is
    empty/falsy or does not contain it, and the mapped value otherwise.
    """
    if key is None:
        return default
    if not store:
        return key
    return store.get(key, key)
class Tag2Method(object):
    """
    Base class that dispatches XML elements to handler functions by tag name.

    Subclasses provide a ``tag2meth`` mapping of short tag name -> function
    taking ``(self, elm)``.
    """

    def call_method(self, elm, stag=None):
        """
        Run the handler registered for ``stag`` (derived from ``elm.tag`` when
        not given) and return its result, or None if no handler is registered.
        """
        handlers = self.tag2meth
        if stag is None:
            stag = elm.tag.replace(OMML_NS, "")
        handler = handlers.get(stag)
        return handler(self, elm) if handler else None

    def process_children_list(self, elm, include=None):
        """
        process children of the elm, return iterable

        Yields ``(short_tag, result, child)`` for each child in the OMML
        namespace; children filtered out by ``include`` or whose result is
        None are skipped.
        """
        for child in list(elm):
            full_tag = child.tag
            if OMML_NS not in full_tag:
                continue
            short_tag = full_tag.replace(OMML_NS, "")
            if include and short_tag not in include:
                continue
            result = self.call_method(child, stag=short_tag)
            if result is None:
                # No registered handler (or it produced nothing): fall back.
                result = self.process_unknow(child, short_tag)
            if result is not None:
                yield (short_tag, result, child)

    def process_children_dict(self, elm, include=None):
        """
        process children of the elm, return dict

        Keys are short tag names; a later child with the same tag replaces an
        earlier one.
        """
        return {
            short_tag: result
            for short_tag, result, _child in self.process_children_list(elm, include)
        }

    def process_children(self, elm, include=None):
        """
        process children of the elm, return string
        """
        fragments = []
        for _tag, result, _child in self.process_children_list(elm, include):
            if isinstance(result, Tag2Method):
                fragments.append(str(result))
            else:
                fragments.append(result)
        return BLANK.join(fragments)

    def process_unknow(self, elm, stag):
        """Fallback for tags without a handler; this base version returns None."""
        return None
class Pr(Tag2Method):
    """
    Common properties of an element.

    Processing the children collects the ``val`` attribute of every
    ``chr`` / ``pos`` / ``begChr`` / ``endChr`` / ``type`` child; each is then
    readable as an attribute of the instance (``pr.chr``, ``pr.pos``, ...).
    Names that were not collected read as None.  ``text`` holds the joined
    output of the children's handlers (a line break for each ``brk`` child).
    """

    text = ""

    __val_tags = ("chr", "pos", "begChr", "endChr", "type")

    __innerdict = None  # can't use the __dict__

    def __init__(self, elm):
        self.__innerdict = {}
        self.text = self.process_children(elm)

    def __str__(self):
        return self.text

    def __unicode__(self):
        # Previously ``self.__str__(self)``, which passed ``self`` twice and
        # raised TypeError.
        return self.__str__()

    def __getattr__(self, name):
        # Only reached for names not found by normal lookup: serve them from
        # the collected property values, defaulting to None.
        return self.__innerdict.get(name, None)

    def do_brk(self, elm):
        """Record a line break and return the line-break text."""
        self.__innerdict["brk"] = BRK
        return BRK

    def do_common(self, elm):
        """Store the element's ``val`` attribute under its short tag name."""
        stag = elm.tag.replace(OMML_NS, "")
        if stag in self.__val_tags:
            t = elm.get("{0}val".format(OMML_NS))
            self.__innerdict[stag] = t
        return None

    tag2meth = {
        "brk": do_brk,
        "chr": do_common,
        "pos": do_common,
        "begChr": do_common,
        "endChr": do_common,
        "type": do_common,
    }
class oMath2Latex(Tag2Method):
    """
    Convert oMath element of omml to latex

    The conversion runs in the constructor; the result is available through
    the ``latex`` property and ``str()``.
    """

    _t_dict = T

    # Container tags whose output is simply the concatenation of their children.
    __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")

    def __init__(self, element):
        self._latex = self.process_children(element)

    def __str__(self):
        return self.latex

    def __unicode__(self):
        # Previously ``self.__str__(self)``, which raised TypeError.
        return self.__str__()

    def process_unknow(self, elm, stag):
        """
        Fallback for tags without a handler: pass-through containers are
        flattened, ``*Pr`` elements become ``Pr`` objects, the rest is dropped.
        """
        if stag in self.__direct_tags:
            return self.process_children(elm)
        elif stag[-2:] == "Pr":
            return Pr(elm)
        else:
            return None

    @property
    def latex(self):
        """The converted LaTeX string."""
        return self._latex

    def do_acc(self, elm):
        """
        the accent function
        """
        c_dict = self.process_children_dict(elm)
        latex_s = get_val(
            c_dict["accPr"].chr, default=CHR_DEFAULT.get("ACC_VAL"), store=CHR
        )
        return latex_s.format(c_dict["e"])

    def do_bar(self, elm):
        """
        the bar function
        """
        c_dict = self.process_children_dict(elm)
        pr = c_dict["barPr"]
        latex_s = get_val(pr.pos, default=POS_DEFAULT.get("BAR_VAL"), store=POS)
        return pr.text + latex_s.format(c_dict["e"])

    def do_d(self, elm):
        """
        the delimiter object
        """
        c_dict = self.process_children_dict(elm)
        pr = c_dict["dPr"]
        null = D_DEFAULT.get("null")
        s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
        e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
        # An empty delimiter value is rendered with the "null" delimiter.
        return pr.text + D.format(
            left=null if not s_val else escape_latex(s_val),
            text=c_dict["e"],
            right=null if not e_val else escape_latex(e_val),
        )

    def do_spre(self, elm):
        """
        the Pre-Sub-Superscript object -- Not support yet
        """
        pass

    def do_sub(self, elm):
        """the subscript part: children wrapped in the SUB template"""
        text = self.process_children(elm)
        return SUB.format(text)

    def do_sup(self, elm):
        """the superscript part: children wrapped in the SUP template"""
        text = self.process_children(elm)
        return SUP.format(text)

    def do_f(self, elm):
        """
        the fraction object
        """
        c_dict = self.process_children_dict(elm)
        pr = c_dict["fPr"]
        latex_s = get_val(pr.type, default=F_DEFAULT, store=F)
        return pr.text + latex_s.format(num=c_dict.get("num"), den=c_dict.get("den"))

    def do_func(self, elm):
        """
        the Function-Apply object (Examples:sin cos)
        """
        c_dict = self.process_children_dict(elm)
        func_name = c_dict.get("fName")
        return func_name.replace(FUNC_PLACE, c_dict.get("e"))

    def do_fname(self, elm):
        """
        the func name

        Raises NotImplementedError for a function name that is not in FUNC.
        """
        latex_chars = []
        for stag, t, e in self.process_children_list(elm):
            if stag == "r":
                if FUNC.get(t):
                    latex_chars.append(FUNC[t])
                else:
                    # ``NotImplemented`` is a constant, not an exception class;
                    # calling it raised an unrelated TypeError.
                    raise NotImplementedError("Not support func %s" % t)
            else:
                latex_chars.append(t)
        t = BLANK.join(latex_chars)
        return t if FUNC_PLACE in t else t + FUNC_PLACE  # do_func will replace this

    def do_groupchr(self, elm):
        """
        the Group-Character object
        """
        c_dict = self.process_children_dict(elm)
        pr = c_dict["groupChrPr"]
        latex_s = get_val(pr.chr)
        return pr.text + latex_s.format(c_dict["e"])

    def do_rad(self, elm):
        """
        the radical object
        """
        c_dict = self.process_children_dict(elm)
        text = c_dict.get("e")
        deg_text = c_dict.get("deg")
        if deg_text:
            return RAD.format(deg=deg_text, text=text)
        else:
            return RAD_DEFAULT.format(text=text)

    def do_eqarr(self, elm):
        """
        the Array object
        """
        return ARR.format(
            text=BRK.join(
                [t for stag, t, e in self.process_children_list(elm, include=("e",))]
            )
        )

    def do_limlow(self, elm):
        """
        the Lower-Limit object

        Raises NotImplementedError when the base is not a key of LIM_FUNC.
        """
        t_dict = self.process_children_dict(elm, include=("e", "lim"))
        latex_s = LIM_FUNC.get(t_dict["e"])
        if not latex_s:
            raise NotImplementedError("Not support lim %s" % t_dict["e"])
        else:
            return latex_s.format(lim=t_dict.get("lim"))

    def do_limupp(self, elm):
        """
        the Upper-Limit object
        """
        t_dict = self.process_children_dict(elm, include=("e", "lim"))
        return LIM_UPP.format(lim=t_dict.get("lim"), text=t_dict.get("e"))

    def do_lim(self, elm):
        """
        the lower limit of the limLow object and the upper limit of the limUpp function
        """
        return self.process_children(elm).replace(LIM_TO[0], LIM_TO[1])

    def do_m(self, elm):
        """
        the Matrix object
        """
        # Only "mr" rows contribute; the old ``stag is "mPr"`` identity test on
        # a string literal was both unreliable and a no-op.
        rows = [t for stag, t, e in self.process_children_list(elm, include=("mr",))]
        return M.format(text=BRK.join(rows))

    def do_mr(self, elm):
        """
        a single row of the matrix m
        """
        return ALN.join(
            [t for stag, t, e in self.process_children_list(elm, include=("e",))]
        )

    def do_nary(self, elm):
        """
        the n-ary object
        """
        res = []
        bo = ""
        for stag, t, e in self.process_children_list(elm):
            if stag == "naryPr":
                # Without a default, a naryPr lacking <chr> made ``bo`` None and
                # the concatenation below raised TypeError.
                # NOTE(review): OMML is understood to default a missing n-ary
                # chr to the integral sign -- confirm against ECMA-376.
                bo = get_val(
                    t.chr, default=CHR_BO.get("\u222b", BLANK), store=CHR_BO
                )
            else:
                res.append(t)
        return bo + BLANK.join(res)

    def do_r(self, elm):
        r"""
        Get text from 'r' element,And try convert them to latex symbols
        @todo text style support , (sty)
        @todo \text (latex pure text support)
        """
        # findtext() returns None when the run has no <t> child; treat that as
        # empty text instead of failing while iterating over None.
        text = elm.findtext("./{0}t".format(OMML_NS), default=BLANK)
        # Translate character by character; unknown characters pass through.
        _str = [self._t_dict.get(s, s) for s in text]
        return escape_latex(BLANK.join(_str))

    tag2meth = {
        "acc": do_acc,
        "r": do_r,
        "bar": do_bar,
        "sub": do_sub,
        "sup": do_sup,
        "f": do_f,
        "func": do_func,
        "fName": do_fname,
        "groupChr": do_groupchr,
        "d": do_d,
        "rad": do_rad,
        "eqArr": do_eqarr,
        "limLow": do_limlow,
        "limUpp": do_limupp,
        "lim": do_lim,
        "m": do_m,
        "mr": do_mr,
        "nary": do_nary,
    }

View file

@ -75,5 +75,6 @@ class DocxConverter(HtmlConverter):
style_map = kwargs.get("style_map", None)
pre_process_stream = pre_process_docx(file_stream)
return self._html_converter.convert_string(
mammoth.convert_to_html(pre_process_stream, style_map=style_map).value, **kwargs
mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
**kwargs,
)

View file

@ -272,9 +272,10 @@ def test_docx_equations() -> None:
assert "$m=1$" in result.text_content, "Inline equation $m=1$ not found"
# Find block equations wrapped with double $$ and check if they are present
block_equations = re.findall(r'\$\$(.+?)\$\$', result.text_content)
block_equations = re.findall(r"\$\$(.+?)\$\$", result.text_content)
assert block_equations, "No block equations found in the document."
def test_input_as_strings() -> None:
markitdown = MarkItDown()