# SPDX-FileCopyrightText: 2009 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0
# Description: general purpose XML decoder
import xml.dom.minidom
from collections import UserDict
# from collections import UserDict, OrderedDict
[docs]
class CorruptXML(Exception):
    """Raised when an XML file cannot be parsed because its content is malformed."""
# Dictionaries preserve insertion order (a CPython implementation detail in Python 3.6,
# guaranteed by the language starting with Python 3.7).
# Missing methods (were not in the custom class): __dict__, __reversed__, move_to_end.
# OrderedDict from collections does not maintain the __repr__ of a dict.
class OrderedDict(dict):
    """Plain ``dict`` subclass returned when an ordered dictionary is requested.

    No behavior is added on top of ``dict``: insertion order and the ``dict``
    ``__repr__`` are inherited unchanged.
    """
# TODO: remove this class once the tests pass; it has been replaced by collections.OrderedDict.
# This class was obtained from
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
class OrderedDict2(UserDict):
    """Legacy dictionary that remembers the order in which keys were first inserted.

    The order is tracked in the ``_keys`` list, kept next to the ``UserDict``
    storage. ``keys()``, ``values()`` and ``items()`` return lists (not views)
    following that order.
    """

    def __init__(self, dict=None):
        """Create the dictionary, optionally filled from ``dict``.

        :param dict: initial content (anything ``UserDict`` accepts) or None.
            The name shadows the builtin but is kept for backward compatibility.
        """
        # Must exist before UserDict.__init__ inserts the initial items
        self._keys = []
        UserDict.__init__(self, dict)

    def __delitem__(self, key):
        # UserDict raises KeyError first if the key is missing, so _keys stays consistent
        UserDict.__delitem__(self, key)
        self._keys.remove(key)

    def __setitem__(self, key, item):
        UserDict.__setitem__(self, key, item)
        # Overwriting an existing key keeps its original position
        if key not in self._keys:
            self._keys.append(key)

    def __contains__(self, key):
        return key in self._keys

    def clear(self):
        """Remove all the items."""
        UserDict.clear(self)
        self._keys = []

    def copy(self):
        """Return a shallow copy with its own, independent key order list."""
        duplicate = UserDict.copy(self)
        duplicate._keys = self._keys[:]
        return duplicate

    def items(self):
        """Return a list of (key, value) tuples in insertion order."""
        return list(zip(self._keys, self.values()))

    def keys(self):
        """Return a list of the keys in insertion order.

        A copy is returned so that callers cannot corrupt the internal ordering.
        """
        return list(self._keys)

    def popitem(self):
        """Remove and return the most recently inserted (key, value) pair.

        :raises KeyError: if the dictionary is empty
        """
        if not self._keys:
            raise KeyError("dictionary is empty")
        key = self._keys[-1]
        val = self[key]
        del self[key]
        return (key, val)

    def setdefault(self, key, failobj=None):
        """Return the value of ``key``, inserting ``failobj`` first if the key is missing.

        :param key: key to look up
        :param failobj: value stored (and returned) when the key is missing
        :return: the value associated with ``key`` after the call
        """
        # A missing key is inserted through __setitem__, which also records its position
        return UserDict.setdefault(self, key, failobj)

    def update(self, dict):
        """Insert all the items of ``dict``, appending the new keys in iteration order.

        :param dict: a mapping or an iterable of (key, value) pairs
        """
        # Every insertion goes through __setitem__, which keeps _keys up to date
        UserDict.update(self, dict)

    def values(self):
        """Return a list of the values in insertion order."""
        return [self.get(key) for key in self._keys]
# Convert an XML file into a dictionary,
# ignoring text sections.
def xmlfile2dict(fname, use_ord_dict=False, always_singular_list=None):
    """Convert an XML file into a dictionary (or list), ignoring text sections.

    :param fname: file to parse, passed as is to ``xml.dom.minidom.parse``
    :param use_ord_dict: if True, return OrderedDict instead of a regular dictionary
    :param always_singular_list: tag names listed here are always treated as
        items of their parent element; None (default) means no such names
    :return: dictionary or list with the content of the document element
    :raises CorruptXML: if the XML parser reports an ExpatError
    """
    # Imported explicitly: ``import xml.dom.minidom`` alone does not guarantee
    # that the ``xml.parsers.expat`` submodule is loaded.
    from xml.parsers.expat import ExpatError

    if always_singular_list is None:
        always_singular_list = []
    try:
        doc = xml.dom.minidom.parse(fname)
    except ExpatError as e:
        raise CorruptXML(f"XML corrupt in file {fname}: {e}") from e
    return domel2dict(doc.documentElement, use_ord_dict, always_singular_list)
# Convert an XML string into a dictionary,
# ignoring text sections.
#
# Example:
#  <test date="1/2/07">
#    <params what="xx">
#      <param name="x" value="12"/>
#      <param name="y" value="88"/>
#    </params>
#    <files>
#      <file absname="/tmp/abc.txt"/>
#      <file absname="/tmp/w.log" mod="-rw-r--r--"/>
#    </files>
#    <temperature F="100" C="40"/>
#  </test>
# becomes
#  {u'date': u'1/2/07',
#   u'params': {u'y': {u'value':u'88'},
#               u'x': {u'value':u'12'},
#               u'what': u'xx'},
#   u'files': [{u'absname':u'/tmp/abc.txt'},
#              {u'mod':u'-rw-r--r--',u'absname':u'/tmp/w.log'}],
#   u'temperature': {u'C': u'40',
#                    u'F': u'100'}
#  }
#
def xmlstring2dict(instr, use_ord_dict=False, always_singular_list=None):
    """Convert an XML string into a dictionary (or list), ignoring text sections.

    Parser errors are not wrapped: they propagate as raised by
    ``xml.dom.minidom.parseString``.

    :param instr: XML document, passed as is to ``xml.dom.minidom.parseString``
    :param use_ord_dict: if True, return OrderedDict instead of a regular dictionary
    :param always_singular_list: tag names listed here are always treated as
        items of their parent element; None (default) means no such names
    :return: dictionary or list with the content of the document element
    """
    if always_singular_list is None:
        always_singular_list = []
    doc = xml.dom.minidom.parseString(instr)
    return domel2dict(doc.documentElement, use_ord_dict, always_singular_list)
########################################################
#
# I N T E R N A L
#
# Do not use directly
#
########################################################
[docs]
def getXMLElements(element):
    """Return the direct children of a DOM node that are element nodes.

    Text, comment and any other non-element child nodes are skipped.

    :param element: DOM node whose ``childNodes`` are inspected
    :return: list of the child element nodes, in document order
    """
    return [child for child in element.childNodes if child.nodeType == child.ELEMENT_NODE]
[docs]
def getXMLAttributes(element, use_ord_dict):
    """Return the attributes of a DOM node as a name -> value dictionary.

    :param element: DOM node; nodes without an attribute map (``attributes``
        is None, e.g. a document node) yield an empty dictionary
    :param use_ord_dict: if True, return OrderedDict instead of a regular dictionary
    :return: dictionary mapping each attribute ``nodeName`` to its ``nodeValue``
    """
    attrs = OrderedDict() if use_ord_dict else {}
    ael = element.attributes
    if ael is None:
        # Only element nodes carry an attribute map
        return attrs
    for i in range(ael.length):
        attr = ael.item(i)
        attrs[attr.nodeName] = attr.nodeValue
    return attrs
[docs]
def is_singular_of(mysin, myplu, always_singular_list=None):
    """Tell whether ``mysin`` is the (English) singular form of ``myplu``.

    :param mysin: candidate singular word
    :param myplu: candidate plural word
    :param always_singular_list: words always considered singular, whatever
        ``myplu`` is; None (default) means no such words
    :return: True if ``mysin`` is in ``always_singular_list`` or ``myplu`` is
        ``mysin`` pluralized with "s", "es" (after a final "s") or "ies"
        (replacing a final "y"); False otherwise
    """
    if always_singular_list is not None and mysin in always_singular_list:
        return True
    # endswith() is used instead of [-1] so that empty strings do not raise IndexError
    if not myplu.endswith("s"):
        # if myplu does not end in s, it is not plural
        return False
    if mysin + "s" == myplu:
        # regular, like attr/attrs
        return True
    if mysin.endswith("s") and mysin + "es" == myplu:
        # if ending with an s, like miss/misses
        return True
    if mysin.endswith("y") and mysin[:-1] + "ies" == myplu:
        # if ending with a y, like entry/entries
        return True
    # else, no luck
    return False
[docs]
def domel2dict(doc, use_ord_dict=False, always_singular_list=None):
    """Recursive function transforming XML elements in a dictionary or list.

    The attributes of the node are inserted first, then each child element is
    converted recursively and added as follows.

    If the child tag is the singular of the parent name (english word is analyzed):
      - if the parent is already a list, the child is appended to it
      - if it has a 'name' attribute, it is added to the parent dictionary
        using the name as key (the 'name' entry is removed from the child)
      - if it has no name and the parent is still empty, the parent becomes
        a list and the child is appended to it
      - otherwise it is added to the parent dictionary using the tag as key

    Any other child is a regular subtree, added to the parent dictionary using
    the tag as key.

    :param doc: document or ELEMENT_NODE
    :param use_ord_dict: use ordered dictionary if True
    :param always_singular_list: tags always considered singular of the parent,
        whatever the parent name is; None (default) means no such tags
    :return: dictionary or list with the content
    :raises TypeError: if a regular subtree is found after the parent has
        already become a list (a list cannot be indexed by tag)
    """
    if always_singular_list is None:
        always_singular_list = []
    myname = doc.nodeName
    data = getXMLAttributes(doc, use_ord_dict)  # first insert attributes

    # insert all the subelements
    for el in getXMLElements(doc):
        tag = el.tagName
        eldata = domel2dict(el, use_ord_dict, always_singular_list)
        if is_singular_of(tag, myname, always_singular_list):
            # subelements, like "param" - "params"
            if isinstance(data, list):
                # already a list: keep appending; a named element cannot be
                # used as a key here, so it is appended with its name intact
                data.append(eldata)
            elif "name" in eldata:
                data[eldata["name"]] = eldata
                del eldata["name"]
            elif data == {}:
                # first element, will define everything
                # most probably one wants a list in this case
                data = [eldata]
            else:
                # cannot use it as a list
                data[tag] = eldata
        else:
            # just a regular subtree
            data[tag] = eldata
    return data