# Source code for glideinwms.lib.xmlParse

# SPDX-FileCopyrightText: 2009 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

# Description: general purpose XML decoder

import xml.dom.minidom

from collections import UserDict

# from collections import UserDict, OrderedDict


class CorruptXML(Exception):
    """Raised when an XML document cannot be parsed because it is malformed."""
# Dictionaries are guaranteed to maintain insertion order starting with py3.7
# (in CPython 3.6 this was only an implementation detail).
# Missing methods (were not in the custom class): __dict__, __reversed__, move_to_end
# OrderedDict from collections is not maintaining the __repr__ of a dict
class OrderedDict(dict):
    """Plain ``dict`` subclass used where an ordered dictionary is requested.

    It adds nothing to ``dict``: ordering, equality and ``repr`` are exactly
    the ones of the built-in dictionary.
    """
# TODO: remove this class once tests pass. Replaced by collections.OrderedDict
# This class was obtained from
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
class OrderedDict2(UserDict):
    """Legacy ordered dictionary built on ``UserDict``.

    The order in which keys were first inserted is tracked in the ``_keys``
    list, which is kept in sync with the underlying mapping by
    ``__setitem__`` and ``__delitem__``.
    """

    def __init__(self, dict=None):
        # _keys must exist before the base initializer runs: loading the
        # initial content goes through the overridden methods below.
        self._keys = []
        UserDict.__init__(self, dict)

    def __delitem__(self, key):
        UserDict.__delitem__(self, key)
        self._keys.remove(key)

    def __setitem__(self, key, item):
        UserDict.__setitem__(self, key, item)
        # Re-assigning an existing key keeps its original position
        if key not in self._keys:
            self._keys.append(key)

    def __contains__(self, key):
        return key in self._keys

    def clear(self):
        """Remove all the items."""
        UserDict.clear(self)
        self._keys = []

    def copy(self):
        """Return a shallow copy with its own, independent key order list."""
        duplicate = UserDict.copy(self)
        duplicate._keys = self._keys[:]
        return duplicate

    def items(self):
        """Return the list of (key, value) pairs, in insertion order."""
        return list(zip(self._keys, self.values()))

    def keys(self):
        """Return the list of keys, in insertion order.

        A copy is returned, so that callers modifying the list cannot
        corrupt the internal ordering.
        """
        return list(self._keys)

    def popitem(self):
        """Remove and return the most recently inserted (key, value) pair.

        :raises KeyError: if the dictionary is empty
        """
        try:
            key = self._keys[-1]
        except IndexError:
            raise KeyError("dictionary is empty") from None
        val = self[key]
        del self[key]
        return (key, val)

    def setdefault(self, key, failobj=None):
        """Return the value of ``key``, inserting ``failobj`` first if it is missing."""
        # The insertion goes through __setitem__, which records a new key.
        # The value must be returned, as for dict.setdefault.
        return UserDict.setdefault(self, key, failobj)

    def update(self, dict):
        """Add or replace the items found in ``dict``."""
        # Every assignment goes through __setitem__, which records new keys
        UserDict.update(self, dict)

    def values(self):
        """Return the list of values, in key insertion order."""
        return [self.get(key) for key in self._keys]
# convert an XML file into a dictionary
# ignore text sections
def xmlfile2dict(fname, use_ord_dict=False, always_singular_list=()):
    """Parse an XML file and convert it into a dictionary, ignoring text sections.

    :param fname: file to parse, handed to ``xml.dom.minidom.parse``
    :param use_ord_dict: if True, return OrderedDict instead of a regular dictionary
    :param always_singular_list: any tag listed here is always treated as the
        singular form of its parent tag (i.e. as an element of a list)
    :return: the result of ``domel2dict`` on the document element
    :raises CorruptXML: if the file content is not well-formed XML
    """
    # Imported explicitly: this module only imports xml.dom.minidom, so
    # xml.parsers.expat would otherwise be reachable only as a side effect
    # of minidom loading its parser.
    from xml.parsers.expat import ExpatError

    try:
        doc = xml.dom.minidom.parse(fname)
    except ExpatError as e:
        raise CorruptXML(f"XML corrupt in file {fname}: {e}") from e
    return domel2dict(doc.documentElement, use_ord_dict, always_singular_list)
# convert an XML string into a dictionary
# ignore text sections
#
# Example:
#  <test date="1/2/07">
#   <params what="xx">
#    <param name="x" value="12"/>
#    <param name="y" value="88"/>
#   </params>
#   <files>
#    <file absname="/tmp/abc.txt"/>
#    <file absname="/tmp/w.log" mod="-rw-r--r--"/>
#   </files>
#   <temperature F="100" C="40"/>
#  </test>
# becomes
#  {u'date': u'1/2/07',
#   u'params': {u'y': {u'value':u'88'},
#               u'x': {u'value':u'12'},
#               u'what': u'xx'},
#   u'files': [{u'absname':u'/tmp/abc.txt'},
#              {u'mod':u'-rw-r--r--',u'absname':u'/tmp/w.log'}],
#   u'temperature': {u'C': u'40',
#                    u'F': u'100'}
#  }
#
def xmlstring2dict(instr, use_ord_dict=False, always_singular_list=()):
    """Parse an XML string and convert it into a dictionary, ignoring text sections.

    Parsing errors raised by ``xml.dom.minidom.parseString`` are not
    intercepted and propagate to the caller.

    :param instr: string with the XML document
    :param use_ord_dict: if True, return OrderedDict instead of a regular dictionary
    :param always_singular_list: any tag listed here is always treated as the
        singular form of its parent tag (i.e. as an element of a list)
    :return: the result of ``domel2dict`` on the document element
    """
    doc = xml.dom.minidom.parseString(instr)
    return domel2dict(doc.documentElement, use_ord_dict, always_singular_list)
########################################################
#
# I N T E R N A L
#
# Do not use directly
#
########################################################
def getXMLElements(element):
    """Return the direct children of ``element`` that are element nodes.

    Any other kind of child node (text, comments, ...) is skipped.

    :param element: DOM node whose ``childNodes`` are inspected
    :return: list of the child element nodes, in document order
    """
    return [child for child in element.childNodes if child.nodeType == child.ELEMENT_NODE]
def getXMLAttributes(element, use_ord_dict):
    """Return the attributes of a DOM node as a name -> value dictionary.

    :param element: DOM node; nodes without an attribute map (e.g. the
        document node, whose ``attributes`` is None) yield an empty result
    :param use_ord_dict: if True, return an OrderedDict instead of a regular dictionary
    :return: dictionary with one item per attribute
    """
    attrs = OrderedDict() if use_ord_dict else {}
    attr_map = element.attributes
    if attr_map is None:
        # Only element nodes carry attributes
        return attrs
    for idx in range(attr_map.length):
        attr = attr_map.item(idx)
        attrs[attr.nodeName] = attr.nodeValue
    return attrs
def is_singular_of(mysin, myplu, always_singular_list=()):
    """Tell whether ``mysin`` is the (English) singular form of ``myplu``.

    Only simple rules are applied: attr/attrs, miss/misses, entry/entries.
    Empty strings are accepted and never match any of these rules.

    :param mysin: candidate singular word
    :param myplu: candidate plural word
    :param always_singular_list: words always considered singular, whatever ``myplu`` is
    :return: True if ``mysin`` is considered the singular of ``myplu``
    """
    if mysin in always_singular_list:
        return True
    if not myplu.endswith("s"):
        # if myplu does not end in s, it is not plural
        return False
    if mysin + "s" == myplu:
        # regular, like attr/attrs
        return True
    if mysin.endswith("s") and mysin + "es" == myplu:
        # if ending with an s, like miss/misses
        return True
    if mysin.endswith("y") and mysin[:-1] + "ies" == myplu:
        # if ending with a y, like entry/entries
        return True
    # else, no luck
    return False
def domel2dict(doc, use_ord_dict=False, always_singular_list=()):
    """Recursive function transforming XML elements in a dictionary or list.

    The attributes of the node are inserted first, then each child element
    is converted recursively and added as follows:

    - if the child tag is not the singular of this node's name (the english
      word is analyzed), the child is stored under its tag name
    - if it is the singular and the child has a 'name' attribute, the child
      is stored under that name (and 'name' is removed from the child)
    - if it is the singular without a 'name', and nothing was collected so
      far (or a list was already started), the child is appended to a list
    - otherwise the child is stored under its tag name

    NOTE(review): once the result has become a list, a later child that has
    to be stored by key cannot be inserted (list item assignment with a
    string key fails) - confirm whether such documents must be supported.

    :param doc: document or ELEMENT_NODE
    :param use_ord_dict: use ordered dictionary if True
    :param always_singular_list: these are always considered the singular of
        the parent tag, even if the words do not match
    :return: dictionary or list with the content
    """
    myname = doc.nodeName
    data = getXMLAttributes(doc, use_ord_dict)  # first insert attributes

    # then insert all the subelements
    for el in getXMLElements(doc):
        tag = el.tagName
        eldata = domel2dict(el, use_ord_dict, always_singular_list)
        if not is_singular_of(tag, myname, always_singular_list):
            # just a regular subtree
            data[tag] = eldata
        elif "name" in eldata:
            # named subelement, like "param" - "params": the name is the key
            data[eldata["name"]] = eldata
            del eldata["name"]
        elif data == {}:
            # first element, will define everything
            # most probably one wants a list in this case
            data = [eldata]
        elif isinstance(data, list):
            # already a list
            data.append(eldata)
        else:
            # cannot use it as a list
            data[tag] = eldata
    return data