"""Store Python objects to (pickle-like) XML Documents Note 0: See http://gnosis.cx/publish/programming/xml_matters_1.txt for a detailed discussion of this module. Note 1: The XML-SIG distribution is changed fairly frequently while it is in beta versions. The changes in turn are extremely likely to affect the functioning of [xml_pickle]. This version of [xml_pickle] is known to work with PyXML 0.6.1, and will probably continue to work with later betas and also with the XML-SIG distribution included in Python 2.0. To download a current version of PyXML, go to: http://download.sourceforge.net/pyxml/ Version 0.22 of [xml_pickle] was designed around PyXML 0.5.2. If you use an older version of PyXML, you may download a compatible version of [xml_pickle] at: http://gnosis.cx/download/xml_pickle-0.22.py And a known-to-be-compatible PyXML distribution at: http://gnosis.cx/download/py_xml_04-21-00.exe http://gnosis.cx/download/py_xml_04-21-00.zip The first URL is the Windows self-installer, the latter is simply an archive of those files to be unpacked under $PYTHONPATH/xml. Usage: # By inheritence from xml_pickle import XML_Pickler class MyClass(XML_Pickler): # create some behavior and attributes for MyClass... o1 = MyClass() xml_str = o.dumps() o2 = MyClass() o2.loads(xml_str) # With inline instantiation from xml_pickle import XML_Pickler o1 = DataClass() # ...assign attribute values to o1... xml_str = XML_Pickler(o1).dumps() o2 = XML_Pickler().loads(xml_str) Classes: PyObject XML_Pickler Functions: thing_from_dom(dom_node, container) obj_from_node(node) subnodes(node) _attr_tag(...) _item_tag(...) _entry_tag(...) _tag_completer(...) _klass(...) safe_eval(s) safe_string(s) unsafe_string(s) """ __version__ = "$Revision: 0.30 $" __author__=["David Mertz (mertz@gnosis.cx)",] __thanks_to__=["Grant Munsey (gmunsey@Adobe.COM)", "Keith J. Farmer (deoradh@yahoo.com)", "Anthony Baxter (anthony@interlink.com.au)"] __copyright__=""" This file is released to the public domain. I (dqm) would appreciate it if you choose to keep derived works under terms that promote freedom, but obviously am giving up any rights to compel such. """ __history__=""" 0.1 Initial version 0.22 Compatible with PyXML 0.52 0.30 Compatible with PyXML 0.61+ """ from types import * from xml.dom import ext from xml.dom.ext.reader import Sax2 import cStringIO XMLPicklingError = "xml_pickle.XMLPicklingError" XMLUnpicklingError = "xml_pickle.XMLUnpicklingError" class PyObject: """Placeholder template class""" def __init__(self, __fakename__=None): if __fakename__: self.__fakename__ = __fakename__ class XML_Pickler: """Framework for 'pickle to XML'""" def __init__(self, py_obj=None): if py_obj is not None: if type(py_obj)<>InstanceType: raise ValueError, \ "XML_Pickler must be initialized with Instance (or None)" self.py_obj = py_obj else: self.py_obj = PyObject(self.__class__.__name__) def __setattr__(self, name, value): if name == 'py_obj': self.__dict__[name] = value else: setattr(self.py_obj, name, value) def __getattr__(self, name): return getattr(self.py_obj, name) def __delattr__(self, name): del self.py_obj.__dict__[name] def dump(self, fh): # admittedly, our approach requires creating whole output XML in # memory first, which could be large for complex object. Maybe # we'll make this more efficient later. fh.write(self.dumps()) def load(self, fh): return thing_from_dom(Sax2.FromXml(fh.read(), validate=0)) def dumps(self): xml_str = '\n' +\ '\n' xml_str = xml_str+'\n' % _klass(self.py_obj) for name in dir(self.py_obj): xml_str = xml_str+_attr_tag(name, getattr(self, name)) xml_str = xml_str+'' return xml_str def loads(self, xml_str): fh = cStringIO.StringIO(xml_str) obj = self.load(fh) fh.close() return obj #-- support functions def thing_from_dom(dom_node, container=None): """Converts an [xml_pickle] DOM tree to a "native" Python object""" for node in subnodes(dom_node): if node.nodeName == "PyObject": # Add all the subnodes to PyObject container container = thing_from_dom(node, obj_from_node(node)) elif node.nodeName == 'attr': try: node_type = node.attributes[('','type')].value except: print "node", node.attributes, repr(node.attributes) print node.attributes.keys() raise # WHAT?! node_name = node.attributes[('', 'name')].value if node_type == 'None': setattr(container, node_name, None) elif node_type == 'numeric': node_val = safe_eval(node.attributes[('','value')].value) setattr(container, node_name, node_val) elif node_type == 'string': node_val = node.attributes[('','value')].value node_val = unsafe_string(node_val) setattr(container, node_name, node_val) elif node_type == 'list': subcontainer = thing_from_dom(node, []) setattr(container, node_name, subcontainer) elif node_type == 'tuple': subcontainer = thing_from_dom(node, []) # use list then convert setattr(container, node_name, tuple(subcontainer)) elif node_type == 'dict': subcontainer = thing_from_dom(node, {}) setattr(container, node_name, subcontainer) elif node_type == 'PyObject': subcontainer = thing_from_dom(node, obj_from_node(node)) setattr(container, node_name, subcontainer) elif node.nodeName in ['item', 'key', 'val']: # -- Odd behavior warning -- # The 'node_type' expression has an odd tendency to be a # one-element tuple rather than a string. Doing the str() # fixes things, but I'm not sure why! # -- About key/val nodes -- # There *should not* be mutable types as keys, but to cover # all cases, elif's are defined for mutable types. Furthermore, # there should only ever be *one* item in any key/val list, # but we again rely on other validation of the XML happening. node_type = str(node.attributes[('','type')].value) if node_type == 'numeric': node_val = safe_eval(node.attributes[('','value')].value) container.append(node_val) elif node_type == 'string': node_val = node.attributes[('','value')].value node_val = unsafe_string(node_val) container.append(node_val) elif node_type == 'list': subcontainer = thing_from_dom(node, []) container.append(subcontainer) elif node_type == 'tuple': subcontainer = thing_from_dom(node, []) # use list then convert container.append(tuple(subcontainer)) elif node_type == 'dict': subcontainer = thing_from_dom(node, {}) container.append(subcontainer) elif node_type == 'PyObject': subcontainer = thing_from_dom(node, obj_from_node(node)) container.append(subcontainer) elif node.nodeName == 'entry': keyval = thing_from_dom(node, []) key, val = keyval[0], keyval[1] container[key] = val else: raise XMLUnpicklingError, \ "element %s is not in PyObjects.dtd" % node.nodeName return container def obj_from_node(node): # Get classname of object (with fallback to 'PyObject') try: if node.attributes: klass = node.attributes[('','class')].value else: klass = 'PyObject' except KeyError: klass = 'PyObject' # does the class exist, or should we create it? try: safe_eval(klass) except NameError: exec ('class %s: pass' % klass) return eval('%s()' % klass) def subnodes(node): return filter(lambda n: n.nodeName<>'#text', node.childNodes) def _attr_tag(name, thing, level=0): start_tag = ' '*level+('\n' if name == '__fakename__': return '' else: return _tag_completer(start_tag, thing, close_tag, level) def _item_tag(thing, level=0): start_tag = ' '*level+'\n' return _tag_completer(start_tag, thing, close_tag, level) def _entry_tag(key, val, level=0): start_tag = ' '*level+'\n' close_tag = ' '*level+'\n' start_key = ' '*level+' \n' key_block = _tag_completer(start_key, key, close_key, level+1) start_val = ' '*level+' \n' val_block = _tag_completer(start_val, val, close_val, level+1) return (start_tag + key_block + val_block + close_tag) def _tag_completer(start_tag, thing, close_tag, level=0): tag_body = '' if type(thing) == NoneType: start_tag = start_tag+'type="None" />\n' close_tag = '' elif type(thing) in [IntType, LongType, FloatType, ComplexType]: start_tag = start_tag+'type="numeric" value="%s" />\n' % `thing` close_tag = '' elif type(thing) in [StringType]: thing = safe_string(thing) start_tag = start_tag+'type="string" value="%s" />\n' % thing close_tag = '' elif type(thing) in [TupleType]: start_tag = start_tag+'type="tuple">\n' for item in thing: tag_body = tag_body+_item_tag(item, level+1) elif type(thing) in [ListType]: start_tag = start_tag+'type="list">\n' for item in thing: tag_body = tag_body+_item_tag(item, level+1) elif type(thing) in [DictType]: start_tag = start_tag+'type="dict">\n' for key, val in thing.items(): tag_body = tag_body+_entry_tag(key, val, level+1) elif type(thing) in [InstanceType]: start_tag = start_tag+'type="PyObject" class="%s">\n' % _klass(thing) for name in dir(thing): tag_body = tag_body+_attr_tag(name, getattr(thing, name), level+1) else: raise XMLPicklingError, "non-handled type %s." % type(thing) return start_tag+tag_body+close_tag def _klass(thing): if type(thing)<>InstanceType: raise ValueError, \ "non-Instance type %s passed to _klass()" % type(thing) if hasattr(thing, '__fakename__'): return thing.__fakename__ else: return thing.__class__.__name__ def safe_eval(s): if 0: # Condition for malicious string in eval() block raise "SecurityError", \ "Malicious string '%s' should not be eval()'d" % s else: return eval(s) def safe_string(s): import string, re # markup XML entities s = string.replace(s, '&', '&') s = string.replace(s, '<', '<') s = string.replace(s, '>', '>') s = string.replace(s, '"', '"') s = string.replace(s, "'", ''') # for others, use Python style escapes s = repr(s) return s[1:-1] # without the extra single-quotes def unsafe_string(s): import string, re # for Python escapes, exec the string # (niggle w/ literalizing apostrophe) s = string.replace(s, "'", r"\047") exec "s='"+s+"'" # XML entities (DOM does it for us) return s #-- Hand generated test object test_xml = """ """ #-- Self test if __name__ == "__main__": class MyClass: pass o = XML_Pickler() o.num = 37 o.str = "Hello World \n Special Chars: \t \000 < > & ' \207" o.lst = [1, 3.5, 2, 4+7j] o2 = MyClass() o2.tup = ("x", "y", "z") o2.num = 2+2j o2.dct = { "this": "that", "spam": "eggs", 3.14: "about PI" } o.obj = o2 print '------* Print python-defined pickled object *-----' print o.dumps() print '-----* Load a test xml_pickle object, and print it *-----' u = o.loads(test_xml) print XML_Pickler(u).dumps()