"""Store Python objects to (pickle-like) XML Documents
Note 0:
See http://gnosis.cx/publish/programming/xml_matters_1.txt
for a detailed discussion of this module.
Note 1:
The XML-SIG distribution is changed fairly frequently while
it is in beta versions. The changes in turn are extremely
likely to affect the functioning of [xml_pickle].
This version of [xml_pickle] is known to work with PyXML
0.6.1, and will probably continue to work with later betas
and also with the XML-SIG distribution included in Python
2.0. To download a current version of PyXML, go to:
http://download.sourceforge.net/pyxml/
Version 0.22 of [xml_pickle] was designed around PyXML 0.5.2.
If you use an older version of PyXML, you may download a
compatible version of [xml_pickle] at:
http://gnosis.cx/download/xml_pickle-0.22.py
And a known-to-be-compatible PyXML distribution at:
http://gnosis.cx/download/py_xml_04-21-00.exe
http://gnosis.cx/download/py_xml_04-21-00.zip
The first URL is the Windows self-installer, the latter is
simply an archive of those files to be unpacked under
$PYTHONPATH/xml.
Usage:
# By inheritance
from xml_pickle import XML_Pickler
class MyClass(XML_Pickler):
# create some behavior and attributes for MyClass...
o1 = MyClass()
xml_str = o1.dumps()
o2 = MyClass().loads(xml_str)   # loads() returns the rebuilt object
# With inline instantiation
from xml_pickle import XML_Pickler
o1 = DataClass()
# ...assign attribute values to o1...
xml_str = XML_Pickler(o1).dumps()
o2 = XML_Pickler().loads(xml_str)
Classes:
PyObject
XML_Pickler
Functions:
thing_from_dom(dom_node, container)
obj_from_node(node)
subnodes(node)
_attr_tag(...)
_item_tag(...)
_entry_tag(...)
_tag_completer(...)
_klass(...)
safe_eval(s)
safe_string(s)
unsafe_string(s)
"""
# Module metadata: revision string, authorship, licence note and change log.
__version__ = "$Revision: 0.30 $"
__author__=["David Mertz (mertz@gnosis.cx)",]
# Contributors acknowledged by the author.
__thanks_to__=["Grant Munsey (gmunsey@Adobe.COM)",
"Keith J. Farmer (deoradh@yahoo.com)",
"Anthony Baxter (anthony@interlink.com.au)"]
# Licence statement: the file is released to the public domain.
__copyright__="""
This file is released to the public domain. I (dqm) would
appreciate it if you choose to keep derived works under terms
that promote freedom, but obviously am giving up any rights
to compel such.
"""
# Release notes, keyed by module version.
__history__="""
0.1 Initial version
0.22 Compatible with PyXML 0.52
0.30 Compatible with PyXML 0.61+
"""
from types import *
from xml.dom import ext
from xml.dom.ext.reader import Sax2
import cStringIO
# These used to be plain strings ("string exceptions").  Python 2.6 and later
# refuse to raise a string, so they are real exception classes now.  Both
# `raise XMLPicklingError, "msg"` and `except XMLPicklingError:` keep working.
class XMLPicklingError(Exception):
    """Raised when an object cannot be written as an [xml_pickle] document."""


class XMLUnpicklingError(Exception):
    """Raised when a document contains an element [xml_pickle] does not know."""
# NOTE: kept without a base class on purpose -- XML_Pickler.__init__ and
# _klass() compare type(obj) against InstanceType.
class PyObject:
    """Empty placeholder class whose instances simply carry attributes.

    When a true `__fakename__` is supplied it is stored on the instance;
    otherwise no attribute is created at all.
    """
    def __init__(self, __fakename__=None):
        if not __fakename__:
            return
        self.__fakename__ = __fakename__
class XML_Pickler:
    """Framework for 'pickle to XML'.

    The pickler wraps a target instance kept in `py_obj`.  Every attribute
    other than `py_obj` itself is read from, written to and deleted from
    that target, so a pickler (or a subclass of it) can be used as if it
    were the data object.
    """
    def __init__(self, py_obj=None):
        """Wrap `py_obj`, or a fresh PyObject placeholder when it is None.

        Raises ValueError if `py_obj` is given but is not an instance
        (its type is not InstanceType).
        """
        if py_obj is None:
            # No target supplied: collect attributes on a placeholder that
            # remembers the name of this (sub)class.
            self.py_obj = PyObject(self.__class__.__name__)
        elif type(py_obj) != InstanceType:
            raise ValueError(
                "XML_Pickler must be initialized with Instance (or None)")
        else:
            self.py_obj = py_obj

    def __setattr__(self, name, value):
        """Store `py_obj` on the pickler; everything else on the target."""
        if name == 'py_obj':
            self.__dict__[name] = value
        else:
            setattr(self.py_obj, name, value)

    def __getattr__(self, name):
        """Look up attributes not found on the pickler on the target."""
        # __getattr__ only runs when normal lookup failed.  If `py_obj`
        # itself is missing, asking the target would re-enter this method
        # forever, so fail with the ordinary AttributeError instead.
        if name == 'py_obj':
            raise AttributeError(name)
        return getattr(self.py_obj, name)

    def __delattr__(self, name):
        """Delete `name` from the target's instance dictionary."""
        del self.py_obj.__dict__[name]

    def dump(self, fh):
        """Write the XML document for the target to file object `fh`."""
        # The whole document is built in memory first, which could be
        # large for a complex object.
        fh.write(self.dumps())

    def load(self, fh):
        """Parse XML read from `fh` and return the object it describes.

        The pickler itself is not modified; the result of thing_from_dom()
        is returned.
        """
        return thing_from_dom(Sax2.FromXml(fh.read(), validate=0))

    def dumps(self):
        """Return the target serialized as an XML document string.

        One <attr> element is written per instance attribute of the
        target, in sorted name order.
        """
        target = self.py_obj
        # Use the instance dictionary rather than dir(): since Python 2.2
        # dir() also lists class attributes and methods, which are not
        # picklable data.  Values are read from the target directly so an
        # attribute named like a pickler method (e.g. "dump") is not
        # shadowed by that method.
        names = list(target.__dict__.keys())
        names.sort()
        parts = ['<?xml version="1.0"?>\n',
                 '<!DOCTYPE PyObject SYSTEM "PyObjects.dtd">\n',
                 '<PyObject class="%s">\n' % _klass(target)]
        for name in names:
            parts.append(_attr_tag(name, target.__dict__[name]))
        parts.append('</PyObject>')
        return ''.join(parts)

    def loads(self, xml_str):
        """Parse `xml_str` and return the object it describes."""
        fh = cStringIO.StringIO(xml_str)
        try:
            return self.load(fh)
        finally:
            # Release the buffer even when parsing fails.
            fh.close()
#-- support functions
def thing_from_dom(dom_node, container=None):
"""Converts an [xml_pickle] DOM tree to a "native" Python object"""
for node in subnodes(dom_node):
if node.nodeName == "PyObject":
# Add all the subnodes to PyObject container
container = thing_from_dom(node, obj_from_node(node))
elif node.nodeName == 'attr':
try:
node_type = node.attributes[('','type')].value
except:
print "node", node.attributes, repr(node.attributes)
print node.attributes.keys()
raise # WHAT?!
node_name = node.attributes[('', 'name')].value
if node_type == 'None':
setattr(container, node_name, None)
elif node_type == 'numeric':
node_val = safe_eval(node.attributes[('','value')].value)
setattr(container, node_name, node_val)
elif node_type == 'string':
node_val = node.attributes[('','value')].value
node_val = unsafe_string(node_val)
setattr(container, node_name, node_val)
elif node_type == 'list':
subcontainer = thing_from_dom(node, [])
setattr(container, node_name, subcontainer)
elif node_type == 'tuple':
subcontainer = thing_from_dom(node, []) # use list then convert
setattr(container, node_name, tuple(subcontainer))
elif node_type == 'dict':
subcontainer = thing_from_dom(node, {})
setattr(container, node_name, subcontainer)
elif node_type == 'PyObject':
subcontainer = thing_from_dom(node, obj_from_node(node))
setattr(container, node_name, subcontainer)
elif node.nodeName in ['item', 'key', 'val']:
# -- Odd behavior warning --
# The 'node_type' expression has an odd tendency to be a
# one-element tuple rather than a string. Doing the str()
# fixes things, but I'm not sure why!
# -- About key/val nodes --
# There *should not* be mutable types as keys, but to cover
# all cases, elif's are defined for mutable types. Furthermore,
# there should only ever be *one* item in any key/val list,
# but we again rely on other validation of the XML happening.
node_type = str(node.attributes[('','type')].value)
if node_type == 'numeric':
node_val = safe_eval(node.attributes[('','value')].value)
container.append(node_val)
elif node_type == 'string':
node_val = node.attributes[('','value')].value
node_val = unsafe_string(node_val)
container.append(node_val)
elif node_type == 'list':
subcontainer = thing_from_dom(node, [])
container.append(subcontainer)
elif node_type == 'tuple':
subcontainer = thing_from_dom(node, []) # use list then convert
container.append(tuple(subcontainer))
elif node_type == 'dict':
subcontainer = thing_from_dom(node, {})
container.append(subcontainer)
elif node_type == 'PyObject':
subcontainer = thing_from_dom(node, obj_from_node(node))
container.append(subcontainer)
elif node.nodeName == 'entry':
keyval = thing_from_dom(node, [])
key, val = keyval[0], keyval[1]
container[key] = val
else:
raise XMLUnpicklingError, \
"element %s is not in PyObjects.dtd" % node.nodeName
return container
def obj_from_node(node):
    """Return a new instance of the class named by `node`.

    The class name is the node's `class` attribute, falling back to
    'PyObject' when the node has no attributes or no `class` attribute.
    If the name cannot be resolved, an empty class of that name is
    created on the fly.  Raises ValueError if the name is not a plain
    (optionally dotted) identifier.
    """
    import re
    # Get classname of object (with fallback to 'PyObject')
    try:
        if node.attributes:
            klass = node.attributes[('', 'class')].value
        else:
            klass = 'PyObject'
    except KeyError:
        klass = 'PyObject'
    # SECURITY: the name comes from the XML document and is handed to
    # eval()/exec below.  Only dotted identifiers are let through so that
    # a document cannot smuggle in calls or statements.
    # NOTE(review): eval/exec are still used on the validated name --
    # loading documents from an untrusted source remains inadvisable.
    if not re.match(r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*\Z',
                    klass):
        raise ValueError(
            "invalid class name %s in PyObject element" % repr(klass))
    # does the class exist, or should we create it?
    try:
        safe_eval(klass)
    except NameError:
        exec ('class %s: pass' % klass)
    return eval('%s()' % klass)
def subnodes(node):
    """Return the children of `node` whose nodeName is not '#text'."""
    def _not_text(child):
        return child.nodeName != '#text'
    return filter(_not_text, node.childNodes)
def _attr_tag(name, thing, level=0):
start_tag = ' '*level+('\n'
if name == '__fakename__': return ''
else:
return _tag_completer(start_tag, thing, close_tag, level)
def _item_tag(thing, level=0):
    """Return the <item> element for one member of a list or tuple.

    `level` is the nesting depth used for indentation.
    """
    # The start tag is left open: _tag_completer() appends the type/value
    # attributes and closes it.
    start_tag = ' '*level + '<item '
    close_tag = ' '*level + '</item>\n'
    return _tag_completer(start_tag, thing, close_tag, level)
def _entry_tag(key, val, level=0):
    """Return the <entry> element for one dictionary item.

    The entry wraps a <key> element and a <val> element, each completed
    by _tag_completer() one level deeper than the entry itself.
    """
    start_tag = ' '*level + '<entry>\n'
    close_tag = ' '*level + '</entry>\n'
    # <key ...> and <val ...> are left open for _tag_completer() to finish.
    start_key = ' '*level + ' <key '
    close_key = ' '*level + ' </key>\n'
    key_block = _tag_completer(start_key, key, close_key, level+1)
    start_val = ' '*level + ' <val '
    close_val = ' '*level + ' </val>\n'
    val_block = _tag_completer(start_val, val, close_val, level+1)
    return (start_tag + key_block + val_block + close_tag)
def _tag_completer(start_tag, thing, close_tag, level=0):
tag_body = ''
if type(thing) == NoneType:
start_tag = start_tag+'type="None" />\n'
close_tag = ''
elif type(thing) in [IntType, LongType, FloatType, ComplexType]:
start_tag = start_tag+'type="numeric" value="%s" />\n' % `thing`
close_tag = ''
elif type(thing) in [StringType]:
thing = safe_string(thing)
start_tag = start_tag+'type="string" value="%s" />\n' % thing
close_tag = ''
elif type(thing) in [TupleType]:
start_tag = start_tag+'type="tuple">\n'
for item in thing:
tag_body = tag_body+_item_tag(item, level+1)
elif type(thing) in [ListType]:
start_tag = start_tag+'type="list">\n'
for item in thing:
tag_body = tag_body+_item_tag(item, level+1)
elif type(thing) in [DictType]:
start_tag = start_tag+'type="dict">\n'
for key, val in thing.items():
tag_body = tag_body+_entry_tag(key, val, level+1)
elif type(thing) in [InstanceType]:
start_tag = start_tag+'type="PyObject" class="%s">\n' % _klass(thing)
for name in dir(thing):
tag_body = tag_body+_attr_tag(name, getattr(thing, name), level+1)
else:
raise XMLPicklingError, "non-handled type %s." % type(thing)
return start_tag+tag_body+close_tag
def _klass(thing):
    """Return the class name to record for instance `thing`.

    A `__fakename__` attribute, when present, takes precedence over the
    real class name.  Raises ValueError if `thing` is not an instance
    (its type is not InstanceType).
    """
    if type(thing) != InstanceType:
        raise ValueError(
            "non-Instance type %s passed to _klass()" % type(thing))
    if not hasattr(thing, '__fakename__'):
        return thing.__class__.__name__
    return thing.__fakename__
def safe_eval(s):
    """Evaluate `s` as a Python expression and return the result.

    Any exception raised by the evaluation (e.g. NameError for an
    unknown name) propagates to the caller.
    """
    # SECURITY: despite the name, no screening is done -- the check for
    # malicious strings was only ever a disabled `if 0:` placeholder.
    # `s` is evaluated with eval() as-is.
    # TODO(review): validate `s` before evaluating it if documents may
    # come from an untrusted source.
    return eval(s)
def safe_string(s):
    """Return `s` encoded for use inside an XML attribute value.

    The five XML special characters become entity references; the result
    is then passed through repr() so that other special characters are
    written as Python-style escapes, and the surrounding quotes that
    repr() adds are removed.
    """
    # markup XML entities ('&' first, so the '&' introduced by the other
    # replacements is not escaped a second time)
    for raw, entity in (('&', '&amp;'),
                        ('<', '&lt;'),
                        ('>', '&gt;'),
                        ('"', '&quot;'),
                        ("'", '&apos;')):
        s = s.replace(raw, entity)
    # for others, use Python style escapes.  No quote characters are left
    # at this point, so repr() wraps the text in exactly one pair of
    # single quotes.
    # NOTE(review): assumes `s` is a plain (byte) string, as passed by
    # _tag_completer(); repr() of other string types may add a prefix.
    return repr(s)[1:-1]
def unsafe_string(s):
    """Return `s` with Python-style escape sequences decoded.

    XML entities are not handled here (DOM does it for us).
    """
    # Apostrophes are rewritten as the octal escape \047 so that the text
    # can be wrapped in single quotes and evaluated as a string literal.
    # NOTE(review): the text comes from the XML document and is evaluated
    # as Python source.
    literal = "'" + s.replace("'", r"\047") + "'"
    return eval(literal)
#-- Hand generated test object
# NOTE(review): the markup of the original sample was lost, leaving text
# that is not XML.  This is a small stand-in document in the format that
# thing_from_dom() reads -- replace it with the original sample if it can
# be recovered.
test_xml = """<?xml version="1.0"?>
<!DOCTYPE PyObject SYSTEM "PyObjects.dtd">
<PyObject class="Automobile">
  <attr name="doors" type="numeric" value="4" />
  <attr name="make" type="string" value="Honda" />
  <attr name="tow_hitch" type="None" />
  <attr name="prev_owners" type="tuple">
    <item type="string" value="Jane Smith" />
    <item type="string" value="John Doe" />
  </attr>
  <attr name="repairs" type="list">
    <item type="string" value="June 1, 1999: Fixed radiator" />
  </attr>
  <attr name="options" type="dict">
    <entry>
      <key type="string" value="Cup Holders" />
      <val type="numeric" value="4" />
    </entry>
  </attr>
</PyObject>
"""
#-- Self test
if __name__ == "__main__":
    # Pickle a hand-built object graph, then unpickle the sample document
    # and pickle the result again; both XML documents go to stdout.
    class MyClass: pass

    # nested instance holding a tuple, a complex number and a dict
    inner = MyClass()
    inner.tup = ("x", "y", "z")
    inner.num = 2+2j
    inner.dct = { "this": "that", "spam": "eggs", 3.14: "about PI" }

    # attributes set on the pickler end up on its placeholder target
    o = XML_Pickler()
    o.num = 37
    o.str = "Hello World \n Special Chars: \t \000 < > & ' \207"
    o.lst = [1, 3.5, 2, 4+7j]
    o.obj = inner

    print('------* Print python-defined pickled object *-----')
    print(o.dumps())
    print('-----* Load a test xml_pickle object, and print it *-----')
    u = o.loads(test_xml)
    print(XML_Pickler(u).dumps())