#!/usr/bin/env python
# Convert a RELAX NG compact syntax schema to a Node tree
# This file released to the Public Domain by David Mertz
from __future__ import generators
import sys
from rnc_tokenize import token_list
class ParseError(SyntaxError):
    """Raised when the token stream cannot be arranged into a valid tree."""
# Bind every token-type name below as a module-level constant whose value
# is its own name (e.g. ELEM == 'ELEM').
for t in """
ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
""".split():
    globals()[t] = t

# Opening token type -> (closing token type, node type of the enclosed span)
PAIRS = {'BEG_BODY': ('END_BODY', BODY),
         'BEG_PAREN': ('END_PAREN', GROUP),
         'BEG_ANNO': ('END_ANNO', ANNOTATION)}

# Quantifier constant -> name of the XML wrapper element emitted for it
TAGS = {ONE: 'group',
        SOME: 'oneOrMore',
        MAYBE: 'optional',
        ANY: 'zeroOrMore'}

# Module-wide state filled in while scanning / serializing a schema
DEFAULT_NAMESPACE = None
# [in-use flag, quoted datatype library URI]
DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
OTHER_NAMESPACE = {}
CONTEXT_FREE = 0

# Fallback for interpreters that predate the enumerate() builtin.  Only a
# missing name is expected here, so nothing broader is caught.
try:
    enumerate
except NameError:
    def enumerate(seq):
        return zip(range(len(seq)), seq)


def nodetypes(nl):
    """Return a tuple holding the ``type`` attribute of each item in *nl*."""
    return tuple([n.type for n in nl])


def toNodes(toks):
    """Return a list with one Node(type, value) per token in *toks*.

    A real list (not a lazy iterator) is returned because the tree-building
    passes index into the result and rewrite it in place.
    """
    return [Node(tok.type, tok.value) for tok in toks]
class Node(object):
    """A node of the tree built from a RELAX NG compact-syntax token stream.

    Slots:
      type  -- node kind, one of the module-level token-name constants
      value -- payload: a list of child Nodes, a single Node, or a string
      name  -- optional name (element/attribute/define name, datatype name)
      quant -- quantifier constant (ONE, SOME, MAYBE or ANY)
    """
    __slots__ = ('type', 'value', 'name', 'quant')

    # Sentinel for "value not supplied", so every Node created without a
    # value gets its own fresh list rather than one shared default list.
    _UNSET = object()

    def __iter__(self):
        # A lone Node iterates as a one-item sequence containing itself, so
        # code that loops over ``node.value`` works for a list or a Node.
        yield self

    def __len__(self):
        return 1

    def __init__(self, type='', value=_UNSET, name=None, quant=ONE):
        self.type = type
        if value is Node._UNSET:
            value = []
        self.value = value
        self.name = name
        self.quant = quant

    def format(self, indent=0):
        "Return an indented, multi-line debugging dump of this subtree"
        out = [' '*indent + repr(self)]
        write = out.append
        if isinstance(self.value, str):
            # String payloads are terminal; only comment text is shown
            if self.type == COMMENT:
                write(' '*(1+indent) + self.value)
        else:
            for node in self.value:
                write(node.format(indent+1))
        return '\n'.join(out)

    def prettyprint(self):
        "Print the debugging dump produced by format()"
        print(self.format())

    def toxml(self):
        """Return the RELAX NG XML serialization of this tree.

        When CONTEXT_FREE is set the output is wrapped in a <grammar>
        element.  In that case ``self.type`` is reset to None so that
        xmlnode() does not emit a second XML declaration.
        """
        if CONTEXT_FREE:
            out = []
            write = out.append
            write('<?xml version="1.0" encoding="UTF-8"?>')
            write('<grammar>')
            self.type = None
            write(self.xmlnode(1))
            write('</grammar>')
            return self.add_ns('\n'.join(out))
        else:
            return self.add_ns(self.xmlnode())

    def xmlnode(self, indent=0):
        """Return the XML for the children in ``self.value``.

        Raises TypeError if a child is not a Node.  Emitting a DATATAG
        child switches on the module-level DATATYPE_LIB flag.
        """
        out = []
        write = out.append
        if self.type == ROOT:
            write('<?xml version="1.0" encoding="UTF-8"?>')
        for x in self.value:
            if not isinstance(x, Node):
                raise TypeError("Unhappy Node.value: " + repr(x))
            elif x.type == START:
                startelem = '<start><ref name="%s"/></start>' % x.value
                write(' '*indent + startelem)
            elif x.type == DEFINE:
                write(' '*indent + '<define name="%s">' % x.name)
                write(x.xmlnode(indent+1))
                write(' '*indent + '</define>')
            elif x.type == NAME:
                write(' '*indent + '<ref name="%s"/>' % x.value)
            elif x.type == COMMENT:
                write(' '*indent + '<!-- %s -->' % x.value)
            elif x.type == LITERAL:
                write(' '*indent + '<value>%s</value>' % x.value)
            elif x.type == ANNOTATION:
                write(' '*indent +
                      '<a:documentation>%s</a:documentation>' % x.value)
            elif x.type == INTERLEAVE:
                write(' '*indent + '<interleave>')
                write(x.xmlnode(indent+1))
                write(' '*indent + '</interleave>')
            elif x.type == SEQ:
                # A sequence needs no wrapper element of its own
                write(x.xmlnode(indent+1))
            elif x.type == CHOICE:
                write(' '*indent + '<choice>')
                write(x.xmlnode(indent+1))
                write(' '*indent + '</choice>')
            elif x.type == GROUP:
                write(x.xmlnode(indent))
            elif x.type == TEXT:
                write(' '*indent + '<text/>')
            elif x.type == EMPTY:
                write(' '*indent + '<empty/>')
            elif x.type == DATATAG:
                DATATYPE_LIB[0] = 1     # Use datatypes
                if x.name is None:      # no parameters
                    write(' '*indent + '<data type="%s"/>' % x.value)
                else:
                    write(' '*indent + '<data type="%s">' % x.name)
                    p = '<param name="pattern">%s</param>' % x.value
                    write(' '*(indent+1) + p)
                    write(' '*indent + '</data>')
            elif x.type == ELEM:
                if x.quant == ONE:
                    write(' '*indent + '<element name="%s">' % x.name)
                    write(x.xmlnode(indent+1))
                    write(' '*indent + '</element>')
                else:
                    # Quantified element: wrap it in the matching tag
                    write(' '*indent + '<%s>' % TAGS[x.quant])
                    write(' '*(indent+1) + '<element name="%s">' % x.name)
                    write(x.xmlnode(indent+2))
                    write(' '*(indent+1) + '</element>')
                    write(' '*indent + '</%s>' % TAGS[x.quant])
            elif x.type == ATTR:
                # Only text and empty attribute bodies are serialized
                if x.value[0].type == TEXT:
                    write(' '*indent + '<attribute name="%s"/>' % x.name)
                elif x.value[0].type == EMPTY:
                    write(' '*indent + '<attribute name="%s">' % x.name)
                    write(' '*(indent+1) + '<empty/>')
                    write(' '*indent + '</attribute>')
        return '\n'.join(out)

    def __repr__(self):
        return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
                                       self.quant, len(self.value))

    def add_ns(self, xml):
        "Add namespace attributes to top level element"
        lines = xml.split('\n')
        self.nest_annotations(lines)  # annots not allowed before root elem
        for i, line in enumerate(lines):
            ltpos = line.find('<')
            if ltpos >= 0 and line[ltpos+1] not in ('!', '?'):
                # We've got an element tag, not PI or comment
                new = line[:line.find('>')]
                new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
                if DEFAULT_NAMESPACE is not None:
                    new += '\n ns=%s' % DEFAULT_NAMESPACE
                if DATATYPE_LIB[0]:
                    new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1]
                for ns, url in OTHER_NAMESPACE.items():
                    new += '\n xmlns:%s=%s' % (ns, url)
                new += '>'
                lines[i] = new
                break
        return '\n'.join(lines)

    def nest_annotations(self, lines):
        """Nest any top annotation within first element.

        *lines* is modified in place: every annotation line found before
        the first element tag is removed and re-inserted, in its original
        order and indented by one space, right after that element's line.
        """
        top_annotations = []
        i = 0
        # Index the live list (not a copy) so a deletion never shifts the
        # position of the lines still to be examined.
        while i < len(lines):
            line = lines[i]
            if line.find('<a:') >= 0:
                top_annotations.append(line)
                del lines[i]
                continue
            ltpos = line.find('<')
            if ltpos >= 0 and line[ltpos+1] not in ('!', '?'):
                break
            i += 1
        lines[i+1:i+1] = [' ' + line for line in top_annotations]
def findmatch(beg, nodes, offset):
    """Return the index in *nodes* of the token that closes *beg*.

    beg    -- type of the opening token (a key of PAIRS)
    nodes  -- sequence of objects with a ``type`` attribute
    offset -- index at which to start scanning (just past the opener)

    Nested openers of the same type are balanced against their closers.
    Raises EOFError when no matching closing token is found.
    """
    level = 1
    end = PAIRS[beg][0]
    for i in range(offset, len(nodes)):
        kind = nodes[i].type
        if kind == beg:
            level += 1
        elif kind == end:
            level -= 1
        if level == 0:
            return i
    raise EOFError("No closing token encountered for %s @ %d"
                   % (beg, offset))
def match_pairs(nodes):
    """Fold each bracketed token span into one nested Node, in place.

    An opening token listed in PAIRS is replaced, together with everything
    up to its matching closer, by a single Node whose value is the
    (recursively folded) enclosed span.  A quantifier token (ANY, SOME,
    MAYBE) directly following an item is consumed and stored as that
    item's ``quant``.  *nodes* is rewritten in place and returned.
    """
    folded = []
    pos = 0
    total = len(nodes)
    while pos < total:
        current = nodes[pos]
        if current.type in PAIRS:
            # Look for enclosing brackets
            close = findmatch(current.type, nodes, pos + 1)
            wrapped = PAIRS[current.type][1]
            current = Node(type=wrapped, value=nodes[pos + 1:close])
            current.value = match_pairs(current.value)
            pos = close + 1
        else:
            pos += 1
        folded.append(current)
        # A trailing quantifier belongs to the item just emitted
        if pos < total and nodes[pos].type in (ANY, SOME, MAYBE):
            folded[-1].quant = nodes[pos].type
            pos += 1
    nodes[:] = folded
    return nodes
def type_bodies(nodes):
    """Merge keyword/name/body runs into single typed nodes, in place.

    * (ELEM|ATTR, NAME, BODY) becomes one ELEM/ATTR Node named after the
      NAME token, holding the recursively processed body and the body's
      quantifier.
    * (DATATAG, PATTERN) becomes one DATATAG Node whose value is the
      pattern and whose name is the datatype.
    * GROUP nodes are rebuilt with their contents processed recursively.

    Everything else is passed through.  *nodes* is rewritten in place and
    returned.
    """
    newnodes = []
    i = 0
    while i < len(nodes):
        kinds = nodetypes(nodes[i:i+3])
        if kinds in ((ELEM, NAME, BODY), (ATTR, NAME, BODY)):
            name, body = nodes[i+1].value, nodes[i+2]
            value, quant = type_bodies(body.value), body.quant
            newnodes.append(Node(nodes[i].type, value, name, quant))
            i += 3
        elif kinds[:2] == (DATATAG, PATTERN):
            newnodes.append(Node(DATATAG, nodes[i+1].value, nodes[i].value))
            i += 2
        else:
            if nodes[i].type == GROUP:  # Recurse into groups
                value = type_bodies(nodes[i].value)
                nodes[i] = Node(GROUP, value, None, nodes[i].quant)
            newnodes.append(nodes[i])
            i += 1
    nodes[:] = newnodes
    return nodes
def nest_defines(nodes):
    """Attach groups to named patterns.

    Each DEFINE node absorbs every node that follows it up to (but not
    including) the next DEFINE: its original value becomes its ``name``
    and its ``value`` becomes a GROUP Node holding the absorbed nodes.
    *nodes* is rewritten in place and returned.
    """
    newnodes = []
    i = 0
    while i < len(nodes):
        node = nodes[i]
        newnodes.append(node)
        if node.type == DEFINE:
            group = []
            while (i+1) < len(nodes) and nodes[i+1].type != DEFINE:
                group.append(nodes[i+1])
                i += 1
            node.name = node.value
            node.value = Node(GROUP, group)
        i += 1
    nodes[:] = newnodes
    return nodes
def intersperse(nodes):
    """Look for interleaved, choice, or sequential nodes in groups/bodies.

    For every ELEM, ATTR, GROUP or LITERAL node whose children contain
    separator nodes of exactly one kind (INTERLEAVE, CHOICE or SEQ), the
    separators are dropped and the remaining children are wrapped in a
    single Node of that kind.  Non-string values are then processed
    recursively.  Returns *nodes*.

    Raises ParseError when one node mixes different separator kinds.
    """
    for node in nodes:
        if node.type in (ELEM, ATTR, GROUP, LITERAL):
            val = node.value
            # A string payload is terminal and has no child separators
            inters = []
            if not isinstance(val, str):
                for child in val:
                    kind = child.type
                    if kind in (INTERLEAVE, CHOICE, SEQ) and kind not in inters:
                        inters.append(kind)
            if len(inters) > 1:
                raise ParseError("Ambiguity in sequencing: %s" % node)
            if inters:
                intertype = inters[0]
                items = [pat for pat in node.value if pat.type != intertype]
                node.value = Node(intertype, items)
        if not isinstance(node.value, str):  # No recurse to terminal str
            intersperse(node.value)
    return nodes
def scan_NS(nodes):
    """Look for any namespace configuration lines.

    Updates module-level state from the given nodes:
      DEFAULT_NS -> DEFAULT_NAMESPACE is set to the node's value
      NS         -> 'prefix = uri' is recorded in OTHER_NAMESPACE
      ANNOTATION -> the 'a' annotations namespace is registered once
      DATATYPES  -> DATATYPE_LIB is enabled with the node's value
      START      -> CONTEXT_FREE is switched on
    """
    global DEFAULT_NAMESPACE, CONTEXT_FREE
    for node in nodes:
        if node.type == DEFAULT_NS:
            DEFAULT_NAMESPACE = node.value
        elif node.type == NS:
            # Split on the first '=' only: the URI may itself contain '='
            ns, url = [part.strip() for part in node.value.split('=', 1)]
            OTHER_NAMESPACE[ns] = url
        elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
            OTHER_NAMESPACE['a'] =\
                '"http://relaxng.org/ns/compatibility/annotations/1.0"'
        elif node.type == DATATYPES:
            DATATYPE_LIB[:] = [1, node.value]
        elif node.type == START:
            CONTEXT_FREE = 1
def make_nodetree(tokens):
    """Build the Node tree for *tokens* and return its ROOT node.

    The tokens are wrapped in Nodes, then each pass below is applied to
    the node list in order before the list is placed under a ROOT Node.
    """
    nodes = toNodes(tokens)
    passes = (match_pairs, type_bodies, nest_defines, intersperse, scan_NS)
    for apply_pass in passes:
        apply_pass(nodes)
    return Node(ROOT, nodes)
if __name__ == '__main__':
    # Read a schema from standard input and print the resulting node tree
    schema_text = sys.stdin.read()
    tree = make_nodetree(token_list(schema_text))
    tree.prettyprint()