#!/usr/bin/env python # Convert an RELAX NG compact syntax schema to a Node tree # This file released to the Public Domain by David Mertz from __future__ import generators import sys from rnc_tokenize import token_list class ParseError(SyntaxError): pass for t in """ ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE """.split(): globals()[t] = t PAIRS = {'BEG_BODY':('END_BODY',BODY), 'BEG_PAREN':('END_PAREN',GROUP), 'BEG_ANNO':('END_ANNO',ANNOTATION)} TAGS = { ONE: 'group', SOME: 'oneOrMore', MAYBE: 'optional', ANY: 'zeroOrMore'} DEFAULT_NAMESPACE = None DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"'] OTHER_NAMESPACE = {} CONTEXT_FREE = 0 try: enumerate except: enumerate = lambda seq: zip(range(len(seq)),seq) nodetypes = lambda nl: tuple(map(lambda n: n.type, nl)) toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks) class Node(object): __slots__ = ('type','value','name','quant') def __iter__(self): yield self __len__ = lambda self: 1 def __init__(self, type='', value=[], name=None, quant=ONE): self.type = type self.value = value self.name = name self.quant = quant def format(self, indent=0): out = [' '*indent+repr(self)] write = out.append if isinstance(self.value, str): if self.type==COMMENT: write(' '*(1+indent)+self.value) else: for node in self.value: write(node.format(indent+1)) return '\n'.join(out) def prettyprint(self): print self.format() def toxml(self): if CONTEXT_FREE: out = [] write = out.append write('') write('') self.type = None write(self.xmlnode(1)) write('') return self.add_ns('\n'.join(out)) else: return self.add_ns(self.xmlnode()) def xmlnode(self, indent=0): out = [] write = out.append if self.type == ROOT: write('') for x in self.value: if not isinstance(x, Node): raise TypeError, "Unhappy Node.value: "+repr(x) elif x.type == START: startelem = '' % x.value write(' '*indent+startelem) elif x.type == 
DEFINE: write(' '*indent+'' % x.name) write(x.xmlnode(indent+1)) write(' '*indent+'') elif x.type == NAME: write(' '*indent+ '' % x.value) elif x.type == COMMENT: write(' '*indent+'' % x.value) elif x.type == LITERAL: write(' '*indent+'%s' % x.value) elif x.type == ANNOTATION: write(' '*indent+\ '%s' % x.value) elif x.type == INTERLEAVE: write(' '*indent+'') write(x.xmlnode(indent+1)) write(' '*indent+'') elif x.type == CHOICE: write(' '*indent+'') write(x.xmlnode(indent+1)) write(' '*indent+'') elif x.type == GROUP: write(x.xmlnode(indent)) elif x.type == TEXT: write(' '*indent+'') elif x.type == EMPTY: write(' '*indent+'') elif x.type == DATATAG: DATATYPE_LIB[0] = 1 # Use datatypes if x.name is None: # no paramaters write(' '*indent+'' % x.value) else: write(' '*indent+'' % x.name) p = '%s' % x.value write(' '*(indent+1)+p) write(' '*indent+'') elif x.type == ELEM: if x.quant == ONE: write(' '*indent+'' % x.name) write(x.xmlnode(indent+1)) write(' '*indent+'') else: write(' '*indent+'<%s>' % TAGS[x.quant]) write(' '*(indent+1)+'' % x.name) write(x.xmlnode(indent+2)) write(' '*(indent+1)+'') write(' '*indent+'' % TAGS[x.quant]) elif x.type == ATTR: if x.value[0].type == TEXT: write(' '*indent+'' % x.name) elif x.value[0].type == EMPTY: write(' '*indent+'' % x.name) write(' '*(indent+1)+'') write(' '*indent+'') return '\n'.join(out) def __repr__(self): return "Node(%s,%s,%s)[%d]" % (self.type, self.name, self.quant, len(self.value)) def add_ns(self, xml): "Add namespace attributes to top level element" lines = xml.split('\n') self.nest_annotations(lines) # annots not allowed before root elem for i, line in enumerate(lines): ltpos = line.find('<') if ltpos >= 0 and line[ltpos+1] not in ('!','?'): # We've got an element tag, not PI or comment new = line[:line.find('>')] new += ' xmlns="http://relaxng.org/ns/structure/1.0"' if DEFAULT_NAMESPACE is not None: new += '\n ns=%s' % DEFAULT_NAMESPACE if DATATYPE_LIB[0]: new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1] for 
ns, url in OTHER_NAMESPACE.items(): new += '\n xmlns:%s=%s' % (ns, url) new += '>' lines[i] = new break return '\n'.join(lines) def nest_annotations(self, lines): "Nest any top annotation within first element" top_annotations = [] for i, line in enumerate(lines[:]): if line.find('= 0: top_annotations.append(line) del lines[i] else: ltpos = line.find('<') if ltpos >= 0 and line[ltpos+1] not in ('!','?'): break for line in top_annotations: lines.insert(i, ' '+line) def findmatch(beg, nodes, offset): level = 1 end = PAIRS[beg][0] for i,t in enumerate(nodes[offset:]): if t.type == beg: level += 1 elif t.type == end: level -= 1 if level == 0: return i+offset raise EOFError, ("No closing token encountered for %s @ %d" % (beg,offset)) def match_pairs(nodes): newnodes = [] i = 0 while 1: if i >= len(nodes): break node = nodes[i] if node.type in PAIRS.keys(): # Look for enclosing brackets match = findmatch(node.type, nodes, i+1) matchtype = PAIRS[node.type][1] node = Node(type=matchtype, value=nodes[i+1:match]) node.value = match_pairs(node.value) newnodes.append(node) i = match+1 else: newnodes.append(node) i += 1 if i >= len(nodes): break if nodes[i].type in (ANY, SOME, MAYBE): newnodes[-1].quant = nodes[i].type i += 1 nodes[:] = newnodes return nodes def type_bodies(nodes): newnodes = [] i = 0 while 1: if i >= len(nodes): break if nodetypes(nodes[i:i+3]) == (ELEM, NAME, BODY) or \ nodetypes(nodes[i:i+3]) == (ATTR, NAME, BODY): name, body = nodes[i+1].value, nodes[i+2] value, quant = type_bodies(body.value), body.quant node = Node(nodes[i].type, value, name, quant) newnodes.append(node) i += 3 elif nodetypes(nodes[i:i+2]) == (DATATAG, PATTERN): node = Node(DATATAG, nodes[i+1].value, nodes[i].value) newnodes.append(node) i += 2 elif nodes[i] == DEFINE: print nodes[i:] else: if nodes[i].type == GROUP: # Recurse into groups value = type_bodies(nodes[i].value) nodes[i] = Node(GROUP, value, None, nodes[i].quant) newnodes.append(nodes[i]) i += 1 nodes[:] = newnodes return nodes 
def nest_defines(nodes):
    """Attach groups to named patterns.

    Every DEFINE node absorbs the nodes that follow it, up to the next
    DEFINE or the end of the list: its original value becomes its name and
    the absorbed nodes become a GROUP node stored as its value.  `nodes` is
    rewritten in place and also returned.
    """
    newnodes = []
    i = 0
    while i < len(nodes):
        node = nodes[i]
        newnodes.append(node)
        if node.type == DEFINE:
            group = []
            while (i+1) < len(nodes) and nodes[i+1].type != DEFINE:
                group.append(nodes[i+1])
                i += 1
            node.name = node.value
            node.value = Node(GROUP, group)
        i += 1
    nodes[:] = newnodes
    return nodes


def intersperse(nodes):
    """Look for interleaved, choice, or sequential nodes in groups/bodies.

    When the children of an ELEM, ATTR, GROUP or LITERAL node contain
    separator nodes of exactly one kind (INTERLEAVE, CHOICE or SEQ), the
    children are replaced by a single node of that kind holding the
    non-separator children.  Raises ParseError if more than one kind of
    separator appears at the same level.  Returns `nodes`.
    """
    for node in nodes:
        if node.type in (ELEM, ATTR, GROUP, LITERAL):
            val = node.value
            # A string value has no children to inspect.
            if isinstance(val, str):
                ntypes = []
            else:
                ntypes = [n.type for n in val]
            # Distinct separator kinds present at this level
            inters = []
            for t in ntypes:
                if t in (INTERLEAVE, CHOICE, SEQ) and t not in inters:
                    inters.append(t)
            if len(inters) > 1:
                raise ParseError("Ambiguity in sequencing: %s" % node)
            if inters:
                intertype = inters[0]
                items = [pat for pat in node.value if pat.type != intertype]
                node.value = Node(intertype, items)
        if not isinstance(node.value, str):  # No recurse to terminal str
            intersperse(node.value)
    return nodes


def scan_NS(nodes):
    """Look for any namespace configuration lines.

    Updates the module-level DEFAULT_NAMESPACE, OTHER_NAMESPACE,
    DATATYPE_LIB and CONTEXT_FREE settings from the given nodes.
    """
    global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
    for node in nodes:
        if node.type == DEFAULT_NS:
            DEFAULT_NAMESPACE = node.value
        elif node.type == NS:
            # Split on the first '=' only, so a URI that itself contains
            # '=' stays intact.
            ns, url = [part.strip() for part in node.value.split('=', 1)]
            OTHER_NAMESPACE[ns] = url
        elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
            # An annotation needs the 'a' prefix declared; an existing
            # declaration of that prefix is not overwritten.
            OTHER_NAMESPACE['a'] = \
                '"http://relaxng.org/ns/compatibility/annotations/1.0"'
        elif node.type == DATATYPES:
            DATATYPE_LIB[:] = [1, node.value]
        elif node.type == START:
            CONTEXT_FREE = 1


def make_nodetree(tokens):
    "Run every tree-building pass over tokens and return the ROOT Node"
    # list() guarantees an indexable, mutable sequence for the in-place
    # passes below, whatever toNodes() returns.
    nodes = list(toNodes(tokens))
    match_pairs(nodes)
    type_bodies(nodes)
    nest_defines(nodes)
    intersperse(nodes)
    scan_NS(nodes)
    root = Node(ROOT, nodes)
    return root


if __name__ == '__main__':
    # Read a compact-syntax schema from stdin and dump the parsed tree.
    make_nodetree(token_list(sys.stdin.read())).prettyprint()