#--------------------- book2html.py ---------------------# #!/usr/bin/python """Convert ASCII book source files for HTML presentation" Usage: python book2html.py [title] < source.txt > target.html """ __author__=["David Mertz (mertz@gnosis.cx)",] __version__="November 2002" from __future__ import generators import sys, re, string, time from colorize import ParsePython from cgi import escape #-- Define some HTML boilerplate html_open =\ """ %s """ html_title = "Automatically Generated HTML" html_close = "" code_block = \ """

%s

%s
""" #-- End of boilerplate #-- State constants for s in ("BLANK CHAPTER SECTION SUBSECT SUBSUB MODLINE " "MODNAME PYSHELL CODESAMP NUMLIST BODY QUOTE " "SUBBODY TERM DEF RULE VERTSPC").split(): exec "%s = '%s'" % (s,s) markup = {CHAPTER:'h1', SECTION:'h2', SUBSECT:'h3', SUBSUB:'h4', BODY:'p', QUOTE:'blockquote', NUMLIST:'blockquote', DEF:'blockquote'} divs = {RULE:'hr', VERTSPC:'br'} class Regexen: def __init__(self): # blank line is empty, spaces/dashes only, or proc instruct self.blank = re.compile("^[ -]*$|^ THIS IS [A-Z]+$") self.chapter = re.compile("^(CHAPTER|APPENDIX|FRONTMATTER)") self.section = re.compile("^SECTION") self.subsect = re.compile("^ (TOPIC|PROBLEM|EXERCISE)") self.subsub = re.compile("^ [A-Z 0-9-]+:$") # chk befr body self.modline = re.compile("^ =+$") self.pyshell = re.compile("^ +>>>") self.codesamp = re.compile("^ +#[*]?[-=]+ .+ [-=]+#") self.numlist = re.compile("^ \d+[.] ") # chk befr body self.body = re.compile("^ \S") # 2 spc indent self.quote = re.compile("^ ?\S") # 4-5 spc indnt self.subbody = re.compile("^ +") # 6+ spc indent self.rule = re.compile("^ (-\*-|!!!)$") self.vertspc = re.compile("^ \+\+\+$") def Make_Blocks(fpin=sys.stdin, r=Regexen()): #-- Initialize the globals global state, blocks, laststate state, laststate = BLANK, BLANK blocks = [[BLANK]] #-- Break the file into relevant chunks for line in fpin.xreadlines(): line = line.rstrip() # Normalize line endings #-- for "one-line states" just act (no accumulation) if r.blank.match(line): if inState(PYSHELL): newState(laststate) else: blocks[-1].append("") elif r.rule.match(line): newState(RULE) elif r.vertspc.match(line): newState(VERTSPC) elif r.chapter.match(line): newState(CHAPTER) elif r.section.match(line): newState(SECTION) elif r.subsect.match(line): newState(SUBSECT) elif r.subsub.match(line): newState(SUBSUB) elif r.modline.match(line): newState(MODLINE) elif r.numlist.match(line): newState(NUMLIST) elif r.pyshell.match(line): if not inState(PYSHELL): newState(PYSHELL) elif r.codesamp.match(line): newState(CODESAMP) #-- now the multi-line states that are self-defining elif r.body.match(line): if not inState(BODY): newState(BODY) elif r.quote.match(line): if inState(MODLINE): newState(MODNAME) elif r.blank.match(line): newState(BLANK) elif not inState(QUOTE): newState(QUOTE) #-- now the "multi-line states" which eat further lines elif inState(MODLINE, PYSHELL, CODESAMP, NUMLIST, DEF): "stay in this state until we get a blank line" "...or other one-line prior type, but shouldn't happen" elif r.subbody.match(line): "Sub-body is tricky: it might belong with several states:" "PYSHELL, CODESAMP, NUMLIST, or as a def after BODY" if inState(BODY): newState(DEF) elif inState(BLANK): if laststate==DEF: pass elif inState(DEF, CODESAMP, PYSHELL, NUMLIST, MODNAME): pass else: raise ValueError, \ "unexpected input block state: %s\n%s" %(state,line) if inState(MODLINE, RULE, VERTSPC): pass elif r.blank.match(line): pass else: blocks[-1].append(line) return LookBack(blocks) def LookBack(blocks): types = [f[0] for f in blocks] for i in range(len(types)-1): this, next = types[i:i+2] if (this,next)==(BODY,DEF): blocks[i][0] = TERM return blocks def newState(name): global state, laststate, blocks if name not in (BLANK, MODLINE): blocks.append([name]) laststate = state state = name def inState(*names): return state in names def Process_Blocks(blocks, fpout=sys.stdout, title=html_title): fpout.write(html_open % title) for block in blocks: # Massage each block as needed typ, lines = block[0], block[1:] tag = markup.get(typ, None) div = divs.get(typ, None) if tag is not None: map(fpout.write, wrap_html(lines, tag)) elif div is not None: fpout.write('<%s />\n' % div) elif typ in (PYSHELL, CODESAMP): fpout.write(fixcode('\n'.join(lines),style=typ)) elif typ in (MODNAME,): mod = '

%s

'%'\n'.join(lines) fpout.write(mod) elif typ in (TERM,): terms = '
\n'.join(lines) fpout.write('

%s

\n' % terms) else: sys.stderr.write(typ+'\n') fpout.write(html_close) #-- Functions for start of block-type state def wrap_html(lines, tag): txt = '\n'.join(lines) for para in txt.split('\n\n'): if para: yield '<%s>%s\n' %\ (tag,URLify(Typography(escape(para))),tag) def fixcode(block, style=CODESAMP): block = LeftMargin(block) # Move to left # Pull out title if available title = 'Code Sample' if style==CODESAMP: re_title = re.compile('^#\*?\-+ (.+) \-+#$', re.M) if_title = re_title.match(block) if if_title: title = if_title.group(1) block = re_title.sub('', block) # take title out of code # Decide if it is Python code firstline = block[:block.find('\n')] if re.search(r'\.py_?|[Pp]ython|>>>', title+firstline): # Has .py, py_, Python/python, or >>> on first line/title block = ParsePython(block.rstrip()).toHTML() return code_block % (Typography(title), block) # elif the-will-and-the-way-is-there-to-format-language-X: ... else: return code_block % (Typography(title), escape(block).strip()) def LeftMargin(txt): "Remove as many leading spaces as possible from whole block" for l in range(12,-1,-1): re_lead = '(?sm)'+' '*l+'\S' if re.match(re_lead, txt): break txt = re.sub('(?sm)^'+' '*l, '', txt) return txt def URLify(txt): # Conv special IMG URL's: Alt Text: http://site.org/img.png} # (don't actually try quite as hard to validate URL though) txt = re.sub('(?sm){(.*?):\s*(http://.*)}', '\\1', txt) # Convert regular URL's txt = re.sub('(?:[^="])((?:http|ftp|file)://(?:[^ \n\r<\)]+))(\s)', '\\1\\2', txt) return txt def Typography(txt): rc = re.compile # cut down line length MS = re.M | re.S # [module] names r = rc(r"""([\(\s'/">]|^)\[(.*?)\]([<\s\.\),:;'"?!/-])""", MS) txt = r.sub('\\1\\2\\3',txt) # *strongly emphasize* words r = rc(r"""([\(\s'/"]|^)\*(.*?)\*([\s\.\),:;'"?!/-])""", MS) txt = r.sub('\\1\\2\\3', txt) # -emphasize- words r = rc(r"""([\(\s'/"]|^)-(.+?)-([\s\.\),:;'"?!/])""", MS) txt = r.sub('\\1\\2\\3', txt) # _Book Title_ citations r = rc(r"""([\(\s'/"]|^)_(.*?)_([\s\.\),:;'"?!/-])""", MS) txt = r.sub('\\1\\2\\3', txt) # 'Function()' names r = rc(r"""([\(\s/"]|^)'(.*?)'([\s\.\),:;"?!/-])""", MS) txt = r.sub("\\1\\2\\3", txt) # `library.func()` names r = rc(r"""([\(\s/"]|^)`(.*?)`([\s\.\),:;"?!/-])""", MS) txt = r.sub('\\1\\2\\3', txt) return txt if __name__ == '__main__': blocks = Make_Blocks() if len(sys.argv) > 1: Process_Blocks(blocks, title=sys.argv[1]) else: Process_Blocks(blocks)