#!/usr/bin/env python
# Do cheap-and-cheerful syntax highlighting for specially-marked alltt
# environments in a LaTeX file. This will look for comments like this:
#   \end{alltt} % Haskell production: cStyleNames doName
# and then go through the contents, removing any existing wrapper macros
# and inserting new ones.
# Adam Sampson

import os
import re
import sys


class HaskellSyn:
    """Keyword and operator tables for Haskell."""

    keywords = [
        "as", "case", "class", "data", "default", "deriving", "do", "else",
        "forall", "foreign", "hiding", "if", "import", "in", "infix",
        "infixl", "infixr", "instance", "let", "mdo", "module", "newtype",
        "of", "qualified", "then", "type", "where",
    ]
    operators = [
        "<-", "->", ";", "=", "==", "::",
    ]


class OccamSyn:
    """Keyword and operator tables for occam."""

    # NOTE: every keyword must appear only once; a repeated entry would be
    # substituted twice and nest the wrapper macro inside itself.
    keywords = [
        "AFTER", "ALT", "AND", "ANY", "AT", "BITAND", "BITNOT", "BITOR",
        "BOOL", "BYTE", "BYTESIN", "CASE", "CHAN", "DATA", "ELSE", "FALSE",
        "FOR", "FROM", "FUNCTION", "IF", "IN", "INITIAL", "INLINE", "INT",
        "INT16", "INT32", "INT64", "IS", "MINUS", "MOSTNEG", "MOSTPOS",
        "NOT", "OF", "OFFSETOF", "OR", "PACKED", "PAR", "PLACE", "PLACED",
        "PLUS", "PORT", "PRI", "PROC", "PROCESSOR", "PROTOCOL", "REAL32",
        "REAL64", "REC", "RECORD", "RECURSIVE", "REM", "RESHAPES", "RESULT",
        "RETYPES", "ROUND", "SEQ", "SIZE", "SKIP", "STOP", "TIMER", "TIMES",
        "TRUE", "TRUNC", "TYPE", "VAL", "VALOF", "VECSPACE", "WHILE",
        "WORKSPACE",
    ]
    operators = [
        ":=", "?", "!", "+", "-", "*", "/",
    ]


class CSyn:
    """Keyword and operator tables for C."""

    keywords = [
        "_Bool", "_Complex", "_Imaginary", "auto", "break", "case", "char",
        "const", "continue", "default", "do", "double", "else", "enum",
        "extern", "false", "float", "for", "goto", "if", "inline", "int",
        "long", "register", "restrict", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "true", "typedef", "union",
        "unsigned", "void", "volatile", "while",
    ]
    operators = [
        "=", "+", "-", "*", "/",
    ]


# Language name (as written in the "\end{alltt} % ..." comment) -> syntax.
languages = {
    "Haskell": HaskellSyn(),
    "occam": OccamSyn(),
    "C": CSyn(),
}

_BEGIN_RE = re.compile(r'^\\begin{alltt}')
_END_RE = re.compile(r'^\\end{alltt}\s*(%\s*(\S.*))?$')
_DQ_STRING_RE = re.compile(r'(".*?[^\\]")')
_SQ_STRING_RE = re.compile(r'(\'.*?[^\\]\')')
_NUMBER_RE = re.compile(r'\b(-?[0-9]+(\.[0-9]+([eE][0-9]+)?)?)\b')


def _strip_macros(line):
    """Return line with existing "\\name{...}" wrappers removed.

    A backslash followed by a lowercase letter starts a command; everything
    from the backslash up to and including the next "{" is dropped, as is
    the matching number of later "}" characters. A backslash followed by
    anything else is kept together with that character.
    """
    out = []
    depth = 0  # number of stripped "{" still waiting for their "}"
    i = 0
    n = len(line)
    while i < n:
        ch = line[i]
        if ch == "\\":
            i += 1
            if i >= n:
                # Lone backslash at the very end of the text: keep it.
                out.append("\\")
                break
            nxt = line[i]
            if nxt < "a" or nxt > "z":
                out.append("\\" + nxt)
            else:
                brace = line.find("{", i)
                if brace == -1:
                    # No brace left on this line, so this is not a wrapper
                    # macro: keep the backslash and let the command name be
                    # copied through as ordinary text.
                    out.append("\\")
                    continue
                i = brace
                depth += 1
        elif ch == "}" and depth > 0:
            depth -= 1
        else:
            out.append(ch)
        i += 1
    return "".join(out)


def _wrap_words(text, word, macro):
    """Wrap each whole-word occurrence of word in text as "\\macro{word}"."""
    replacement = "\\%s{%s}" % (macro, word)
    # A function replacement is used so that backslashes in the replacement
    # are never interpreted as regex template escapes.
    return re.sub(r'\b' + re.escape(word) + r'\b',
                  lambda m: replacement, text)


def process(line, args):
    """Re-highlight one line of an alltt environment.

    args[0] is a key of the module-level ``languages`` table (a KeyError is
    raised for an unknown language). The remaining args are "special" names
    to wrap; an argument ending in ":" switches the macro used for the
    names that follow it (the default macro is "highlight").

    Returns the line with old wrapper macros stripped and new keyword,
    operator, stringconst, numconst and special macros inserted.
    """
    lang = languages[args[0]]
    specials = args[1:]

    out = _strip_macros(line)

    # Keywords. Skip repeats so a duplicated table entry cannot wrap the
    # same word twice.
    seen = set()
    for kw in lang.keywords:
        if kw in seen:
            continue
        seen.add(kw)
        out = _wrap_words(out, kw, "keyword")

    # Operators are only recognised when surrounded by single spaces.
    for op in lang.operators:
        out = out.replace(" %s " % op, " \\operator{%s} " % op)

    def string_const(m):
        return "\\stringconst{%s}" % m.group(1)

    def num_const(m):
        return "\\numconst{%s}" % m.group(1)

    out = _DQ_STRING_RE.sub(string_const, out)
    out = _SQ_STRING_RE.sub(string_const, out)
    out = _NUMBER_RE.sub(num_const, out)

    macro = "highlight"
    for s in specials:
        if s.endswith(":"):
            macro = s[:-1]
        else:
            out = _wrap_words(out, s, macro)

    return out


def _highlight_lines(lines):
    """Yield the highlighted version of an iterable of LaTeX source lines.

    Lines outside alltt environments (including the "\\begin{alltt}" line
    itself) pass through unchanged. Lines inside an environment are held
    back until its "\\end{alltt}" line is seen; if that line carries a
    "% Language ..." comment they are run through process() first.
    """
    pending = None  # None outside alltt, list of buffered lines inside
    for l in lines:
        if pending is None:
            yield l
            if _BEGIN_RE.match(l.rstrip()) is not None:
                pending = []
            continue

        m = _END_RE.match(l.rstrip())
        if m is None:
            pending.append(l)
            continue

        if m.group(2) is not None:
            args = m.group(2).split()
            pending = [process(ll, args) for ll in pending]
        for ll in pending:
            yield ll
        yield l
        pending = None

    if pending is not None:
        # The file ended inside an unterminated alltt environment: emit the
        # buffered lines untouched rather than silently dropping them.
        for ll in pending:
            yield ll


def highlight(fn):
    """Highlight the marked alltt environments of the file fn in place.

    The result is written to fn + ".new"; the original is then renamed to
    fn + "~" and the new file takes its place.
    """
    new_fn = fn + ".new"
    # "with" guarantees both handles are closed even if process() raises.
    with open(fn) as f:
        with open(new_fn, "w") as fo:
            fo.writelines(_highlight_lines(f))
    os.rename(fn, fn + "~")
    os.rename(new_fn, fn)


if __name__ == "__main__":
    for path in sys.argv[1:]:
        highlight(path)