#!/usr/bin/python
# Human XML: convert to and from a more human-friendly syntax for XML.
# Adam Sampson
# This is based on simple textual transformations.
#
# Hux        XML             Notes
# <A/B>      <A>B</A>        B is expanded recursively
# <A>        <A/>
# <--A-->    <!--A-->        A cannot contain --
# <!A>       <!A>            <> must balance in A; A cannot start --
# <?A?>      <?A?>           Only ?> is recognised in A
# <[[A]]>    <![CDATA[A]]>   Only ]]> is recognised in A
#
# NOTE(review): this file was recovered from a copy in which line breaks,
# tag-like text inside string literals and HTML entities had been lost.
# Spots that had to be reconstructed are marked NOTE(review) below; verify
# them against the upstream hux source if it is available.

import getopt
import sys

# Characters that end an element name inside a hux start tag.
_WHITESPACE = "\x20\x09\x0D\x0A"
# Characters that open (and close) a quoted attribute value.
_QUOTES = "\"'"


class BufferedInput(object):
    """Character-at-a-time reader over a file-like object.

    Characters are served from a buffer that is refilled with
    ``f.read(4096)`` whenever it runs out.
    """

    def __init__(self, f):
        self.f = f
        self.buf = ""
        self.pos = 0

    def getc(self):
        """Return the next character, or "" once the input is exhausted."""
        if self.pos == len(self.buf):
            self.buf = self.f.read(4096)
            self.pos = 0
            # Treat any empty read as EOF (not just the exact value "").
            if not self.buf:
                return ""
        c = self.buf[self.pos]
        self.pos += 1
        return c


class BufferedOutput(object):
    """Collects output fragments and writes them to a file-like object.

    Fragments are joined and written once more than 100 are pending, or
    when flush() is called explicitly.
    """

    def __init__(self, f):
        self.f = f
        self.buf = []

    def flush(self):
        """Write all pending fragments to the underlying file."""
        self.f.write("".join(self.buf))
        self.buf = []

    def put(self, s):
        """Queue the string *s* for output."""
        self.buf.append(s)
        if len(self.buf) > 100:
            self.flush()


def die(*s):
    """Print "Fatal: " plus the concatenated arguments to stderr and exit.

    Raises SystemExit with status 1; it never returns.
    """
    sys.stderr.write("Fatal: " + "".join(map(str, s)) + "\n")
    sys.exit(1)


def copy_bang(bi, bo):
    """Copy the rest of a ``<!...>`` construct from *bi* to *bo*.

    The caller has already consumed the opening "<".  Nested "<" and ">"
    are counted, and copying stops after the ">" that balances that
    opening "<".  Dies on EOF.
    """
    level = 1
    c = None
    while not (c == ">" and level == 0):
        c = bi.getc()
        if c == "":
            die("EOF inside <!>")
        elif c == "<":
            level += 1
        elif c == ">":
            level -= 1
        bo.put(c)


def copy_until(bi, bo, watch):
    """Copy characters from *bi* to *bo* up to and including *watch*.

    A sliding window of the last len(watch) characters is compared with
    *watch* after every character.  Dies on EOF.
    """
    history = " " * len(watch)
    while history != watch:
        c = bi.getc()
        if c == "":
            die("EOF while looking for " + watch)
        bo.put(c)
        history = history[1:] + c


def is_whitespace(c):
    """Return whether *c* is a space, tab, carriage return or line feed."""
    return c in _WHITESPACE


def _hux_start_tag(bi, bo, first):
    """Translate one hux start tag whose name begins with *first*.

    The "<" has already been written by the caller.  Returns the element
    name if the tag was opened with "/" (so a closing tag is owed later),
    or None if it ended with ">" (an empty element).
    """
    bo.put(first)
    element = first
    name_done = False
    while True:
        c = bi.getc()
        if c == "":
            die("EOF inside start tag")
        elif c == "/":
            # "<A/" opens an element: emit "<A>" and remember the name.
            bo.put(">")
            return element
        elif c == ">":
            # "<A>" is an empty element: emit "<A/>".
            bo.put("/>")
            return None
        elif not name_done:
            bo.put(c)
            if is_whitespace(c):
                name_done = True
            else:
                element += c
        elif c in _QUOTES:
            # Attribute value: copy verbatim so "/" and ">" inside it are
            # not mistaken for the end of the tag.
            bo.put(c)
            copy_until(bi, bo, c)
        else:
            bo.put(c)


def hux_to_xml(bi, bo):
    """Convert hux read from *bi* into standard XML written to *bo*.

    Dies (SystemExit) on malformed input, including a ">" with no open
    element and elements still open at end of input.
    """
    stack = []  # names of the elements that are currently open
    while True:
        c = bi.getc()
        if c == "":
            break
        elif c == "<":
            bo.put(c)
            c = bi.getc()
            if c == "":
                die("Trailing <")
            elif c == ">":
                die("<> makes no sense")
            elif c == "!":
                bo.put(c)
                copy_bang(bi, bo)
            elif c == "?":
                bo.put(c)
                copy_until(bi, bo, "?>")
            elif c == "-":
                c = bi.getc()
                if c != "-":
                    die("Expected <--")
                bo.put("!--")
                copy_until(bi, bo, "-->")
            elif c == "[":
                c = bi.getc()
                if c != "[":
                    die("Expected <[[")
                # NOTE(review): the recovered source emitted nothing here,
                # which would turn <[[A]]> into <A]]>; the header table
                # says the XML form is <![CDATA[A]]>.
                bo.put("![CDATA[")
                copy_until(bi, bo, "]]>")
            else:
                element = _hux_start_tag(bi, bo, c)
                if element is not None:
                    stack.append(element)
        elif c == ">":
            if not stack:
                die("Found > at top level")
            # NOTE(review): reconstructed; the recovered source had
            # bo.put("") here, which never closed any element.
            bo.put("</" + stack.pop() + ">")
        else:
            bo.put(c)
    if stack:
        die("Missing >s at end of file for: " + " ".join(stack))


def _xml_start_tag(bi, bo, first):
    """Translate one XML start tag whose name begins with *first*.

    Writes "<" and the tag contents; "/>" becomes ">" (empty element) and
    ">" becomes "/" (element with content following).
    """
    bo.put("<")
    bo.put(first)
    while True:
        c = bi.getc()
        if c == "":
            die("EOF inside start tag")
        elif c == "/":
            c = bi.getc()
            if c != ">":
                die("Expected />")
            bo.put(">")
            return
        elif c == ">":
            bo.put("/")
            return
        elif c in _QUOTES:
            bo.put(c)
            copy_until(bi, bo, c)
        else:
            bo.put(c)


def xml_to_hux(bi, bo):
    """Convert standard XML read from *bi* into hux written to *bo*.

    Closing tags are replaced by ">" without checking their names.
    Dies (SystemExit) on malformed input.
    """
    while True:
        c = bi.getc()
        if c == "":
            break
        elif c == "<":
            c = bi.getc()
            if c == "":
                die("Trailing <")
            elif c == ">":
                die("<> makes no sense")
            elif c == "/":
                # Skip the element name; hux closes with a bare ">".
                while c != ">":
                    c = bi.getc()
                    if c == "":
                        die("EOF in closing tag")
                bo.put(">")
            elif c == "!":
                # NOTE(review): this whole branch was garbled in the
                # recovered source and has been reconstructed.
                c = bi.getc()
                if c == "":
                    die("EOF inside <!")
                elif c == ">":
                    die("<!> makes no sense")
                elif c == "-":
                    c = bi.getc()
                    if c != "-":
                        die("Expected <!--")
                    bo.put("<--")
                    copy_until(bi, bo, "-->")
                elif c == "[":
                    # CDATA section: <![CDATA[A]]> becomes <[[A]]>.
                    marker = "".join([bi.getc() for _ in range(6)])
                    if marker != "CDATA[":
                        die("Expected <![CDATA[")
                    bo.put("<[[")
                    copy_until(bi, bo, "]]>")
                else:
                    # Other declarations (e.g. DOCTYPE) are copied as-is,
                    # balancing <> the same way hux_to_xml will on the way
                    # back.
                    bo.put("<!")
                    bo.put(c)
                    copy_bang(bi, bo)
            elif c == "?":
                bo.put("<?")
                copy_until(bi, bo, "?>")
            else:
                _xml_start_tag(bi, bo, c)
        elif c == ">":
            # A literal ">" in text would close an element in hux, so it
            # is written as an entity.  NOTE(review): reconstructed; the
            # recovered source wrote ">" unchanged.
            bo.put("&gt;")
        else:
            bo.put(c)


def usage():
    """Print the command-line help text to stdout."""
    # NOTE(review): the bug-report address was lost from the recovered
    # source; restore it here if it is known.
    sys.stdout.write("\n".join([
        "Usage: hux [OPTION]... [FILE]...",
        "Convert between the hux and standard XML encodings.",
        "",
        "  -d             convert XML to hux (default: hux to XML)",
        "  -o FILENAME    write output to FILENAME (default: stdout)",
        "  -h, --help     display this help and exit",
        "",
        "Report bugs to Adam Sampson.",
        "",
    ]))


def main(argv=None):
    """Run the command-line tool.

    *argv* is the argument list without the program name; it defaults to
    sys.argv[1:].  Each input file is converted in turn (stdin if none
    are given) and the results are concatenated on the output.  Exits
    with status 1 on bad options or malformed input.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(argv, "o:dh", ["help"])
    except getopt.GetoptError:
        usage()
        sys.exit(1)

    outfn = None
    decode = False
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o == "-o":
            outfn = a
        elif o == "-d":
            decode = True

    if decode:
        process = xml_to_hux
    else:
        process = hux_to_xml

    if outfn is None:
        out = sys.stdout
    else:
        out = open(outfn, "w")
    try:
        bo = BufferedOutput(out)
        if not args:
            process(BufferedInput(sys.stdin), bo)
        else:
            for infn in args:
                with open(infn) as f:
                    process(BufferedInput(f), bo)
        bo.flush()
    finally:
        # Only close what this function opened; stdout is just flushed.
        if out is sys.stdout:
            out.flush()
        else:
            out.close()


if __name__ == "__main__":
    main()