#!/usr/bin/env python
# Code to decode an ITS mailbox into mbox format.
# Adam Sampson,
"""Convert ITS-style mailboxes into Unix mbox format.

Usage: give one or more ITS mailbox files on the command line; the converted
messages are written to standard output.

The code runs on Python 2 and Python 3.  On Python 2 the mailbox is read with
the standard library's ``mailbox._Mailbox`` / ``rfc822.Message`` classes.
Python 3 no longer ships those, so small stand-ins defined below are used
there instead.
"""

import datetime
import io
import mailbox
import re
import string
import sys
import time
from email.utils import parseaddr
from email.utils import parsedate as _rfc822_parsedate

try:
    import rfc822
except ImportError:  # the module does not exist on Python 3
    rfc822 = None

_MONTH_NAMES = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

# Envelope sender used when a dated message has no usable From header.
_UNKNOWN_SENDER = "MAILER-DAEMON"

# First line of an old ITS-format message, for example:
# "rms@MIT-ML (Sent by ___014@MIT-ML) 01/29/83 17:48:54"
# "CSTACY, PGS, ELLEN@MIT-MC (Sent by CSTACY@MIT-MC) 10/15/82 15:13:08 Re: for future reference"
# "Sheldon Furst @MIT-MC (Sent by ___065@MIT-MC) 09/30/79 19:33:28"
_ITS_HEADER_RE = re.compile(
    r"^(.*@[\w-]+) (\(.*\) )?(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)( )?(.*)$")


class MailboxFormatError(ValueError):
    """Raised when a message header or date stamp cannot be interpreted."""


class _FallbackMessage(object):
    """Stand-in for the parts of ``rfc822.Message`` this script relies on.

    Header recognition is deliberately as loose as rfc822's: any line that
    contains a colon counts as a header line.  Attributes and methods
    provided: ``fp``, ``headers`` (raw header lines), ``getheader``,
    ``getaddr`` and ``rewindbody``.
    """

    def __init__(self, fp):
        self.fp = fp
        self.headers = []
        self._values = {}
        current = None
        first = True
        while True:
            line_start = fp.tell()
            line = fp.readline()
            if not line:
                break
            if first and line.startswith("From "):
                # Leading Unix "From " envelope lines are skipped.
                continue
            first = False
            if current and line[0] in " \t":
                # Continuation of the previous header.
                self.headers.append(line)
                joined = self._values[current] + "\n " + line.strip()
                self._values[current] = joined.strip()
                continue
            if line in ("\n", "\r\n"):
                # The blank separator line is consumed, not kept.
                break
            colon = line.find(":")
            if colon > 0:
                current = line[:colon].lower()
                self.headers.append(line)
                self._values[current] = line[colon + 1:].strip()
            elif colon == 0:
                # Empty header name: ignore the line and carry on.
                current = None
            else:
                # Not a header: push the line back so it starts the body.
                fp.seek(line_start)
                break
        self._body_start = fp.tell()

    def getheader(self, name, default=None):
        """Return the value of header *name* (case-insensitive) or *default*."""
        return self._values.get(name.lower(), default)

    def getaddr(self, name):
        """Return ``(realname, address)`` for header *name*.

        Returns ``(None, None)`` when the header is absent.
        """
        value = self.getheader(name)
        if value is None:
            return (None, None)
        return parseaddr(value)

    def rewindbody(self):
        """Position ``fp`` at the first line of the message body."""
        self.fp.seek(self._body_start)


class _FallbackMailbox(object):
    """Stand-in for ``mailbox._Mailbox`` on interpreters that lack it.

    Subclasses supply ``_search_start`` (raising EOFError when no further
    message exists) and ``_search_end``.  ``fp`` must be a seekable
    text-mode file.
    """

    def __init__(self, fp, factory=_FallbackMessage):
        self.fp = fp
        self.seekp = 0
        self.factory = factory

    def __iter__(self):
        return iter(self.next, None)

    def next(self):
        """Return the next message, or None at the end of the mailbox."""
        while True:
            self.fp.seek(self.seekp)
            try:
                self._search_start()
            except EOFError:
                self.seekp = self.fp.tell()
                return None
            start = self.fp.tell()
            self._search_end()
            self.seekp = stop = self.fp.tell()
            if start != stop:
                break
        # Copy the message text into its own buffer so the message object
        # can seek freely without disturbing the mailbox position.
        self.fp.seek(start)
        lines = []
        while self.fp.tell() != stop:
            line = self.fp.readline()
            if not line:
                break
            lines.append(line)
        return self.factory(io.StringIO("".join(lines)))


if rfc822 is not None and hasattr(mailbox, "_Mailbox"):
    _MailboxBase = mailbox._Mailbox
else:
    _MailboxBase = _FallbackMailbox


class ITSMailbox(_MailboxBase):
    """Read an ITS-style semi-babyl mailbox, such as those included in the
    GPL release of the ITS source code, possibly stripping a GPL notice from
    the top."""

    def _search_start(self):
        """Leave the file positioned at the start of the next message.

        A message starts at the first line that begins with an ASCII letter.
        Raises EOFError when the end of the file is reached first.
        """
        while True:
            pos = self.fp.tell()
            line = self.fp.readline()
            if pos == 0 and line[:14] == "Copyright (c) ":
                # It's got the GPL header at the top.  Read until a line of
                # dashes; stop at end of file instead of indexing into "".
                while line and line[0] != "-":
                    line = self.fp.readline()
            if not line:
                raise EOFError
            if line[0] in string.ascii_letters:
                self.fp.seek(pos)
                return

    def _search_end(self):
        """Leave the file positioned just after the current message.

        The message ends before the next line starting with the ITS
        separator character (octal 037), or at end of file.
        """
        while True:
            pos = self.fp.tell()
            line = self.fp.readline()
            if not line:
                return
            if line[0] == '\037':
                self.fp.seek(pos)
                return


def _to_24_hour(hour_text, meridiem):
    """Convert a 12-hour clock hour plus "am"/"pm" marker to 24-hour form."""
    hour = int(hour_text)
    meridiem = meridiem.lower()
    if meridiem == "pm" and hour < 12:
        hour += 12
    elif meridiem == "am" and hour == 12:
        hour = 0
    return hour


def parsedate(ds):
    """Do the same as rfc822.parsedate, but cope with various formats that
    were in use in the late 70s and early 80s as well.

    Returns a 9-item time tuple, or None (after reporting the string on
    standard error) when no known format matches.  The weekday field of the
    tuple is not meaningful.
    """
    date = _rfc822_parsedate(ds)

    if not date and len(ds) >= 4 and ds[-4] == "-":
        # "Tue Oct 10 12:22:02 2000-EDT"
        date = _rfc822_parsedate(ds.replace("-", " "))

    if not date and "," in ds:
        # "Tue,11 Oct 85 12:00:00 EST"
        date = _rfc822_parsedate(ds.replace(",", " "))

    if not date:
        # "25 Sep 1981 0023-GMT".
        m = re.match(r"(\d+) (\w+) (\d+) (\d\d)(\d\d)-(\w+)", ds)
        if m:
            date = _rfc822_parsedate(
                "Mon %s %s %s:%s:00 %s %s"
                % (m.group(2), m.group(1), m.group(4), m.group(5),
                   m.group(3), m.group(6)))

    if not date:
        # "22 September 1982 23:21-EDT (Wednesday)"
        # "18 Dec 81 3:59:26-EST (Fri)"
        m = re.match(r"(\d+) (\w\w\w)\w* (\d+)[ ]+(\d+):(\d\d)(:\d\d)?-(\w+)"
                     r" \((\w\w\w)\w*\)", ds)
        if m:
            sec = m.group(6) or ":00"
            year = m.group(3)
            if len(year) == 2:
                year = "19" + year
            date = _rfc822_parsedate(
                "%s %s %s %s:%s%s %s %s"
                % (m.group(8), m.group(2), m.group(1), m.group(4),
                   m.group(5), sec, year, m.group(7)))

    if not date:
        # "5-DEC-1981 16:02:44.12"
        if re.match(r"\d+-\w+", ds):
            date = _rfc822_parsedate(ds.replace("-", " ")[:-3])

    if not date:
        # "19 Mar 1981 at 0318-PST"
        m = re.match(r"(\d+ \w+ \d+) at (\d\d)(\d\d)-(\w+)", ds)
        if m:
            date = _rfc822_parsedate(
                "%s %s:%s:00 %s"
                % (m.group(1), m.group(2), m.group(3), m.group(4)))

    if not date:
        # "25 Sep 87 1540 PDT"
        m = re.match(r"(\d+ \w+ \d+)[ ]+(\d\d)(\d\d) (\w+)$", ds)
        if m:
            date = _rfc822_parsedate(
                "%s %s:%s:00 %s"
                % (m.group(1), m.group(2), m.group(3), m.group(4)))

    if not date:
        # "15 Mar 1981 (Sunday) 2100-EDT"
        m = re.match(r"(\d+ \w+ \d+) \((\w\w\w)\w*\) (\d\d)(\d\d)-(\w+)", ds)
        if m:
            date = _rfc822_parsedate(
                "%s %s:%s:00 %s"
                % (m.group(1), m.group(3), m.group(4), m.group(5)))

    if not date:
        # "Monday, September 12, 1983 5:40AM-EDT"
        m = re.match(r"\w+, (\w\w\w)\w* (\d+), (\d+) (\d+):(\d+)(\w\w)-(\w+)",
                     ds)
        if m:
            hour = _to_24_hour(m.group(4), m.group(6))
            date = _rfc822_parsedate(
                "%s %s %d:%s:00 %s %s"
                % (m.group(1), m.group(2), hour, m.group(5),
                   m.group(3), m.group(7)))

    if not date:
        # "Jun 17, 1985 10:38am"
        m = re.match(r"(\w\w\w) (\d+), (\d+) (\d+):(\d+)(\w\w)", ds)
        if m:
            hour = _to_24_hour(m.group(4), m.group(6))
            date = _rfc822_parsedate(
                "%s %s %d:%s:00 %s"
                % (m.group(1), m.group(2), hour, m.group(5), m.group(3)))

    if not date:
        sys.stderr.write("unparsable date: " + ds + "\n")
    return date


def _format_date(ds):
    """Return date string *ds* in time.asctime() form.

    The weekday is computed from the calendar date, because the tuple
    returned by parsedate() always carries weekday 0 (Monday).

    Raises MailboxFormatError if *ds* cannot be parsed or is out of range.
    """
    date = parsedate(ds)
    if not date:
        raise MailboxFormatError("unparsable date: " + ds)
    try:
        weekday = datetime.date(date[0], date[1], date[2]).weekday()
        return time.asctime(tuple(date[:6]) + (weekday, 1, -1))
    except ValueError:
        raise MailboxFormatError("date out of range: " + ds)


def _write_rfc822_message(msg, ds, output):
    """Write a message that carries a Date header (*ds*) to *output*."""
    date = _format_date(ds)
    # getaddr() yields no address when the From header is missing or
    # unparsable; fall back to a placeholder rather than failing.
    addr = msg.getaddr("from")[1] or _UNKNOWN_SENDER
    output.write("From " + addr + " " + date + "\n")
    for hdr in msg.headers:
        output.write(hdr)
    output.write("\n")
    msg.rewindbody()
    line = None
    for line in msg.fp.readlines():
        if line[:5] == "From ":
            line = ">" + line
        if line[-1] != "\n":
            line += "\n"
        output.write(line)
    # Insert an extra blank line if it didn't end with one.
    if line != "\n":
        output.write("\n")


def _write_its_message(msg, output):
    """Write a message in the old ITS format (no Date header) to *output*.

    Raises MailboxFormatError if the first line is not an ITS header.
    """
    # The message parser treats any line containing a colon as a header, so
    # the ITS header line (which holds a time) ends up in msg.headers.
    # Rebuild the raw text and parse that line ourselves.
    text = "".join(msg.headers)
    msg.rewindbody()
    body = msg.fp.read()
    if body:
        text += "\n" + body
    lines = text.split("\n")

    m = _ITS_HEADER_RE.match(lines[0])
    if not m:
        sys.stderr.write(">>" + text + "<<\n")
        raise MailboxFormatError("Non-RFC822 message with unparsable header")

    fromaddr = m.group(1)
    month = int(m.group(3))
    if not 1 <= month <= 12:
        raise MailboxFormatError("month out of range in header: " + lines[0])
    year = m.group(5)
    if len(year) != 4:
        # ITS headers carry the year without its century.
        year = "19" + year
    date = _format_date(
        "%s %s %s:%s:%s %s"
        % (m.group(4), _MONTH_NAMES[month - 1], m.group(6), m.group(7),
           m.group(8), year))

    output.write("From " + fromaddr.replace(" ", "") + " " + date + "\n")
    output.write("From: " + fromaddr + "\n")
    output.write("Date: " + date + "\n")
    output.write("Subject: " + m.group(10) + "\n")

    in_body = False
    line = None
    for line in lines[1:]:
        if not in_body and ":" not in line:
            # First line that cannot be a header: start the body here.
            in_body = True
            output.write("\n")
        if line[:5] == "From ":
            # Quote it so mbox readers do not see a message boundary.
            line = ">" + line
        output.write(line + "\n")
    if line != "":
        output.write("\n")


def writeunixmailbox(mb, output):
    """Write a mailbox object to the output file in Unix mailbox format.

    *mb* must provide ``next()`` returning message objects (with
    ``getheader``, ``getaddr``, ``headers``, ``rewindbody`` and ``fp``) and
    None at the end.  *output* must provide ``write``.

    Raises MailboxFormatError for a message whose date or ITS header line
    cannot be interpreted.
    """
    while True:
        msg = mb.next()
        # Compare with None explicitly: rfc822.Message defines __len__, so a
        # message without parsed headers is falsy and would otherwise end
        # the conversion early.
        if msg is None:
            break
        ds = msg.getheader("date")
        if ds:
            # The message had a date stamp.
            _write_rfc822_message(msg, ds, output)
        else:
            # The message didn't have a datestamp, so it's probably
            # in the old ITS format.
            _write_its_message(msg, output)


def _open_mailbox(path):
    """Open *path* for reading without altering its contents."""
    if sys.version_info[0] >= 3:
        # latin-1 maps every byte to one character and newline="\n"
        # disables newline translation, so no input byte is rejected.
        return open(path, "r", encoding="latin-1", newline="\n")
    return open(path, "r")


def main(argv=None):
    """Convert each mailbox named in *argv* (default: the command line)."""
    paths = sys.argv[1:] if argv is None else argv
    for path in paths:
        fp = _open_mailbox(path)
        try:
            writeunixmailbox(ITSMailbox(fp), sys.stdout)
        finally:
            fp.close()


if __name__ == "__main__":
    main()