#!/usr/bin/env python
"""
Parse the output of a PostgreSQL dump from an RT system, and upload the tickets
contained therein to Trac.

Adam Sampson

The dump was created with:

select * from tickets,transactions,attachments where queue=8 and
transactions.objectid=tickets.effectiveid and status!='deleted' and
attachments.transactionid=transactions.id;

Notes:

* The dump is read from standard input.  Records are separated by lines
  starting with "-[ RECORD".
* The directories "attachments/" and "out/" must already exist in the current
  working directory; files are opened there without creating the directories.
* NOTE(review): the upload step relies on Python 2 behaviour (``str.decode``
  with a codec name taken from the "contentencoding" column).  ``print`` is
  always called with a single parenthesised argument, so the statements mean
  the same thing whether ``print`` is a statement or a function.
"""

ignore_tickets = (10585, 19843, 19934, 19972, 20024)
trac_url = "https://projects-dev.cs.kent.ac.uk/projects/swigoccam/trac/"
trac_username = "ats"
trac_password = ""

import re
import sys

try:
    # Not used by this script; kept so the module namespace is unchanged
    # where the module exists (Python 2).
    import urllib2
except ImportError:
    pass

# mechanize (third-party) is imported inside output(), the only place that
# needs it, so the parsing helpers can be imported without it.


def die(*s):
    """Write the concatenated string forms of *s* to stderr and exit with status 1."""
    sys.stderr.write("".join(map(str, s)) + "\n")
    sys.exit(1)


def parse_dump(f):
    """Yield one dict per record read from the file-like object *f*.

    The first line of *f* is skipped.  Each following "name | value" line
    becomes an entry keyed "<table>_<name>", where <table> advances through
    "tickets", "transactions", "attachments" every time a column called "id"
    is seen.  Lines of the form "   : text" continue the previous value on a
    new line.  A line starting with "-[ RECORD", or end of input, ends the
    current record.  Records with no fields are not yielded.

    Calls die() on an unrecognised non-blank line, or when a record contains
    more "id" columns than there are known tables.
    """
    cont_re = re.compile(r'^\s+: (.*)$')
    field_re = re.compile(r'^(\S+)\s*\| (.*)$')

    f.readline()

    # The tables in order -- fortunately the first column in each is "id",
    # so it's easy to tell where they start!
    table_names = ("unknown", "tickets", "transactions", "attachments")

    n = 0
    fields = {}
    last = None
    while True:
        line = f.readline()
        if line == "" or line.startswith("-[ RECORD"):
            # Skip empty records (empty input, back-to-back headers).
            if fields:
                yield fields
            n = 0
            fields = {}
            last = None
            if line == "":
                break
            continue

        # Only remove an actual newline: the final line of the input may not
        # have one, and blindly chopping a character would corrupt its value.
        if line.endswith("\n"):
            line = line[:-1]

        m = cont_re.match(line)
        if m is not None and last is not None:
            fields[last] += "\n" + m.group(1)
            continue

        m = field_re.match(line)
        if m is not None:
            field = m.group(1)
            if field == "id":
                n += 1
                if n >= len(table_names):
                    die("Too many \"id\" columns in record: " + line)
            field = table_names[n] + "_" + field
            fields[field] = m.group(2)
            last = field
            continue

        if line.strip() != "":
            die("Unrecognised line: " + line)


def collate(d):
    """Regroup joined rows into ``{table: {id: {column: value}}}``.

    *d* is an iterable of dicts keyed "<table>_<column>" (as produced by
    parse_dump).  Each row must contain a "<table>_id" entry for every table
    that appears in its keys.  Rows repeating the same table id are merged
    into the same item, later values overwriting earlier ones.
    """
    tables = {}
    for row in d:
        # One item per table for this row; looked up once, not per column.
        items = {}
        for (key, value) in row.items():
            (table, field) = key.split("_", 1)
            if table not in items:
                by_id = tables.setdefault(table, {})
                items[table] = by_id.setdefault(row[table + "_id"], {})
            items[table][field] = value
    return tables


def sanitise(s):
    """Return *s* with "@" spelled out and Trac "{{{"/"}}}" markers replaced."""
    return s.replace("@", " at ").replace("{{{", "<<<").replace("}}}", ">>>")


def _attachment_ids_by_ticket(tables):
    """Map each transaction "objectid" to the ids of its attachments.

    Attachments whose content starts with the automatic-message banner are
    left out.
    """
    by_transaction = {}
    for (att_id, att) in tables['attachments'].items():
        if att['content'].startswith("This message has been generated automatically"):
            continue
        by_transaction.setdefault(att['transactionid'], []).append(att_id)

    by_ticket = {}
    for (trans_id, trans) in tables['transactions'].items():
        by_ticket.setdefault(trans['objectid'], []).extend(
            by_transaction.get(trans_id, []))
    return by_ticket


def _strip_leading_headers(body):
    """Drop the header-like first paragraph from bodies that start with one.

    Applies only to bodies that start with "Email:" or "Cc:", or that mention
    "was acted upon." within their first 80 characters.  Everything up to and
    including the first empty line is removed; a body without an empty line
    is returned unchanged.
    """
    if not (body.startswith(('Email:', 'Cc:'))
            or "was acted upon." in body[:80]):
        return body
    lines = body.split("\n")
    try:
        blank = lines.index("")
    except ValueError:
        return body
    return "\n".join(lines[blank + 1:])


def _build_description(ticket_id, atts):
    """Build the Trac description and collect file attachments for a ticket.

    *atts* are the ticket's attachment rows in display order.  Plain-text
    parts without a filename are appended to the description; everything
    else is written to "attachments/" and returned for upload.

    Returns ``(descr, attachments)`` where *descr* is a list of string
    fragments and *attachments* is a list of
    ``(upload_name, content_type, local_path)`` tuples.
    """
    attachments = []
    descr = ["Imported automatically from RT ticket %s.\n" % ticket_id]
    seen = set()
    for att in atts:
        ct = att['contenttype']
        if ct in ('multipart/mixed', 'multipart/signed',
                  'application/pgp-signature'):
            continue

        body = att['content']
        if att['contentencoding'] not in ("", "none"):
            # NOTE(review): relies on Python 2 str.decode accepting the codec
            # named in the dump -- confirm which values occur.
            body = body.decode(att['contentencoding'])
        body = _strip_leading_headers(body).strip()

        # Identical bodies are only imported once per ticket.
        if body in seen:
            continue
        seen.add(body)

        if ct != "text/plain" or att['filename'] != '':
            if att['filename'] != '':
                afn = att['filename']
                filename = "%s_%s" % (att['id'], att['filename'])
            else:
                afn = filename = "%s.txt" % att['id']
            path = "attachments/" + filename
            # Binary mode: the attachment may not be text.
            with open(path, "wb") as f:
                f.write(body)
            print(" Attachment: " + filename)
            attachments.append((afn, ct, path))
            continue

        descr.append("\n== %s ==\n\n" % sanitise(att['created']))
        descr.extend(["{{{\n", sanitise(body), "\n}}}\n"])
    return descr, attachments


def _require_html(browser, what):
    """Abort via die() unless the browser is currently viewing an HTML page."""
    if not browser.viewing_html():
        die("Expected an HTML page from Trac while ", what)


def _upload_ticket(browser, ticket, subject, description, attachments):
    """Create the ticket in Trac, attach its files, and resolve it if needed.

    The form indices used are the ones the original script relied on for the
    Trac pages at trac_url.
    """
    browser.open(trac_url + "newticket")
    _require_html(browser, "opening the new-ticket form")
    browser.select_form(nr=1)
    browser["description"] = description
    browser["summary"] = subject
    resp = browser.submit(nr=1)
    _require_html(browser, "submitting the new ticket")
    m = re.search(r'Ticket #(\d+)', resp.read())
    if m is None:
        die("Can't find ticket number")
    ticket_num = m.group(1)
    print(" ... success, ticket number %s" % ticket_num)

    ticket_url = trac_url + "ticket/" + ticket_num

    for (filename, ct, realfn) in attachments:
        print(" Attaching %s" % filename)
        browser.open(ticket_url)
        _require_html(browser, "opening the ticket page")
        browser.select_form(nr=2)
        browser.submit()  # "Attach File"
        _require_html(browser, "opening the attachment form")
        browser.select_form(nr=1)
        # The file has to stay open until the form has been submitted.
        with open(realfn, "rb") as f:
            browser.add_file(f, content_type=ct, filename=filename,
                             name="attachment")
            browser.submit()
            _require_html(browser, "uploading an attachment")

    if ticket["status"] == "resolved":
        print(" Resolving")
        resp = browser.open(ticket_url)
        _require_html(browser, "opening the ticket page")
        # NOTE(review): debugging aid kept from the original -- dumps the
        # ticket page to "blah" in the current directory.
        with open("blah", "w") as f:
            f.write(resp.read())
        # The form that carries the "action" control is the last on the page.
        num_forms = len(list(browser.forms()))
        browser.select_form(nr=num_forms - 1)
        browser["action"] = ["resolve"]
        # ClientForm bug: it chops the newline off the end of the description,
        # so we have to set it again.
        browser["description"] = description
        browser["summary"] = subject
        browser.submit(nr=1)
        _require_html(browser, "resolving the ticket")


def output(tables):
    """Write every ticket in *tables* to disk and upload it to Trac.

    *tables* is the structure returned by collate().  Tickets are processed
    in ascending numeric "effectiveid" order; ids listed in ignore_tickets
    are skipped.  For each ticket a summary file is written to "out/<id>",
    non-text attachments are written to "attachments/", and the ticket is
    created (and resolved when its status is "resolved") through the Trac
    web interface.
    """
    # Imported first so that a missing dependency fails before any file is
    # written.
    import mechanize

    ids_by_ticket = _attachment_ids_by_ticket(tables)
    tickets = sorted(tables['tickets'].values(),
                     key=lambda t: int(t['effectiveid']))

    for ticket in tickets:
        ticket_id = ticket['effectiveid']
        if int(ticket_id) in ignore_tickets:
            continue

        # A ticket without any transaction simply has no attachments.
        atts = sorted(
            (tables['attachments'][i] for i in ids_by_ticket.get(ticket_id, [])),
            key=lambda a: a['created'])

        subject = sanitise(ticket['subject'])
        print("Ticket #%s (%s): %s" % (ticket_id, ticket["status"], subject))

        descr, attachments = _build_description(ticket_id, atts)
        description = "".join(descr)

        with open("out/" + ticket_id, "w") as f:
            f.write("Subject: " + subject + "\n\n")
            f.write(description)

        print(" Creating ticket")
        browser = mechanize.Browser()
        browser.set_handle_robots(False)
        browser.add_password(trac_url, trac_username, trac_password)
        _upload_ticket(browser, ticket, subject, description, attachments)


if __name__ == "__main__":
    d = parse_dump(sys.stdin)
    tables = collate(d)
    output(tables)