#!/usr/bin/python
# Based on version control history, check that copyright notices mention all
# the years in which files were altered.
# Adam Sampson

import sys
import os
import re

from offog import warn


def parse_darcs(f):
    """Parse the output of "darcs log --last N -v".

    f is an iterable of text lines (e.g. an open file or sys.stdin).

    Returns a dict mapping each filename that appears in a hunk line to the
    set of years (as four-digit strings) taken from the most recent dated
    header line that preceded the hunk.
    """
    # Tue May 19 13:58:49 BST 2009 Neil Brown ...
    header_re = re.compile(r'^[A-Z].* (\d\d\d\d) ')
    # hunk ./common/Types.hs 204
    hunk_re = re.compile(r'^ +hunk \./([^ ]*) ')

    changes = {}
    year = None
    for line in f:
        # Drop only the line terminator; slicing off the last character
        # unconditionally would eat real data on an unterminated final line.
        line = line.rstrip("\n")

        m = header_re.match(line)
        if m is not None:
            year = m.group(1)
            continue

        m = hunk_re.match(line)
        if m is not None:
            years = changes.setdefault(m.group(1), set())
            # A hunk seen before any dated header has no year to record.
            # Still register the file, but never store None: it cannot be
            # sorted or joined with the year strings later on.
            if year is not None:
                years.add(year)

    return changes


def scan_files(fns):
    """Given a list of filenames (some of which might not exist), read
    their copyright headers.

    Returns a dict mapping each filename that could be opened to the set of
    year strings found on its "Copyright (C) ..." lines. Files that cannot
    be opened are left out of the result. Problems with the notices are
    reported through warn().
    """
    # NOTE(review): the first group is greedy, so the second group is only
    # the text after the *last* space on the line; a multi-word holder ends
    # up in the year list and triggers the "non-standard year list" warning.
    # Confirm whether a wider separator was intended.
    notice_re = re.compile(r'^Copyright \(C\) (.*) (.*)$')
    year_re = re.compile(r'^\d{4}$')
    loose_re = re.compile(r'^Copyright.*20')

    changes = {}
    for fn in fns:
        try:
            f = open(fn)
        except IOError:
            # Files named in the history may have been removed since.
            continue

        found = set()
        changes[fn] = found

        # The context manager closes the file even if a line fails to
        # decode or warn() raises.
        with f:
            for n, line in enumerate(f, 1):
                line = line.strip()

                m = notice_re.match(line)
                if m is not None:
                    years = re.split(r',\s*', m.group(1))
                    if years != sorted(years):
                        warn(fn, ":", n, ": years out of order")
                    for year in years:
                        if not year_re.match(year):
                            warn(fn, ":", n, ": non-standard year list")
                    found.update(years)
                    continue

                # Anything else that looks like a dated copyright line but
                # did not match the expected layout.
                if loose_re.match(line) is not None:
                    warn(fn, ":", n, ": non-standard copyright line: '", line, "'")

    return changes


def main():
    """Read a darcs log on stdin and report missing copyright years.

    For every file with missing years a warning is issued and a sed command
    that would append those years is written to stdout.
    """
    history = parse_darcs(sys.stdin)
    fns = sorted(history)
    notices = scan_files(fns)

    for fn in fns:
        if fn not in notices:
            warn(fn, ": no longer exists")
            continue
        if not notices[fn]:
            warn(fn, ": no copyright notices")
            continue

        missing = history[fn] - notices[fn]
        if missing:
            s = ", ".join(sorted(missing))
            warn(fn, ": missing years: ", s)
            # Backslashes are written explicitly so the literal contains no
            # invalid escape sequences; the emitted text is unchanged.
            sys.stdout.write("sed 's/^\\(Copyright.*\\)\\( University.*\\)$/\\1, %s\\2/' %s\n" % (s, fn))


if __name__ == "__main__":
    main()