#!/bin/python
# News statistics.
# Copyright 2000, 2001, 2002 Adam Sampson
# Redistribute under the terms of the GNU
# General Public License version 2, or, at your option, any later version.
#
# Usage: <this script> GROUP [SERVER]
#
# Prints, for one newsgroup, how many articles and lines each poster
# contributed, ranked three ways.  The code sticks to constructs that behave
# the same on Python 2.6+ and Python 3 (single-argument print(), key-based
# sorting, str methods instead of the old "string" module functions).

import sys
import os
import re
import string
import time

try:
    import nntplib
except ImportError:
    # nntplib was removed from the standard library in Python 3.13.  Keep the
    # helpers below importable anyway; main() reports the problem.
    nntplib = None

# Server queried when no SERVER argument is given on the command line.
DEFAULT_SERVER = "news.ukc.ac.uk"

# Anti-spam decorations stripped from addresses, applied in this order.
removables = ["nospamplease", "-nospam-", "nospam"]

# Put people you want to exclude from the listing in here.
exclude = []

# Picks the first thing that looks like an e-mail address out of a From: line.
ADDRESS_RE = re.compile(r"([\w\$\.]+@[-a-z0-9]+\.[-a-z0-9\.]+).*",
                        re.IGNORECASE)


def load_tagged(path="tagged"):
    """Return the set of addresses listed one per line in *path*.

    People read from "tagged" have a star by their names.  If the file cannot
    be opened or read (IOError) nobody is tagged and an empty set is returned.
    """
    try:
        with open(path) as handle:
            return set(line.strip() for line in handle)
    except IOError:
        return set()


def extract_address(sender):
    """Return the normalised poster address found in a From: header value.

    The first address-like substring is lower-cased and every entry of
    ``removables`` is deleted from it.  A header with no recognisable address
    yields "unknown".
    """
    match = ADDRESS_RE.search(sender)
    address = match.group(1).lower() if match else "unknown"
    for decoration in removables:
        address = address.replace(decoration, "")
    return address


def tally(senders, lines, excluded_addresses=None):
    """Accumulate per-poster article and line counts.

    senders            -- (article id, From: text) pairs
    lines              -- (article id, Lines: text) pairs
    excluded_addresses -- addresses to leave out; defaults to ``exclude``

    Returns ``(posters, numlines, excluded, tlines)``: articles per address,
    lines per address, a dict whose keys are the excluded addresses that
    actually posted, and the total line count of the articles counted.
    """
    if excluded_addresses is None:
        excluded_addresses = exclude

    # Pair the two header lists by article id rather than by position, so an
    # article absent from the "lines" reply counts as 0 lines instead of
    # shifting every later count onto the wrong poster (or raising
    # IndexError).
    line_text = dict(lines)

    posters = {}
    numlines = {}
    excluded = {}
    tlines = 0
    for article_id, sender in senders:
        try:
            nlines = int(line_text.get(article_id, ""))
        except ValueError:
            # Missing or non-numeric Lines: header.
            nlines = 0
        address = extract_address(sender)
        if address in excluded_addresses:
            excluded[address] = 1
            continue
        posters[address] = posters.get(address, 0) + 1
        numlines[address] = numlines.get(address, 0) + nlines
        tlines += nlines
    return posters, numlines, excluded, tlines


def _percent(part, whole):
    """Return *part* as a percentage of *whole*, or 0.0 if *whole* is zero."""
    if not whole:
        return 0.0
    return (100.0 * part) / whole


def format_listing(people, posters, numlines, tagged, count, tlines):
    """Return the table rows (header first) for *people* in the given order.

    people   -- addresses in display order
    posters  -- articles per address
    numlines -- lines per address
    tagged   -- addresses to mark with "*"
    count    -- article count used for the %post column
    tlines   -- line total used for the %line column

    Percentages are reported as 0.0 when their denominator is zero.
    """
    # The five-space lead matches the "%3d%s " rank/marker prefix of each
    # data row so the column titles sit above their columns.
    rows = ["     %-30s %5s %5s %5s %5s %5s"
            % ("email address", "posts", "%post", "lines", "%line", "len")]
    for rank, address in enumerate(people, 1):
        marker = "*" if address in tagged else " "
        rows.append("%3d%s %-30s %5d %5.1f %5d %5.1f %5.1f"
                    % (rank, marker, address,
                       posters[address],
                       _percent(posters[address], count),
                       numlines[address],
                       _percent(numlines[address], tlines),
                       (1.0 * numlines[address]) / posters[address]))
    return rows


def fetch_headers(server, group):
    """Fetch the From: and Lines: headers of every article in *group*.

    Returns ``(count, senders, lines)`` where *count* is the article count
    reported by the server for the group and the other two are the lists of
    (article id, text) pairs returned by XHDR.  The connection is always
    closed, also when a command fails.
    """
    connection = nntplib.NNTP(server)
    try:
        reply = connection.group(group)
        count, first, last = reply[1:4]
        # Python 2 returns these numbers as strings, Python 3 as integers;
        # "%s" formatting copes with both.
        article_range = "%s-%s" % (first, last)
        senders = connection.xhdr("from", article_range)[1]
        lines = connection.xhdr("lines", article_range)[1]
    finally:
        connection.quit()
    return int(count), senders, lines


def main():
    """Run the report for the group named on the command line.

    Returns the process exit status: 0 on success, 2 for a usage error,
    1 when nntplib is unavailable.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s GROUP [SERVER]\n"
                         % os.path.basename(sys.argv[0]))
        return 2
    if nntplib is None:
        sys.stderr.write("the nntplib module is not available in this "
                         "Python installation\n")
        return 1

    group = sys.argv[1]
    server = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_SERVER

    tagged = load_tagged()
    count, senders, lines = fetch_headers(server, group)
    posters, numlines, excluded, tlines = tally(senders, lines)
    people = list(posters)

    print("Group summary for " + group + ": " + str(count) + " articles, "
          + str(tlines) + " lines.")
    print("Computed at " + time.asctime(time.localtime(time.time())) + ".")

    # Each ordering re-sorts the same list in place.  The sort is stable, so
    # ties keep the order left by the previous listing.
    orderings = [
        ("Sorted by number of posts:", posters.get),
        ("Sorted by number of lines:", numlines.get),
        ("Sorted by mean post length:",
         lambda who: (1.0 * numlines[who]) / posters[who]),
    ]
    for title, score in orderings:
        print("")
        print(title)
        people.sort(key=score, reverse=True)
        for row in format_listing(people, posters, numlines, tagged,
                                  count, tlines):
            print(row)

    if excluded:
        print("")
        print("Excluded from the charts:")
        for address in excluded:
            print(address)
    return 0


if __name__ == "__main__":
    sys.exit(main())