#!/bin/python
# News statistics.
# Copyright 2000, 2001, 2002 Adam Sampson <azz@gnu.org>
# Redistribute under the terms of the GNU
# General Public License version 2, or, at your option, any later version.

import sys
import os
import nntplib
import re
import string
import time

# Running tallies, all keyed by cleaned-up e-mail address.
posters = {}    # address -> number of articles posted
numlines = {}   # address -> total number of lines posted
excluded = {}   # address -> 1, for each address skipped via "exclude"

# Anti-spam decorations stripped out of addresses. The substitutions are
# applied in list order, so the longer forms come before plain "nospam".
removables = [
	"nospamplease",
	"-nospam-",
	"nospam",
]

# Put people you want to exclude from the listing in here.
exclude = []

# People read from "tagged" have a star by their names.
# The file is optional: if it is missing or unreadable, nobody is starred.
tagged = []
try:
	tagged_file = open("tagged")
	try:
		tagged = map(string.strip, tagged_file.readlines())
	finally:
		# Close the handle explicitly instead of leaving it to the
		# garbage collector.
		tagged_file.close()
except IOError:
	pass

# Validate the command line before opening a network connection.
if len(sys.argv) < 2:
	sys.stderr.write("usage: " + sys.argv[0] + " newsgroup\n")
	sys.exit(1)
group = sys.argv[1]

# Fetch the From and Lines headers for every article in the group, and
# always say goodbye to the server afterwards.
server = nntplib.NNTP("news.ukc.ac.uk")
try:
	resp, count, first, last, name = server.group(group)
	count = string.atoi(count)
	article_range = first + "-" + last
	resp, senders = server.xhdr("from", article_range)
	resp, lines = server.xhdr("lines", article_range)
finally:
	server.quit()

# Index the line counts by article id so that each sender is paired with
# the count belonging to the same article, even if the two listings do
# not line up entry for entry.
linecounts = {}
for artid, text in lines:
	linecounts[artid] = text

# Pulls the e-mail address out of a From header. "-" and "+" are accepted
# in the local part so that addresses such as "john-nospam@example.com"
# are captured whole rather than from the last hyphen onwards.
r = re.compile(r"([-+\w\$\.]+@[-a-z0-9]+\.[-a-z0-9\.]+).*", re.IGNORECASE)

# Tally posts and lines per address; tlines is the total number of lines
# over all articles that were not excluded.
tlines = 0
for artid, sender in senders:
	try:
		nlines = string.atoi(linecounts.get(artid, ""))
	except ValueError:
		# Missing or non-numeric Lines header: count the article as empty.
		nlines = 0
	match = r.search(sender)
	if match is None:
		mailaddr = "unknown"
	else:
		mailaddr = string.lower(match.group(1))
	for x in removables:
		mailaddr = string.replace(mailaddr, x, "")
	if mailaddr in exclude:
		# Remember who was left out so it can be reported at the end.
		excluded[mailaddr] = 1
		continue
	posters[mailaddr] = posters.get(mailaddr, 0) + 1
	numlines[mailaddr] = numlines.get(mailaddr, 0) + nlines
	tlines = tlines + nlines
people = posters.keys()
def postersfunc(a, b):
	"""Comparison function ordering addresses by post count, highest first.

	Returns -1 if a has more posts than b, 1 if it has fewer and 0 if the
	two counts are equal.
	"""
	posts_a = posters[a]
	posts_b = posters[b]
	if posts_a > posts_b:
		return -1
	if posts_a < posts_b:
		return 1
	return 0
def numlinesfunc(a, b):
	"""Comparison function ordering addresses by total lines, highest first.

	Returns -1 if a has more lines than b, 1 if it has fewer and 0 if the
	two totals are equal.
	"""
	lines_a = numlines[a]
	lines_b = numlines[b]
	if lines_a > lines_b:
		return -1
	if lines_a < lines_b:
		return 1
	return 0
def lenfunc(a, b):
	"""Comparison function ordering addresses by mean post length, longest first.

	The mean is an address's total lines divided by its number of posts.
	Returns -1 if a's mean is greater than b's, 1 if it is smaller and 0 if
	the two means are equal.
	"""
	# float() forces true division regardless of the operand types.
	mean_a = float(numlines[a]) / posters[a]
	mean_b = float(numlines[b]) / posters[b]
	if mean_a > mean_b:
		return -1
	if mean_a < mean_b:
		return 1
	return 0
def printlisting():
	"""Print a ranked table with one row per address, in the current order of people.

	Each row shows the rank, a "*" marker if the address is in tagged, the
	address, its post count and share of count, its line count and share of
	tlines, and its mean lines per post. A share is reported as 0.0 when
	its total (count or tlines) is zero, rather than dividing by zero.
	"""
	# print is written with a single parenthesised argument, which behaves
	# exactly like the plain Python 2 print statement.
	print("    %-30s %5s %5s %5s %5s %5s" % ("email address", "posts", "%post", "lines", "%line", "len"))
	for n in range(len(people)):
		x = people[n]
		if x in tagged:
			k = "*"
		else:
			k = " "
		nposts = posters[x]
		linecount = numlines[x]
		# count comes from the server and tlines is zero whenever no article
		# had a usable Lines header, so either total may be zero.
		if count:
			postpct = (100.0 * nposts) / count
		else:
			postpct = 0.0
		if tlines:
			linepct = (100.0 * linecount) / tlines
		else:
			linepct = 0.0
		print("%3d%s %-30s %5d %5.1f %5d %5.1f %5.1f" % (n+1, k, x, nposts, postpct, linecount, linepct, (1.0*linecount)/nposts))

# Report header: group totals and the time the statistics were computed.
# Every print below takes a single parenthesised argument, which behaves
# exactly like the plain Python 2 print statement.
print("Group summary for " + group + ": " + str(count) + " articles, " + str(tlines) + " lines.")
now = time.localtime(time.time())
print("Computed at " + time.asctime(now) + ".")

# One chart per ordering: each comparison function re-sorts people in
# place before the table is printed.
charts = [
	("Sorted by number of posts:", postersfunc),
	("Sorted by number of lines:", numlinesfunc),
	("Sorted by mean post length:", lenfunc),
]
for heading, comparator in charts:
	print("")
	print(heading)
	people.sort(comparator)
	printlisting()

# Finally, list anybody who was deliberately left out of the charts.
if excluded:
	print("")
	print("Excluded from the charts:")
	for x in excluded.keys():
		print(x)