#!/bin/perl
# Feed a newsgroup archive on stdin; produces "brain food" on stdout.
# Trained on ASR.
# - ats@offog.org, 30th July 2001

use strict;
use warnings;

# Parser states used by filter_archive().
use constant {
    IN_BODY      => 0,    # collecting paragraph text
    IN_HEADER    => 1,    # skipping message headers, up to the first blank line
    IN_SIGNATURE => 2,    # skipping a signature, up to the next "From " line
};

# Paragraph accumulated so far; appended to by snarf(), flushed by eject().
my $buf = "";

# snarf($text): append one line of body text (plus a separating space) to the
# current paragraph buffer.
sub snarf {
    my ($text) = @_;
    $buf .= "$text ";
    return;
}

# eject(): finish the current paragraph. The buffer is always cleared; the
# paragraph is printed (followed by a newline) to the currently selected
# output handle only if it passes every filter below.
sub eject {
    my $text = $buf;
    $buf = "";

    return if $text eq "";               # nothing collected
    return if $text =~ m{://};           # contains a URL
    return if $text =~ /\@.*\@/;         # contains two or more '@' signs
    return if $text =~ /<.*\@.*>/;       # contains an <address@like.this>
    return if length($text) < 20;        # too short

    # Reject paragraphs in which more than a tenth of the characters are
    # digits.
    my $digit_count = ($text =~ tr/0-9//);
    return if $digit_count > length($text) / 10;

    print "$text\n";
    return;
}

# filter_archive($in): read an archive line by line from the filehandle $in
# (default: the magic ARGV handle, i.e. the files named on the command line or
# stdin) and print every paragraph that survives eject()'s filters.
sub filter_archive {
    my ($in) = @_;
    $in = \*ARGV unless defined $in;

    my $state = IN_BODY;

    while (defined(my $line = readline $in)) {
        chomp $line;
        $line =~ s/^\s+//;

        # The signature delimiter is "-- " WITH its trailing space, so it has
        # to be recognised before trailing whitespace is trimmed; afterwards
        # it would be indistinguishable from a plain "--".
        my $is_sig_delimiter = ($line =~ /\A-- \r?\z/);

        $line =~ s/\s+$//;

        if ($state == IN_HEADER) {
            # Headers end at the first blank line.
            $state = IN_BODY if $line eq "";
            next;
        }

        if ($state == IN_SIGNATURE) {
            # A signature runs until the next message starts.
            $state = IN_HEADER if $line =~ /^From /;
            next;
        }

        # IN_BODY from here on.
        if ($line =~ /^From /) {
            # Start of the next message.
            eject();
            $state = IN_HEADER;
        }
        elsif ($is_sig_delimiter) {
            eject();
            $state = IN_SIGNATURE;
        }
        elsif ($line =~ /^[->}:#|]/
            or $line =~ /^In article/
            or $line =~ /wrote in/
            or $line =~ /[A-Z] [A-Z] [A-Z]/)
        {
            # Lines starting with one of - > } : # |, attribution lines and
            # letter-spaced capitals are dropped without ending the paragraph.
            next;
        }
        elsif ($line =~ /:$/ or $line eq "") {
            # A line ending in a colon is dropped and, like a blank line,
            # ends the current paragraph.
            eject();
        }
        else {
            snarf($line);
        }
    }

    # Flush the final paragraph: the input need not end with a blank line.
    eject();
    return;
}

# Run as a filter when executed as a script; stay quiet when loaded via
# require/do so the subs can be exercised directly.
filter_archive() unless caller;