#!/usr/bin/env perl
# Convert Telsa's diary on stdin to an RSS feed on stdout.
# Usage: perl telsa-rss.pl <diary.html >diary.rss
# Adam Sampson

use strict;
use warnings;

# This isn't a CGI script; we just want to use the CGI module's
# HTML-escaping routines.
use CGI qw/:standard/;

# The URL of the diary page, so we can construct links to it.
my $diary_url = "http://www.linux.org.uk/~telsa/Diary/diary.html";

# The maximum number of articles to include in the RSS.
my $max_articles = 10;

# Slurp the entire page into memory. $/ is localised so that the
# change to the input record separator cannot leak into other code.
my $html = do { local $/; <> };

# Empty input yields an empty (but still well-formed) feed rather than
# a match against undef.
$html = '' unless defined $html;

# Pick out the articles, in page order, stopping as soon as we have as
# many as we are going to print. If the page format changes, this
# regexp will need altering to match.
#
# Captures: $1 = anchor name (used as the link fragment),
#           $2 = entry title, $3 = entry body (still HTML).
#
# NOTE(review): the opening <dt><a name="..."><strong> and <dd> anchors
# are reconstructed from the closing tags that follow them; confirm
# against the markup of the live diary page.
my @articles;
while (
    @articles < $max_articles
    && $html =~ m{
        <dt> \s* <a \s+ name="(.*?)"> \s* <strong> \s*
        (.*?) </strong> </a> </dt>
        .*?
        <dd> \s* (.*?) \s* </dd>
    }xsgi
    )
{
    push @articles, [ $1, $2, $3 ];
}

# Tell CGI.pm which character set we are producing before calling
# escapeHTML (presumably some CGI.pm versions consult it when deciding
# what to escape -- confirm against the installed version).
charset("UTF-8");

# Now generate the RSS. See the RSS 2.0 specification for the format.
print <<EOF;
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>The more accurate diary. Really.</title>
<link>$diary_url</link>
<description>Telsa Gwynne's diary.</description>
<language>en-gb</language>
EOF

foreach my $article (@articles) {
    my ($id, $title, $body) = @$article;

    # Title and body are HTML fragments; escape them so they travel as
    # character data inside the XML elements. The id comes straight
    # from an attribute value and is passed through unchanged.
    $title = escapeHTML($title);
    $body  = escapeHTML($body);

    print <<EOF;
<item>
<title>$title</title>
<link>$diary_url#$id</link>
<description>$body</description>
</item>
EOF
}

print <<EOF;
</channel>
</rss>
EOF