#!/usr/bin/env perl
# Convert Telsa's diary on stdin to an RSS feed on stdout.
# Usage: perl telsa-rss.pl <diary.html >diary.rss
# Adam Sampson <ats@offog.org>

use strict;
use warnings;
# This isn't a CGI script; we just want to use the CGI module's
# HTML-escaping routines.
use CGI qw/:standard/;

# The URL of the diary page, so we can construct links to it.
my $diary_url = "http://www.linux.org.uk/~telsa/Diary/diary.html";

# The maximum number of articles to include in the RSS.
my $max_articles = 10;

# Slurp the entire page into memory. The input record separator is only
# undefined inside the do-block, so it is restored automatically and the
# rest of the program is not left in slurp mode.
my $html = do { local $/; <> };
$html = "" unless defined $html;

# One diary entry: an anchored, bold heading inside a <dt>, followed by
# the entry text inside a <dd>. The three captures are the anchor name,
# the heading and the body (with surrounding whitespace trimmed). If the
# page format changes, this regexp will need altering to match.
my $article_re = qr{<dt><a name="(.*?)"><strong>(.*?)</strong></a></dt>.*?<dd>\s*(.*?)\s*</dd>}si;

# Pick out the articles in page order. The limit is enforced here, so we
# stop scanning as soon as we have enough, and a limit of zero really
# does produce an empty feed.
my @articles;
while (@articles < $max_articles && $html =~ m/$article_re/g) {
	push @articles, [$1, $2, $3];
}

# Now generate the RSS. See <http://blogs.law.harvard.edu/tech/rss>
# for the RSS 2.0 specification.
print <<EOF;
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
	<channel>
		<title>The more accurate diary. Really.</title>
		<link>$diary_url</link>
		<description>Telsa Gwynne's diary.</description>
		<language>en-gb</language>
EOF

# Tell the CGI module that the output is UTF-8 before escaping anything.
# NOTE(review): presumably this keeps escapeHTML from applying its
# Latin-1-specific substitutions -- confirm against the installed CGI.pm.
charset("UTF-8");

foreach my $article (@articles) {
	my ($id, $title, $body) = @$article;

	# Everything interpolated into the XML is escaped, including the
	# link: an anchor name containing "&" or "<" would otherwise make
	# the feed ill-formed.
	my $link = escapeHTML("$diary_url#$id");
	$title = escapeHTML($title);
	$body = escapeHTML($body);

	print <<EOF;
		<item>
			<title>$title</title>
			<link>$link</link>
			<description>$body</description>
		</item>
EOF
}

print <<EOF;
	</channel>
</rss>
EOF
