#!/usr/bin/perl -w # vim:foldmethod=marker # Manage a collection of RDF files describing books. # Adam Sampson use strict; use RDF::Redland; use Net::Amazon; use ZOOM; use Term::ReadLine; use MARC::Record; use POSIX; use Encode; use XML::Writer; use IO::File; #{{{ configuration my $db_dir = "/home/azz/Work/library"; my $items_uri = "http://offog.org/library/items/"; #}}} #{{{ RDF namespaces my %ns = ( "dc" => "http://purl.org/dc/elements/1.1/", "alms" => "http://offog.org/xmlns/alms/", ); #}}} #{{{ globals my $term = new Term::ReadLine("ALMS"); my $id = undef; my $model = undef; my $item_node = undef; my @sticky = (); #}}} #{{{ helper functions sub db_fn ($) { my ($id) = @_; return "$db_dir/$id"; } sub uri ($) { my ($uri) = @_; return new RDF::Redland::URI($uri); } #}}} #{{{ fields and their mapping to RDF # FIXME should have a defined display order for these my %fields = ( "title" => ["title", "dc:title"], "series" => ["series", "alms:series"], "author" => ["author", "dc:creator"], "code" => ["ISBN/UPC code", "dc:identifier"], "publisher" => ["publisher", "dc:publisher"], "published" => ["date published", "alms:datePublished"], "medium" => ["medium", "dc:format"], "listprice" => ["list price", "alms:listPrice"], "pricepaid" => ["price paid", "alms:pricePaid"], "notes" => ["notes", "alms:notes"], "classmark" => ["LoC classmark", "alms:classmark"], "source" => ["source", "dc:source"], "condition" => ["condition", "alms:condition"], "catalogued" => ["date catalogued", "alms:dateCatalogued"], "location" => ["location", "alms:location"], ); my %preds = (); map { $fields{$_}->[1] =~ /(.*):(.*)/; $preds{$_} = uri($ns{$1} . $2); } (keys %fields); my $id_pred = uri($ns{'alms'} . "itemNumber"); my $read_pred = uri($ns{'alms'} . "read"); #}}} #{{{ status display sub start_status ($) { my ($msg) = @_; $| = 1; print "[$msg]"; } sub end_status () { # FIXME this ought to use Term::Cap print "\r\033[0K"; $| = 0; } #}}} #{{{ searching for items #{{{ Amazon my $amazon = undef; sub search_amazon (%) { unless (defined $amazon) { open F, "<$ENV{HOME}/.amazon_token" or die "Cannot open ~/.amazon_token\n"; my $token = ; chomp $token; close F; $amazon = Net::Amazon->new(token => $token, max_pages => 1); } start_status("Doing Amazon search..."); my $resp = $amazon->search(@_); end_status(); unless ($resp->is_success()) { print "Error from Amazon: " . $resp->message() . "\n"; return (); } my @results = (); for my $prop ($resp->properties) { my %item = (); my $type = $prop->Catalog(); if ($type eq 'Book') { $item{"title"} = $prop->title(); $item{"author"} = join(", ", $prop->authors()); $item{"publisher"} = $prop->publisher(); $item{"code"} = $prop->isbn() ? "ISBN " . $prop->isbn() : ""; } elsif ($type eq 'Music' or $type eq 'Classical') { $item{"title"} = $prop->album(); $item{"author"} = join(", ", $prop->artists()); $item{"publisher"} = $prop->label(); $item{"code"} = $prop->upc() ? "UPC " . $prop->upc() : ""; } else { print "Ignoring Amazon result " . $prop->ProductName() . " because catalog $type is unknown.\n"; next; } if ($item{"title"} =~ /^(.*)\s+\((.+)\)$/) { $item{"title"} = $1; $item{"series"} = $2; } $item{"published"} = $prop->year(); $item{"medium"} = $prop->Media(); $item{"listprice"} = $prop->ListPrice(); push @results, \%item; } return @results; } #}}} #{{{ Z39.50 my $z3950 = undef; sub search_loc ($) { my ($cql) = @_; unless (defined $z3950) { start_status("Connecting to LoC..."); $z3950 = new ZOOM::Connection("z3950.loc.gov:7090/voyager"); $z3950->option(preferredRecordSyntax => "usmarc"); end_status(); } my $set; eval { $set = $z3950->search(new ZOOM::Query::PQF($cql)); }; if ($@) { print "LoC Z39.50 error: code " . $@->code() . ": " . $@->message() . "\n"; return (); } my @results = (); foreach my $i (0..($set->size() - 1)) { my %item = (); my $record = $set->record($i); print "MARC: " . $record->render() . "\n"; my $marc = MARC::Record->new_from_usmarc($record->raw); my @titles = (); foreach my $title ($marc->subfield("245", "a"), $marc->subfield("245", "b")) { $title =~ s/ [\/:]$//; push @titles, $title; } $item{"title"} = join(": ", @titles); my @authors = (); foreach my $author ($marc->subfield("100", "a"), $marc->subfield("700", "a")) { $author =~ s/([^,]*), (.*)/$2 $1/; push @authors, $author; } $item{"author"} = join(", ", @authors); $item{"published"} = $marc->publication_date(); $item{"publisher"} = $marc->subfield("260", "b") || ""; $item{"publisher"} =~ s/\s*,$//; my $rawcode = $marc->subfield("020", "a"); $item{"code"} = $rawcode ? "ISBN $rawcode" : ""; $item{"listprice"} = $marc->subfield("350", "a") || "" ; $item{"classmark"} = $marc->subfield("050", "a") || ""; push @results, \%item; } return @results; } #}}} sub find_from_isbn ($) { my ($isbn) = @_; my @results; @results = search_amazon(asin => $isbn, locale => "uk"); return @results if scalar(@results) != 0; @results = search_amazon(asin => $isbn); return @results if scalar(@results) != 0; @results = search_loc("\@attr 1=7 $isbn"); return @results if scalar(@results) != 0; return (); } sub find_from_upc ($) { my ($upc) = @_; my @results; @results = search_amazon(upc => $upc, mode => "music", locale => "uk"); return @results if scalar(@results) != 0; @results = search_amazon(upc => $upc, mode => "music"); return @results if scalar(@results) != 0; return (); } #}}} #{{{ displaying items sub show_item ($) { my ($item) = @_; foreach my $field (sort keys %$item) { printf "\%-20s: \%s\n", $fields{$field}->[0], $item->{$field}; } } sub show_model () { return unless defined $id; print "\n"; my @read = (); my %item = (); my $pattern = new RDF::Redland::Statement($item_node, undef, undef); foreach my $statement ($model->find_statements($pattern)) { my $pred = $statement->predicate()->uri(); my $obj = decode("UTF-8", $statement->object()->as_string()); my $found = 0; foreach my $key (keys %preds) { if ($preds{$key}->equals($pred)) { $item{$key} = $obj; $found = 1; last; } } if ($id_pred->equals($pred)) { if (int($obj) != $id) { print "Warning: item number $obj does not match expected $id.\n"; } $found = 1; } if ($read_pred->equals($pred)) { push @read, $obj; $found = 1; } unless ($found) { print "(Unknown predicate " . $pred->as_string . " in RDF not shown.)\n"; } } show_item(\%item); if (@read > 0) { print "\nreading log:\n"; foreach my $read (@read) { print " $read\n"; } } } #}}} #{{{ Item model operations sub clear_model () { my $storage = new RDF::Redland::Storage(); $model = new RDF::Redland::Model($storage, ""); $item_node = new RDF::Redland::Node(uri("$items_uri$id")); } sub new_item () { unless (defined $id) { $id = 1; } while (-e db_fn($id)) { $id++; } clear_model(); add_field($id_pred, "$id"); print "Created new item $id.\n"; } sub load_item ($) { my ($new_id) = @_; unless (-e db_fn($new_id)) { print "Item $new_id does not exist.\n"; return; } $id = $new_id; clear_model(); my $parser = new RDF::Redland::Parser("rdfxml"); $parser->parse_into_model(uri("file:" . db_fn($id)), undef, $model); } sub save_model ($$) { my ($model, $fn) = @_; my $serializer = new RDF::Redland::Serializer("rdfxml"); for my $n (keys %ns) { $serializer->set_namespace($n, $ns{$n}); } $serializer->serialize_model_to_file($fn, undef, $model); } sub save_item () { save_model($model, db_fn($id)); } #}}} #{{{ RDF statement operations sub add_field ($$) { my ($pred, $s) = @_; if ($s) { my $node = new RDF::Redland::Node(encode("UTF-8", $s)); $model->add_statement($item_node, $pred, $node); } } sub remove_statements ($) { my ($pattern) = @_; foreach my $statement ($model->find_statements($pattern)) { $model->remove_statement($statement); } } #}}} #{{{ Searching the whole database sub get_all_items () { my $storage = new RDF::Redland::Storage(); my $all_model = new RDF::Redland::Model($storage, ""); my $parser = new RDF::Redland::Parser("rdfxml"); opendir DIR, $db_dir || die "cannot list database directory"; foreach my $fn (readdir DIR) { next unless $fn =~ /^\d+$/; $parser->parse_into_model(uri("file:$db_dir/$fn"), undef, $all_model); } closedir DIR; return $all_model; } sub search_all_items ($$) { my ($field, $find) = @_; $find =~ s/"/\\"/g; my $qs = join("", map { "PREFIX $_: <$ns{$_}>\n" } (keys %ns)) . "SELECT ?id ?title ?value " . "WHERE { ?item dc:title ?title . " . "?item alms:itemNumber ?id . " . "?item $fields{$field}->[1] ?value " . "FILTER regex(?value, \"$find\", \"i\") }"; my $all_model = get_all_items(); my $query = new RDF::Redland::Query($qs, undef, undef, "sparql"); my $results = $query->execute($all_model); my @list = (); while (!$results->finished) { my $item_id = int($results->binding_value(0)->as_string); my $title = $results->binding_value(1)->as_string; my $value = $results->binding_value(2)->as_string; push @list, [$item_id, $title, $value]; $results->next_result; } $id = undef; print "\nFound " . scalar @list . " results.\n"; foreach my $item (sort { $a->[0] <=> $b->[0] } @list) { my ($item_id, $title, $value) = @$item; print "\n"; if (scalar @list == 1) { load_item($item_id); last; } printf "\%5d title: \%s\n", $item_id, $title; if ($field ne 'title') { print " $field: $value\n"; } } } #}}} #{{{ XML output sub normalise_authors ($) { my ($in) = @_; my @names = (); foreach my $name (split(/,\s+/, $in)) { $name = lc $name; $name =~ s/\./ /g; $name =~ s/\s+/ /g; $name =~ s/^(sir|lord|the \S+) //; $name =~ s/ *$//; if ($name =~ /^(.*) (\S+)$/) { my ($fore, $sur) = ($1, $2); $fore =~ s/\b(\S)\S+/$1/g; $name = "$sur $fore"; } $name =~ s/\b(.)/uc $1/eg; push @names, $name; } return join("; ", @names); } sub save_model_xml ($$) { my ($model, $fn) = @_; my $output = new IO::File(">$fn"); my $xml = new XML::Writer(OUTPUT => $output, ENCODING => "utf-8", DATA_MODE => 1, DATA_INDENT => 2); $xml->xmlDecl(); my @ns_decls = (); foreach my $n (keys %ns) { push @ns_decls, "xmlns:$n" => $ns{$n}; } $xml->startTag("alms:items", @ns_decls); my $pattern = new RDF::Redland::Statement(undef, $id_pred, undef); foreach my $st ($model->find_statements($pattern)) { my $item_id = $st->object->as_string; $xml->startTag("alms:item", "alms:itemNumber" => $item_id); $pattern = new RDF::Redland::Statement($st->subject, undef, undef); foreach my $st ($model->find_statements($pattern)) { my $pred = $st->predicate->uri(); foreach my $field (keys %preds) { my $data = decode("UTF-8", $st->object->as_string); if ($pred->equals($preds{$field})) { $xml->dataElement($fields{$field}->[1], $data); if ($field eq "author") { $xml->dataElement("alms:normalisedCreator", normalise_authors($data)); } last; } } } $xml->endTag("alms:item"); } $xml->endTag("alms:items"); $xml->end(); $output->close(); } #}}} #{{{ add sub add (@) { my $use; my $n = scalar(@_); foreach my $item (@_) { foreach my $key (keys %$item) { delete $item->{$key} unless $item->{$key}; } } if ($n == 0) { print "No matches found.\n"; return; } elsif ($n == 1) { $use = $_[0]; } else { print "Multiple matches found.\n"; my $i = 0; foreach my $item (@_) { print "\nChoice $i:\n"; show_item($item); } do { my $line = $term->readline("Which one (or \"none\")? "); if ($line eq 'none') { print "\nNo matches found.\n"; return; } $i = int($line); } while ($i >= 0 && $i < $n); $use = $_[$i]; } $use->{"catalogued"} = strftime('%Y-%m-%dT%H:%MZ', gmtime()); new_item(); foreach my $key (keys %$use) { add_field($preds{$key}, $use->{$key}); } save_item(); foreach my $line (@sticky) { command($line); } } #}}} #{{{ command parser sub command ($); sub command ($) { my ($line) = @_; if ($line =~ /^([0-9X]{10}|[0-9X]{13})$/) { add(find_from_isbn($line)); } elsif ($line =~ /^[0-9]{12}$/) { add(find_from_upc($line)); } elsif ($line =~ /^=(.*)$/) { my @results = ({ "title" => $1 }); add(@results); } elsif ($line =~ /^\+(.*)$/) { push @sticky, $1; } elsif ($line =~ /^-(.*)$/) { @sticky = grep { $_ !~ /^$1/ } @sticky; } elsif ($line =~ /^\/(\S+)\s+(\S.*)$/) { my ($field, $find) = ($1, $2); if (exists $fields{$field}) { search_all_items($field, $find); } else { print "Unknown field $field.\n"; } } elsif ($line =~ /^(\S+)(?:\s+(.*))?$/) { my ($command, $arg) = ($1, $2); if (exists $fields{$command} and defined $id) { remove_statements(new RDF::Redland::Statement($item_node, $preds{$command}, undef)); add_field($preds{$command}, $arg); save_item(); } elsif ($command eq "read") { $arg = "unknown" unless defined $arg; add_field($read_pred, $arg); save_item(); } elsif ($command eq "unread") { my $obj = $arg ? new RDF::Redland::Node($arg) : undef; remove_statements(new RDF::Redland::Statement($item_node, $read_pred, $obj)); save_item(); } elsif ($command eq "quit") { last; } elsif ($command eq "select") { load_item(int($arg)); } elsif ($command eq "delete" and defined $id) { unlink(db_fn($id)); print "Deleted item $id.\n"; $id = undef; } elsif ($command eq "dumpdb" and defined $arg) { save_model(get_all_items(), $arg); } elsif ($command eq "dumpxml" and defined $arg) { save_model_xml(get_all_items(), $arg); } elsif ($command eq "foreach" and (defined $arg) and $arg =~ /^(\d+)-(\d+)\s+(.*)$/) { my ($from, $to, $cmd) = ($1, $2, $3); foreach my $i (int($from)..int($to)) { command("select $i"); command($cmd); } } else { print <[0]; } print < 0) { command(join(" ", @ARGV)); return; } while (1) { show_model(); print "\n"; if (@sticky > 0) { print "Sticky: " . join("; ", @sticky) . "\n"; } my $line = $term->readline("ALMS" . (defined $id ? " $id" : "") . "> "); return unless defined($line) and $line ne "quit"; command($line); } } main(); #}}}