#!/usr/bin/env python
"""Generate a static HTML catalogue from a cdindex directory.

The index directory holds two sub-directories, ``titles`` and ``contents``,
each containing one file per disc, named after the disc ID.  A title file
holds the disc title (several titles may be joined with "; "); a contents
file holds one ``size:name`` line per file on the disc.

Running the script writes one page per disc, four index pages (by ID, by raw
title, by split title, by file) and a main index into ``webpages_dir``.

NOTE(review): the HTML markup emitted below and the entities in ``escape``
were reconstructed -- the copy of this script under review had its tags and
entities stripped.  Compare against a known-good copy before deploying.
"""

import os
import re

cdindex_dir = "/n/stuff/cdindex"
webpages_dir = "/tmp/catalog"


class Disc(object):
    """One catalogued disc: its ID, its title and a name -> size file map."""

    def __init__(self, id, index_dir=None):
        """Load the disc called *id* from *index_dir*.

        *index_dir* defaults to the module-level ``cdindex_dir``.

        Raises IOError/OSError if either index file cannot be opened and
        ValueError if a contents line is not of the form ``size:name`` with
        an integer size.
        """
        if index_dir is None:
            index_dir = cdindex_dir
        self.id = id
        with open(os.path.join(index_dir, "titles", id)) as f:
            self.title = f.read().strip()
        self.files = {}
        with open(os.path.join(index_dir, "contents", id)) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines instead of failing to unpack.
                    continue
                # Split on the first colon only: names may contain colons.
                size, name = line.split(":", 1)
                self.files[name] = int(size)


def load_discs(index_dir=None):
    """Return a list with one Disc per entry in ``<index_dir>/titles``.

    A real list (not a lazy iterator) is returned because callers iterate
    over the result more than once.
    """
    if index_dir is None:
        index_dir = cdindex_dir
    names = os.listdir(os.path.join(index_dir, "titles"))
    return [Disc(name, index_dir) for name in names]


# NOTE: this pattern is unanchored, so it removes "the" (plus any following
# whitespace) wherever it occurs, including inside longer words.
the_re = re.compile(r"the\s*", re.I)
numbers_re = re.compile(r"(\d+)")
# Cache of lower-cased string -> sort key, shared by every sort in the run.
expand_memo = {}


def _expand_number(match):
    """Right-align a run of digits in a 20-column field."""
    return "%20d" % (int(match.group(1)),)


def _fair_numeric_key(s):
    """Return the sort key for *s*.

    The key is the lower-cased string with "the" removed and every run of
    digits padded to 20 columns, so that plain string comparison orders
    embedded numbers by value ("disc2" before "disc10").
    """
    s = s.lower()
    try:
        return expand_memo[s]
    except KeyError:
        pass
    key = numbers_re.sub(_expand_number, the_re.sub("", s))
    expand_memo[s] = key
    return key


def _by_first(item):
    """Sort key for (text, disc_id) pairs: order by the text."""
    return _fair_numeric_key(item[0])


def fair_numeric_sort(a, b):
    """Compare two strings case-insensitively, numerically and without "the".

    Returns -1, 0 or 1, like the Python 2 ``cmp`` builtin.
    """
    key_a = _fair_numeric_key(a)
    key_b = _fair_numeric_key(b)
    return (key_a > key_b) - (key_a < key_b)


def html_header(f, title):
    """Write the opening markup of a page to the file object *f*.

    *title* is inserted verbatim, so the caller must escape it first.
    """
    f.write(
        "<html>\n"
        "<head>\n"
        "<title>cdcatalog: " + title + "</title>\n"
        "</head>\n"
        "<body>\n"
        "<h1>" + title + "</h1>\n"
    )


def html_footer(f):
    """Write the closing markup of a page to the file object *f*."""
    f.write("</body>\n</html>\n")


def nice_size(size):
    """Return a nicely-formatted file size.

    The size is truncated to a whole number of the largest binary unit that
    fits (1536 -> "1KiB").  Sizes below one byte give "empty".
    """
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    label = "empty"
    for exponent, unit in enumerate(units):
        scale = 1024 ** exponent
        # ">=" so that exactly one unit (1 byte, 1024 bytes, ...) qualifies.
        if size >= scale:
            label = str(size // scale) + unit
    return label


def escape(s, quote=False):
    """Return *s* with "&", "<" and ">" replaced by HTML entities.

    With *quote* true, double quotes are replaced as well, which makes the
    result safe inside a double-quoted attribute value.
    """
    # "&" must be handled first so the other entities are not re-escaped.
    s = s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s


def _write_disc_page(disc, out_dir):
    """Write ``disc-<id>.html``: a table of the disc's files and sizes."""
    path = os.path.join(out_dir, "disc-" + disc.id + ".html")
    with open(path, "w") as f:
        html_header(f, escape("Disc " + disc.id + ": " + disc.title))
        f.write("<table>\n")
        f.write("<tr><th>File</th><th>Size</th></tr>\n")
        for name in sorted(disc.files, key=_fair_numeric_key):
            f.write(
                "<tr><td>" + escape(name) + "</td><td>"
                + nice_size(disc.files[name]) + "</td></tr>\n"
            )
        f.write("</table>\n")
        html_footer(f)


# Page title -> file name stem of every index page written so far; the main
# index is generated from this.
indexes = {}


def make_index_page(filename, title, items, out_dir=None):
    """Write ``index-<filename>.html`` and register it in ``indexes``.

    *items* is a sequence of (text, disc_id) pairs, already in display
    order.  Each row shows the text and links to the disc's page.
    *out_dir* defaults to the module-level ``webpages_dir``.
    """
    if out_dir is None:
        out_dir = webpages_dir
    indexes[title] = filename
    path = os.path.join(out_dir, "index-" + filename + ".html")
    with open(path, "w") as f:
        html_header(f, escape(title))
        f.write("<table>\n")
        f.write("<tr><th>Title</th><th>Disc</th></tr>\n")
        for item_title, disc_id in items:
            f.write(
                "<tr><td>" + escape(item_title) + "</td><td>"
                + '<a href="disc-' + escape(disc_id, quote=True) + '.html">'
                + escape(disc_id) + "</a></td></tr>\n"
            )
        f.write("</table>\n")
        html_footer(f)


def _write_main_index(out_dir):
    """Write ``index.html`` linking to every page recorded in ``indexes``."""
    with open(os.path.join(out_dir, "index.html"), "w") as f:
        html_header(f, "Main index")
        for title in sorted(indexes, key=_fair_numeric_key):
            f.write(
                '<p><a href="index-' + escape(indexes[title], quote=True)
                + '.html">' + escape(title) + "</a></p>\n"
            )
        html_footer(f)


def main():
    """Build the whole catalogue from ``cdindex_dir`` into ``webpages_dir``."""
    if not os.path.isdir(webpages_dir):
        os.makedirs(webpages_dir)

    ds = load_discs()
    discs = {d.id: d for d in ds}
    for d in ds:
        _write_disc_page(d, webpages_dir)

    ids = sorted(discs, key=_fair_numeric_key)
    make_index_page("byid", "By ID", [(discs[i].title, i) for i in ids])

    # Stable sort of the ID-ordered list: equal titles stay in ID order.
    ids.sort(key=lambda i: _fair_numeric_key(discs[i].title))
    make_index_page(
        "bytitle", "By raw title", [(discs[i].title, i) for i in ids]
    )

    split_titles = []
    files = []
    for d in ds:
        for part in d.title.split("; "):
            split_titles.append((part, d.id))
        for name in d.files:
            files.append((name, d.id))

    split_titles.sort(key=_by_first)
    make_index_page("bysplit", "By split title", split_titles)

    files.sort(key=_by_first)
    make_index_page("byfile", "By files", files)

    _write_main_index(webpages_dir)


if __name__ == "__main__":
    main()