#!/usr/bin/env python
"""
ajournal: generic static web pages from a collection of journal articles.
Adam Sampson

Requirements: Python 2.3+, mxDateTime, feedwriter/safefiles/entities,
markdown.py (for "Format: markdown")

This is a very simple blogging tool; it's quite similar to blosxom in
static mode. Each article is in approximately-RFC2822 format: a
collection of headers (which may be folded across multiple lines by
inserting a line break before any whitespace), followed by a blank line,
followed by content. Articles are sorted hierarchically into directories.
The top-level directory in any ajournal-managed tree should contain a
".ajournal" directory.

Headers that may be specified in articles:

Title          The title of the article (optional; defaults to filename).
Date           The date of the article (optional; if absent, date unknown).
Virtual        Directories other than the implicit one this article
               appears in (optional; whitespace-separated).
Format         The format the body of the article is in. If "html" or
               unspecified, no processing is done. If "markdown", the
               body is converted from Markdown format to HTML.

The .dir file in each directory is used for the directory indexes; the
body will be prepended to the generated indexes, and the following
headers may also be specified:

Title          The directory title (optional; defaults to directory name).
Description    Description used in RSS feeds (optional).

The .settings file in the top-level directory contains global settings.
Paths are relative to the top-level directory. These headers may be
specified:

Template       The HTML file to use as a template for generated pages;
               __TITLE__, __BODY__, __CRUMBS__ and __HEAD__ will be
               replaced with the appropriate content.
Output         The directory to write output into.
Base-Path      The site-absolute path to use in HTML to reach the output
               directory (for instance, "/~me/myblog").
Global-Prefix  The string to prepend to the Base-Path for global
               references such as URLs in RSS feeds (for instance,
               "http://www.myhosting.com").
Reference      A string "x=y"; [[[x]]] will then be replaced in content
               by a link to y with the text x, and
               [[[#x arg1 arg2...]]] will be replaced by y with $1, $2
               in y replaced by the arguments.
"""

# FIXME Reference-ID header in articles

CHARSET = "ISO-8859-1"

import sys, os, re, getopt, string, time, select, pickle
import feedwriter, safefiles, entities
import mx.DateTime as mxtime
import markdown

def warn(*s):
    """Print the arguments, space-separated, as one line on stderr."""
    print >>sys.stderr, " ".join(map(str, s))

def die(*s):
    """Print a warning (see warn) and exit with status 1."""
    warn(*s)
    sys.exit(1)

def html_escape(s):
    """Encode s using the HTML 4 entity table from the entities module."""
    return entities.encode_entities(s, entities.html4_entities)

def parse_date(s):
    """Convert a Date header value into integer ticks.

    Returns -1 (the "unknown date" marker used throughout this module)
    when s is None or the literal string "unknown"."""
    if s is None or s == "unknown":
        return -1
    return int(mxtime.ISO.ParseDateTimeUTC(s).ticks())

def make_date(t):
    """Convert ticks back into a date string; -1 becomes "unknown"."""
    if t == -1:
        return "unknown"
    return mxtime.ISO.strUTC(mxtime.localtime(t))

def output_name(s):
    """Map a path string to its output name.

    Directory paths (ending in "/") are returned unchanged; anything
    else gets ".html" appended. An empty string is treated as a
    non-directory rather than raising IndexError."""
    if s.endswith("/"):
        return s
    return s + ".html"

normal_chars = string.letters + string.digits

def normalise_string(s):
    """Lower-case s and drop leading characters not in normal_chars."""
    s = s.lower()
    i = 0
    while i < len(s) and s[i] not in normal_chars:
        i += 1
    return s[i:]

def cmp_strings(a, b):
    """cmp()-style comparison of two strings after normalisation."""
    return cmp(normalise_string(a), normalise_string(b))

def articles_sort_date(a, b):
    """cmp()-style ordering: newest first, ties broken by title."""
    c = cmp(b.date, a.date)
    if c != 0:
        return c
    return articles_sort_title(a, b)

def articles_sort_title(a, b):
    """cmp()-style ordering by normalised title."""
    return cmp_strings(a.title, b.title)

def wordcount(html):
    """Return an approximate count of the words in an HTML fragment.

    Tags and entities are removed, then every character other than
    a-z, 0-9 and whitespace; what remains is counted by whitespace
    runs."""
    html = html.lower()
    html = re.sub(r"(<[^>]*>|&[^;]*;)", "", html)
    html = re.sub(r"[^a-z0-9\s]", "", html)
    html = html.strip()
    if html == "":
        return 0
    return len(re.findall(r"\s+", html)) + 1

class Path:
    """A "/"-separated path split into directory elements and a filename.

    A path string ending in "/" denotes a directory and has an empty
    filename."""

    def __init__(self, s = "/"):
        self.absolute = 0
        # startswith() rather than s[0] so that "" is accepted.
        if s.startswith("/"):
            self.absolute = 1
            s = s[1:]
        els = s.split("/")
        self.path = els[:-1]
        self.filename = els[-1]

    def str(self):
        """Return the path as a plain string."""
        s = ""
        if self.absolute:
            s = "/"
        return s + "".join([p + "/" for p in self.path]) + self.filename

    def __str__(self):
        return "[Path " + self.str() + "]"

    def __cmp__(self, rhs):
        return cmp(self.str(), rhs.str())

    def __hash__(self):
        return hash(self.str())

    def clone(self):
        """Return an independent copy of this path."""
        p = Path()
        p.absolute = self.absolute
        # Copy the list so the clone never aliases our elements.
        p.path = list(self.path)
        p.filename = self.filename
        return p

    def dir(self):
        """Return the directory containing this path."""
        p = self.clone()
        p.filename = ""
        return p

    def dirname(self):
        """Return the name of the innermost directory element.

        The top-level path has no elements; "" is returned for it
        instead of raising IndexError."""
        if not self.path:
            return ""
        return self.path[-1]

    def parent(self):
        """Return the parent directory of this path's directory."""
        p = self.clone()
        # This has the correct behaviour for "parent of /".
        p.path = self.path[:-1]
        p.filename = ""
        return p

    def suffix(self, s):
        """Return a copy with s appended to the filename."""
        p = self.clone()
        p.filename += s
        return p

    def join(self, rhs):
        """Resolve rhs relative to this path's directory.

        An absolute rhs is returned as-is."""
        if rhs.absolute:
            return rhs
        p = self.clone()
        p.path = self.path + rhs.path
        p.filename = rhs.filename
        return p

rootpath = Path("/")

class References:
    """The table of "Reference" settings, plus the keys that articles
    asked for but which were not defined."""

    def __init__(self):
        self.references = {}
        # Maps each unresolved key to the path of an article using it.
        self.unresolved = {}

    def add_reference(self, key, url):
        self.references[key] = url

    def get(self, key, path):
        """Return the value for key, or None (recording key as
        unresolved against path) if it is not defined."""
        if key not in self.references:
            self.unresolved[key] = path
            return None
        return self.references[key]

    def link_to(self, key, path):
        """Return the replacement for [[[key]]]: a link to the
        reference's URL with key as the text, or just key if the
        reference is unresolved."""
        r = self.get(key, path)
        if r is None:
            return key
        # NOTE(review): the code as found returned "" + key + "",
        # discarding the URL it had just looked up; the anchor markup
        # appears to have been stripped. Restored here -- confirm the
        # exact markup against a pristine copy.
        return "<a href=\"" + r + "\">" + key + "</a>"

    def macro(self, s, path):
        """Expand "#name arg1 arg2..." using the reference called name.

        $N in the reference's value is replaced by argument N; the last
        argument absorbs any remaining text. Returns s unchanged if the
        reference is unresolved, malformed or given too few arguments."""
        vs = s[1:].split(None, 1)
        if not vs:
            # A bare "#" names no reference at all.
            return s
        r = self.get(vs[0], path)
        if r is None:
            return s
        # At least one argument is always expected.
        num_groups = 1
        for m in re.findall(r"\$(\d+)", r):
            if int(m) > num_groups:
                num_groups = int(m)
        vs = s[1:].split(None, num_groups)
        if len(vs) < num_groups + 1:
            print >>sys.stderr, "Reference " + vs[0] + " has too few arguments (in " + s + ")"
            return s
        def expand(m):
            return vs[int(m.group(1))]
        return re.sub(r"\$(\d+)", expand, r)

    def show_unresolved(self):
        """Report every unresolved reference on stderr, sorted by key."""
        us = self.unresolved.keys()
        if us == []:
            return
        us.sort(cmp_strings)
        for u in us:
            print >>sys.stderr, "Reference: " + u + "=unresolved (in " + self.unresolved[u].str() + ")"

class Article:
    """One article: a list of (header, value) pairs and a body."""

    _header_re = re.compile(r"(\S+):\s*(.*)")
    _continuation_re = re.compile(r"(\s+.*)")

    def __init__(self, coll, path):
        self.coll = coll
        self.path = path
        self.headers = []
        self.mtime = -1
        self.body = ""
        self.title = None
        self.date = -1
        self.cats = [path.dir()]

    def load(self, fn):
        """Read headers and body from the file fn.

        Calls die() on a malformed header section or an unparseable
        Date header."""
        f = open(fn)
        try:
            self.headers = []
            while 1:
                s = f.readline().decode(CHARSET)
                if s.endswith("\r\n"):
                    s = s[:-2] + "\n"
                # A blank line (or EOF) ends the headers.
                if s == "\n" or s == "":
                    break
                m = self._header_re.match(s)
                if m is not None:
                    self.headers.append(m.group(1, 2))
                    continue
                m = self._continuation_re.match(s)
                if m is not None:
                    if not self.headers:
                        die("continuation before real header in", fn)
                    # Headers are stored as tuples, so the folded value
                    # must replace the whole entry (item assignment on
                    # the tuple would raise TypeError). The stored value
                    # has no trailing newline, so nothing is trimmed
                    # from it.
                    (name, oldvalue) = self.headers[-1]
                    self.headers[-1] = (name, oldvalue + m.group(1).rstrip("\n"))
                    continue
                die("unrecognised line in headers in", fn)
            self.body = f.read().decode(CHARSET)
            self.mtime = os.fstat(f.fileno()).st_mtime
        finally:
            f.close()

        self.title = self.header("Title")
        if self.title is None:
            self.title = self.path.filename
        try:
            self.date = parse_date(self.header("Date"))
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            die("error parsing Date header in", fn)
        self.cats = [self.path.dir()]
        h = self.header("Virtual")
        if h is not None:
            # split() with no argument ignores repeated or trailing
            # whitespace, so vp is never empty.
            for vp in h.split():
                if not vp.endswith("/"):
                    vp += "/"
                self.cats.append(self.path.join(Path(vp)))

    def header(self, header):
        """Return the first value of the named header, or None."""
        for (h, v) in self.headers:
            if h == header:
                return v
        return None

    def all_headers(self, header):
        """Return every value of the named header, in file order."""
        return [v for (h, v) in self.headers if h == header]

    def byline(self, frompath, refs):
        """Return the "date; in categories; N words" line."""
        if self.date != -1:
            s = time.strftime("%Y-%m-%d %H:%M; ", time.gmtime(self.date))
        else:
            s = ""
        catlinks = [self.coll.categories[c].link_to(frompath) for c in self.cats]
        s += "in " + ", ".join(catlinks) + "; "
        wc = wordcount(self.body_html(refs, False))
        if wc == 0:
            s += "no content"
        else:
            s += str(wc) + " words"
        return s

    _reference_re = re.compile(r"\[\[\[([^\]]*)\]\]\]")
    _space_re = re.compile(r"\s+")

    def body_html(self, refs, fill_empty = True):
        """Return the body as HTML with [[[...]]] references expanded.

        If fill_empty is true, an empty body is replaced by a
        "(No content.)" placeholder."""
        def link_reference(m):
            n = self._space_re.sub(" ", m.group(1))
            # FIXME ugly
            # startswith() so that an empty [[[]]] does not raise.
            if n.startswith("#"):
                return refs.macro(n, self.path)
            return refs.link_to(n, self.path)
        body = self._reference_re.sub(link_reference, self.body)

        fmt = self.header("Format")
        if fmt == "markdown":
            body = markdown.markdown(body, html4 = True)
        elif fmt is not None and fmt != "html":
            die("unknown Format: " + fmt)

        if fill_empty and body.strip() == "":
            # NOTE(review): reproduced as found; the surrounding markup
            # (presumably a paragraph element) looks stripped -- confirm.
            return "\n\n(No content.)\n\n"
        return body

    def get_title(self):
        """Return the Title header, or failing that the name of the
        directory this path is in."""
        t = self.header("Title")
        if t is not None:
            return t
        return self.path.dirname()

    def link_to(self, frompath):
        """Return an HTML link to this item as seen from frompath."""
        return self.coll.make_link(frompath, self.path, self.get_title())

class Directory(Article):
    """A category: the contents of its .dir file (if any) plus the
    articles filed in or below it."""

    def __init__(self, coll, path):
        Article.__init__(self, coll, path)
        self.articles = []
        self.settings = None

    def add_article(self, article):
        # FIXME Inefficient (but probably insignificant)
        if article not in self.articles:
            self.articles.append(article)

class Collection:
    """Every article and category found below the top-level directory,
    together with the global settings."""

    def __init__(self, dir):
        self.articles = {}
        self.categories = {}
        self.settings = None
        self.references = References()
        self.add_recursively(rootpath, dir)
        if self.settings is None:
            die("no .settings found")
        for v in self.settings.all_headers("Reference"):
            if "=" not in v:
                die("malformed Reference setting (expected x=y):", v)
            (key, url) = v.split("=", 1)
            self.references.add_reference(key, url)
        self.template = safefiles.read_file(self.setting("Template"))

    def add_recursively(self, cat, filepath):
        """Load everything under the directory filepath as category cat."""
        self.add_category(cat)
        for fn in os.listdir(filepath):
            real = filepath + "/" + fn
            if fn == ".dir":
                self.categories[cat].load(real)
            elif fn == ".settings" and cat == rootpath:
                self.settings = Article(self, cat)
                self.settings.load(real)
            elif fn[0] == "." or fn == "CVS" or fn[-1] == "~" or fn[-4:] == ".bak":
                # Hidden files, version-control directories and editor
                # backups are never articles.
                continue
            elif os.path.isdir(real):
                dpath = cat.join(Path(fn + "/"))
                self.add_recursively(dpath, real)
            else:
                fpath = cat.join(Path(fn))
                a = Article(self, fpath)
                a.load(real)
                self.articles[fpath] = a
                for acat in a.cats:
                    self.add_category(acat, a)

    def add_category(self, cat, article = None):
        """Make sure cat and all of its ancestors exist, adding article
        (if given) to each of them."""
        while 1:
            if cat not in self.categories:
                self.categories[cat] = Directory(self, cat)
            if article is not None:
                self.categories[cat].add_article(article)
            if cat == rootpath:
                break
            cat = cat.parent()

    def setting(self, key):
        """Return a header from .settings, or None."""
        return self.settings.header(key)

    def cat_setting(self, cat, key):
        """Return a header from the .dir file of cat, or None."""
        return self.categories[cat].header(key)

    styles = {
        "": "Latest articles",
        "-name": "All articles (by name)",
        "-date": "All articles (by date)"
        }

    def make_index(self, cat, style):
        """Return the complete index page for cat in the given style
        (one of the keys of self.styles)."""
        cato = self.categories[cat]
        # "as" is a reserved word from Python 2.6, so the article list
        # needs another name.
        arts = cato.articles
        if style == "":
            arts.sort(articles_sort_date)
            arts = chop_to_latest(arts)
        elif style == "-name":
            arts.sort(articles_sort_title)
        elif style == "-date":
            arts.sort(articles_sort_date)

        # NOTE(review): the literals below are reproduced as found. They
        # contain only whitespace around the dynamic parts, which
        # suggests the markup they once carried has been stripped; as
        # found, the by-name and by-date styles sort the articles but
        # emit no list of them, and nothing follows the "Categories"
        # heading. Restore from a pristine copy.
        bits = [cato.body_html(self.references, False)]
        bits.append("\n\n")
        if style != "":
            bits.append("\n")
        else:
            for a in arts:
                bits.append("\n\n")
                bits.append("\n\n" + self.make_link(cat, a.path, a.title) + "\n\n\n")
                bits.append("\n\n" + a.byline(cat, self.references) + "\n\n\n")
                # FIXME Show abstract for long articles?
                bits.append(a.body_html(self.references))
                bits.append("\n\n")
        bits.append("\n\n\n")
        bits.append("\n\n")
        bits.append("\n\n")
        bits.append("\n\nCategories\n\n\n")
        bits.append("\n\n\n\n")

        title = cato.get_title()
        head = """\n"""
        return generate_page(self.template, title, "".join(bits), self.make_crumbs(cat), head)

    def update(self):
        """Regenerate every feed, index page and article page, writing
        only the files whose contents would change."""
        basepath = self.setting("Base-Path")
        baseurl = self.setting("Global-Prefix") + basepath
        outputdir = self.setting("Output")

        cats = self.categories.keys()
        cats.sort()
        for cat in cats:
            catpath = cat.str()
            realpath = outputdir + catpath
            # Only create what is missing, so that a genuine failure
            # (permissions, a file in the way) is reported here rather
            # than being swallowed.
            if not os.path.isdir(realpath):
                os.makedirs(realpath)

            title = self.categories[cat].get_title()
            description = self.cat_setting(cat, "Description")
            c = feedwriter.Channel(title = title, link = baseurl + catpath, description = description)
            arts = self.categories[cat].articles
            arts.sort(articles_sort_date)
            for a in chop_to_latest(arts):
                apath = baseurl + output_name(a.path.str())
                date = a.date
                if date == -1:
                    date = None
                c.add_item(title = a.title, link = apath, description = a.body_html(self.references), pubDate = date)
            update_file(realpath + "feed.rss", c.rss2())

            for style in self.styles.keys():
                body = self.make_index(cat, style)
                update_file(realpath + "index" + style + ".html", body.encode(CHARSET))

        for a in self.articles.values():
            cat = a.cats[0]
            # NOTE(review): whitespace reproduced as found; markup
            # around the byline appears to have been stripped.
            content = '\n\n' + a.byline(a.path, self.references) + '\n\n\n'
            content += a.body_html(self.references)
            body = generate_page(self.template, a.title, content, self.make_crumbs(cat.dir()))
            fn = outputdir + output_name(a.path.str())
            update_file(fn, body.encode(CHARSET))

        self.references.show_unresolved()

    def make_link(self, source, dest, title):
        """Generate an HTML link from path source to path dest."""
        if source == dest:
            # The current location -- so no need for it to be a
            # link.
            return html_escape(title)
        if source.path == dest.path and dest.filename != "":
            link = output_name(dest.filename)
        else:
            # FIXME when dest is a subdir of source, this
            # can generate a shorter relative path
            link = self.setting("Base-Path") + output_name(dest.str())
        # NOTE(review): the code as found returned only the escaped
        # title, never using the link it had computed; the anchor markup
        # appears to have been stripped. Restored here -- confirm the
        # exact markup against a pristine copy.
        return "<a href=\"" + link + "\">" + html_escape(title) + "</a>"

    def make_crumbs(self, cat):
        """Generate a breadcrumb trail to a category."""
        # An entity rather than a raw non-ASCII character: this file has
        # no source-encoding declaration, and the separator is joined
        # with strings decoded from the articles.
        sep = " &raquo; "
        topcat = cat
        crumbs = []
        while 1:
            crumbs.append(self.make_link(topcat, cat, self.categories[cat].get_title()))
            if cat == rootpath:
                break
            cat = cat.parent()
        crumbs.reverse()
        return "".join([sep + c for c in crumbs])

    def get_cat_tree(self):
        """Return a tree (represented as a dict) of the categories."""
        tree = {}
        for cat in self.categories.keys():
            if cat == rootpath:
                continue
            tree.setdefault(cat.parent(), []).append(cat)
        return tree

def generate_page(template, title, body, crumbs = "", head = ""):
    """Generate an HTML page from a template."""
    fields = {
        "__TITLE__": html_escape(title),
        "__BODY__": body,
        "__CRUMBS__": crumbs,
        "__HEAD__": head
        }
    # Substitute in a single pass so that a placeholder name occurring
    # inside inserted content is never itself replaced, whatever order
    # the dict happens to iterate in.
    pattern = re.compile("|".join([re.escape(f) for f in fields.keys()]))
    def fill(m):
        return fields[m.group(0)]
    return pattern.sub(fill, template)

def chop_to_latest(articles):
    """Given a list of articles in newest-first order, reduce it so that
    it only includes the last week's worth, or 10, whichever is
    greater.

    (The parameter was formerly called "as", which is a reserved word
    from Python 2.6; all callers pass it positionally.)"""
    if articles == []:
        return []
    week = 7*24*60*60
    start = articles[0].date
    end = len(articles)
    while end > 10 and (start - articles[end - 1].date) > week:
        end -= 1
    return articles[:end]

def update_file(name, contents):
    """Write data into a file only if it would change the contents.
    This isn't race-safe (which is why it's not in safefiles)."""
    try:
        if safefiles.read_file(name) == contents:
            return
    except IOError:
        # File doesn't yet exist.
        pass
    print("Updating: " + name)
    safefiles.write_file(name, contents)

def new_article(name):
    """Create a skeleton article called name and replace this process
    with the user's editor on it.

    The editor is taken from $VISUAL, then $EDITOR, then "vi"."""
    if os.path.exists(name):
        die("article", name, "already exists")

    f = open(name, "w")
    try:
        f.write("Title: \n")
        f.write("Date: " + time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()) + "\n")
        f.write("Format: markdown\n")
        f.write("\n")
    finally:
        f.close()

    visual = os.getenv("VISUAL") or os.getenv("EDITOR") or "vi"
    try:
        # Does not return on success.
        os.execvp(visual, [visual, name])
    except OSError:
        die("can't exec", visual)

def usage():
    """Print the command-line help."""
    print("ajournal by Adam Sampson ")
    print("-d|--dir DIR Operate in DIR rather than current directory")
    print("-h, --help Show usage")
    print("-n|--new NAME Create new article called NAME")

def main(args):
    """Parse the command line, locate the top-level directory (the
    nearest one at or above the start directory that contains
    ".ajournal") and regenerate the site."""
    try:
        # The trailing "=" marks long options that take an argument.
        opts, args = getopt.getopt(args, "d:hn:", ["dir=", "help", "new="])
    except getopt.GetoptError:
        usage()
        sys.exit(1)

    startdir = "."
    for (o, a) in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-d", "--dir"):
            startdir = a
        elif o in ("-n", "--new"):
            new_article(a)

    os.chdir(startdir)
    # Give up rather than loop forever once the root has been reached.
    maxdepth = 40
    while 1:
        maxdepth -= 1
        if maxdepth == 0:
            die("can't find top directory")
        if os.path.isdir(".ajournal"):
            break
        os.chdir("..")

    col = Collection(".")
    col.update()

if __name__ == "__main__":
    main(sys.argv[1:])