import os
import time
import cgi

import rawdoglib.plugins
import rawdoglib.rawdog
import libxml2

# RelaxNG schema used by XML_Archiver.doc_open() to validate an existing
# archive before it is reused.
# NOTE(review): in this copy of the file the literal holds only whitespace,
# which is not a parseable schema document -- presumably the RelaxNG markup
# was stripped somewhere along the way.  Restore it before relying on the
# "existing file" path of doc_open(); TODO confirm against the upstream plugin.
schema_xml = """ """


class XML_Archiver_Exception(Exception):
    """Raised when an existing archive file does not validate against
    schema_xml."""
    pass


class XML_Archiver:
    """Mirror rawdog's feeds and articles into an XML document on disk.

    The document has a <rawdog> root holding a <feeds> and an <articles>
    element.  Hook methods (feed_sync, article_add, article_sync, write)
    update the in-memory tree; write() saves it to self.out_file.
    """

    def __init__(self, rawdog, config):
        """Open (or create) the archive document and record global state.

        rawdog -- the rawdog instance (unused here).
        config -- rawdog configuration; config['defines'] may contain an
                  'outputxml' entry naming the archive file.

        Raises XML_Archiver_Exception (via doc_open) if an existing
        archive fails validation.
        """
        defines = config['defines']
        if 'outputxml' in defines:
            self.out_file = defines['outputxml']
        else:
            self.out_file = 'output.xml.gz'

        self.doc_open()
        self.xml_feeds = self.xml.xpathEval('/rawdog/feeds')[0]
        self.xml_articles = self.xml.xpathEval('/rawdog/articles')[0]

        # Stamp the root with the time of this run and copy every
        # "define" from the configuration into <bit> children of the root.
        self.xml.setProp('last', str(time.time()))
        self.sync_bits(self.xml, defines.items())

    def doc_open(self):
        """Load self.out_file if it exists, otherwise build an empty tree.

        Sets self.doc (the libxml2 document) and self.xml (its root node).
        Raises XML_Archiver_Exception if the existing file does not
        validate against schema_xml.
        """
        if os.path.isfile(self.out_file):
            schema = libxml2.parseMemory(schema_xml, len(schema_xml))
            rngp = schema.relaxNGNewDocParserCtxt().relaxNGParse()
            ctxt = rngp.relaxNGNewValidCtxt()
            self.doc = libxml2.parseFile(self.out_file)
            # The result is compared by value; the original used an
            # identity test ("is 0") on the integer status.
            if self.doc.relaxNGValidateDoc(ctxt) == 0:
                self.xml = self.doc.children
            else:
                raise XML_Archiver_Exception(
                    "Can't parse old XML: " + self.out_file)
        else:
            self.doc = libxml2.newDoc("1.0")
            self.xml = self.doc.newChild(None, 'rawdog', None)
            self.xml.newChild(None, 'feeds', None)
            self.xml.newChild(None, 'articles', None)

    def sync_bits(self, parent, items):
        """Store (name, value) pairs as <bit name="..."> children of parent.

        An existing <bit> with the same name is reused; otherwise a new one
        is created.  In both cases its content is set to value.
        """
        for name, value in items:
            # NOTE(review): name is spliced into the XPath expression
            # unescaped, so a name containing '"' would break the query.
            found = parent.xpathEval('bit[@name="' + name + '"]')
            if found:
                bit = found[0]
            else:
                bit = parent.newChild(None, 'bit', None)
                bit.setProp('name', name)
            bit.setContent(value)

    def describe(self, parent, description):
        """Set the text of parent's <describe> child, creating it if
        it does not exist yet."""
        existing = parent.xpathEval('describe')
        if existing:
            existing[0].setContent(description)
        else:
            parent.newChild(None, 'describe', description)

    def feed_sync(self, rawdog, config, feed, feed_data, error, non_fatal):
        """Hook: create or refresh the <feed> element for a fetched feed.

        feed      -- rawdog feed object (get_id, url, last_update, period
                     and args are read from it).
        feed_data -- parsed feed; feed_data["feed"] supplies the
                     description and title.
        error, non_fatal -- accepted for the hook signature, unused.

        Returns True.
        """
        feed_info = feed_data["feed"]
        feed_id = feed.get_id(config)

        matches = self.xml_feeds.xpathEval('feed[@id="' + feed_id + '"]')
        if matches:
            xml_feed = matches[0]
        else:
            xml_feed = self.xml_feeds.newChild(None, 'feed', None)

        self.describe(xml_feed, feed_info.get('description', ''))

        # A feed without a title has no 'title_detail'; fall back to an
        # empty title (as is already done for the description) instead of
        # raising KeyError.
        title_detail = feed_info.get('title_detail')
        if title_detail is not None and 'value' in title_detail:
            title = title_detail['value']
        else:
            title = ''

        xml_feed.setProp('title', title)
        xml_feed.setProp('link', feed.url)
        xml_feed.setProp('id', feed_id)
        xml_feed.setProp('update_last', str(feed.last_update))
        xml_feed.setProp('update_next', str(feed.last_update + feed.period))
        xml_feed.setProp('period', str(feed.period))
        self.sync_bits(xml_feed, feed.args.items())
        return True

    def article_add(self, rawdog, config, article, now):
        """Hook: append a new <article> element for article.

        Returns True, like the other hooks in this class.  The original
        dropped the helper's result and returned None; rawdog's plugin
        documentation describes a false return from a hook as a request to
        stop calling later hooks (TODO confirm for the rawdog version in
        use).
        """
        xml_article = self.xml_articles.newChild(None, 'article', None)
        return self.__article_sync(xml_article, rawdog, config, article)

    def article_sync(self, rawdog, config, article, now):
        """Hook: refresh the <article> element whose id is article.hash,
        creating it if it is not in the archive yet.

        Returns True (see article_add for why the result is propagated).
        """
        matches = self.xml_articles.xpathEval(
            'article[@id="' + article.hash + '"]')
        if matches:
            xml_article = matches[0]
        else:
            xml_article = self.xml_articles.newChild(None, 'article', None)
        return self.__article_sync(xml_article, rawdog, config, article)

    def __article_sync(self, xml_article, rawdog, config, article):
        """Copy article's metadata and body text onto xml_article.

        Returns True.
        """
        entry_info = article.entry_info

        xml_article.setProp('id', article.hash)
        xml_article.setProp('feed', rawdog.feeds[article.feed].get_id(config))
        xml_article.setProp('title', entry_info['title_raw'])
        xml_article.setProp('date', str(article.date))
        xml_article.setProp('last_seen', str(article.last_seen))
        xml_article.setProp('added', str(article.added))

        if 'link' in entry_info:
            xml_article.setProp('link', entry_info['link'])

        # Default to an empty body so that entries with neither 'content'
        # nor 'summary_detail' (or with an empty 'content' list) no longer
        # fail on an unbound local.
        content = u''
        if 'content' in entry_info:
            # As in the original, the value of the last content item wins.
            for item in entry_info['content']:
                content = item['value']
        elif 'summary_detail' in entry_info:
            content = entry_info['summary_detail']['value']

        content = cgi.escape(content).encode('utf8', 'ignore')
        self.describe(xml_article, content)

        # Optional per-article flags stored under a special key of
        # rawdog.articles are mirrored as <bit> children.
        articles = rawdog.articles
        if 'HACK_sekkrit_flags' in articles:
            flags = articles['HACK_sekkrit_flags']
            if article.hash in flags:
                self.sync_bits(xml_article, flags[article.hash].items())
        return True

    def __write(self):
        """Save the document to self.out_file with compression mode 9 and
        formatted output, then free the libxml2 document."""
        self.doc.setDocCompressMode(9)
        self.doc.saveFormatFile(self.out_file, 1)
        self.doc.freeDoc()

    def write(self, rawdog, config):
        """Hook: write the archive to disk.  Returns True."""
        self.__write()
        return True
# Module-level placeholder.  startup() keeps its archiver in a local
# variable and does not rebind this name.
xml_archiver = {}


def startup(rawdog, config):
    """Startup hook: build an XML_Archiver and attach its hook methods.

    rawdog -- the rawdog instance, passed through to XML_Archiver.
    config -- the rawdog configuration, passed through to XML_Archiver.

    Returns True.
    """
    archiver = XML_Archiver(rawdog, config)

    # (hook name, handler) pairs, registered in this order.
    hook_table = (
        ("feed_fetched", archiver.feed_sync),
        ("article_added", archiver.article_add),
        ("article_updated", archiver.article_sync),
        ("shutdown", archiver.write),
    )
    for hook_name, handler in hook_table:
        rawdoglib.plugins.attach_hook(hook_name, handler)

    return True


rawdoglib.plugins.attach_hook("startup", startup)