# Utility module for using Amazon's XML services.
# Adam Sampson
#
# NOTE: this module is written for Python 2 (it relies on urllib.quote,
# urllib.urlopen and the "base64" string codec).

import hashlib
import hmac
import os
import re
import time
import urllib
import xml.dom.minidom

import lxml.html

from offog import die

# Matches the "Page x of y" pagination text on a wishlist page.
_PAGE_RE = re.compile(r'Page (\d+) of (\d+)')
# Extracts the item ID from a ".../dp/<id>/..." product link.
_ITEM_RE = re.compile(r'/dp/([^/]+)')


def get_token():
    """Return (token, secret_key).

    The two values are read from the first two whitespace-separated
    fields of the file ".amazon_token" in the directory named by the
    HOME environment variable.

    Raises KeyError if HOME is not set, IOError if the file cannot be
    read, and IndexError if the file has fewer than two fields."""
    path = os.environ["HOME"] + "/.amazon_token"
    # "with" guarantees the file is closed even if read() fails.
    with open(path) as f:
        fields = f.read().strip().split()
    return (fields[0], fields[1])


def call(args):
    """Call an Amazon API with the given args, returning an
    xml.dom.minidom-parsed version of the response.

    args is a dictionary of request parameters.  The "Service",
    "SubscriptionId" and "Timestamp" parameters are always set by this
    function (overriding any values given in args), and the request is
    signed with HMAC-SHA256 using the secret key from get_token()."""
    (token, secret_key) = get_token()

    all_args = {}
    all_args.update(args)
    all_args["Service"] = "AWSECommerceService"
    all_args["SubscriptionId"] = token
    all_args["Timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def aquote(s):
        # urllib.quote raises KeyError on non-ASCII unicode input, so
        # encode to UTF-8 bytes *before* percent-encoding.
        if isinstance(s, unicode):
            s = s.encode("UTF-8")
        return urllib.quote(s, "~")

    # The signature is computed over the parameters in sorted order, so
    # the query string must be built from the sorted list too.
    uri_args = sorted(aquote(k) + "=" + aquote(v)
                      for k, v in all_args.items())

    uri_host = "webservices.amazon.co.uk"
    uri_path = "/onca/xml"
    uri_query = "&".join(uri_args)
    uri = "http://%s%s?%s" % (uri_host, uri_path, uri_query)

    # String to sign: method, host, path and query, newline-separated.
    sign_data = "\n".join(["GET", uri_host, uri_path, uri_query])
    signature = hmac.new(secret_key, sign_data, hashlib.sha256).digest()
    # The base64 codec appends a trailing newline; strip it before quoting.
    uri += "&Signature=" + aquote(signature.encode("base64").strip())

    f = urllib.urlopen(uri)
    try:
        # Close the connection even if the response is not valid XML.
        data = xml.dom.minidom.parse(f)
    finally:
        f.close()
    return data


def item_url(asin):
    """Return the URL for an item."""
    return "http://www.amazon.co.uk/dp/" + asin


def get_text(node):
    """Return the contents of the first text node found among the direct
    children of a minidom node, or None if it has no text-node children.

    Only immediate children are examined; deeper descendants are not
    searched."""
    for cn in node.childNodes:
        if cn.nodeType == node.TEXT_NODE:
            return cn.data
    return None


def get_wishlist_items(wishlist_id):
    """Return a dictionary mapping item IDs to titles from the given
    wishlist.

    The wishlist's web pages are fetched and scraped one page at a time
    until the page whose "Page x of y" text has x equal to y.  die() is
    called if a page has no pagination text or yields no items."""
    items = {}

    page_num = 1
    last_page = False
    while not last_page:
        # Amazon used to have an API call for this (ListLookup), but it
        # disappeared some time in 2010...
        url = "http://www.amazon.co.uk/registry/wishlist/%s/?_encoding=UTF8&filter=all&sort=date-added&layout=compact&reveal=all&page=%d" % (wishlist_id, page_num)
        page = lxml.html.parse(url).getroot()
        # Resolve relative hrefs against the page URL before matching them.
        page.make_links_absolute(url)

        sortbar = page.cssselect(".sortbarText")
        if not sortbar:
            die("No sortbarText found")
        m = _PAGE_RE.search(sortbar[0].text_content())
        if m is None:
            die("No 'Page x of y' found")
        if int(m.group(1)) == int(m.group(2)):
            last_page = True

        found_any = False
        for anchor in page.cssselect(".compact-items a"):
            href = anchor.get("href")
            if href is None:
                # Anchors without an href (e.g. named anchors) cannot be
                # item links; searching None would raise TypeError.
                continue
            m = _ITEM_RE.search(href)
            if m is not None:
                items[m.group(1)] = anchor.text_content()
                found_any = True
        if not found_any:
            # NOTE(review): presumably die() terminates the program, which
            # is what stops this loop on an empty page -- confirm in offog.
            die("No items found")

        page_num += 1

    return items