# GARstow general utilities
# Copyright 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2012 Adam Sampson <ats@offog.org>

import sys, re, os, stat, urllib, HTMLParser, urlparse

def status(*args):
	"""Print a status message to stderr, prefixed with " ==> ".

	Every argument is converted with str() and the pieces are joined
	with no separator. stderr is flushed afterwards."""
	message = " ==> " + "".join([str(arg) for arg in args])
	print >>sys.stderr, message
	sys.stderr.flush()

def warn(*args):
	"""Print a warning message to stderr.

	Every argument is converted with str() and the pieces are joined
	with no separator. stderr is flushed afterwards."""
	message = "".join([str(arg) for arg in args])
	print >>sys.stderr, message
	sys.stderr.flush()

def die(*args):
	"""Report a fatal error via warn(), then exit with status 1."""
	warn(*args)
	# Equivalent to sys.exit(1), which raises exactly this exception.
	raise SystemExit(1)

class memoised(object):
	"""Decorator to memoise a function.

	Results are cached in self.memo, keyed on the positional arguments
	and the keyword arguments, so every argument value must be hashable.
	The cache is never emptied."""
	def __init__(self, function):
		self.memo = {}
		self.function = function
	def __call__(self, *args, **kwargs):
		# A frozenset makes the key independent of the order in which the
		# keyword arguments were supplied, so f(a=1, b=2) and f(b=2, a=1)
		# share one cache entry.
		key = (args, frozenset(kwargs.items()))
		if key not in self.memo:
			self.memo[key] = self.function(*args, **kwargs)
		return self.memo[key]

def union(a, b):
	"""Return the distinct items found in either a or b.

	The items are collected as dictionary keys, so they must be
	hashable; the result is the keys of that dictionary."""
	seen = {}
	for source in (a, b):
		for item in source:
			seen[item] = 1
	return seen.keys()

def norm_readlink(link):
	"""Resolve the symlinks along an absolute path.

	The path is walked one component at a time. Whenever the path built
	so far is a symlink, it is replaced by the link's target (taken
	relative to the link's directory and normalised). A target that is
	itself a symlink is not followed any further.

	link must start with "/". os.lstat raises OSError if a component
	does not exist."""
	assert link[0] == "/"
	resolved = "/"
	for component in link[1:].split("/"):
		candidate = resolved + component
		mode = os.lstat(candidate).st_mode
		if stat.S_ISLNK(mode):
			parent = os.path.dirname(candidate)
			target = os.readlink(candidate)
			candidate = os.path.normpath(os.path.join(parent, target))
		resolved = candidate + "/"
	# Drop the separator added after the final component.
	return resolved[:-1]

def parse_version(s):
	"""Parse a version number into a list that can be used as a sort key.

	The string is split on "."; components that are integers are
	converted to int, and anything else is left as a string. The
	pre-release markers "rc", "pre" and "beta" (with or without a leading
	"-") become a negative component, so that e.g. "1.0-rc1" sorts
	before "1.0"."""
	# This has to fudge around a bit so that -rcs sort correctly: each
	# marker becomes ".-", turning "1.0-rc2" into "1.0.-2". The dashed
	# form of a marker must be replaced before the bare form, otherwise
	# the "-" is left behind and stops the preceding component from
	# being parsed as a number.
	for marker in ("-rc", "rc", "-pre", "pre", "-beta", "beta"):
		s = s.replace(marker, ".-")
	parts = s.split(".")
	for i, part in enumerate(parts):
		try:
			value = int(part)
		except ValueError:
			# Not a number; keep the component as a string.
			continue
		if value < 0:
			# Map -1, -2, ... to -99, -98, ... so that later pre-releases
			# sort after earlier ones while staying below zero.
			value = -100 - value
		parts[i] = value
	# The trailing 0 makes a final release ([1, 0, 0]) compare greater
	# than its pre-releases ([1, 0, -99, 0]).
	parts.append(0)
	return parts

def mkdir_p(dir):
	"""Make sure a directory exists, creating missing parents as needed.

	Any OSError from os.makedirs is deliberately ignored, which covers
	the directory already existing. It also means a genuine failure to
	create the directory goes unreported; callers normally use the
	directory straight away, so such a failure shows up there."""
	try:
		os.makedirs(dir)
	except OSError:
		# Best effort only; see the docstring.
		return

def read_url(url):
	"""Read all the data from a URL.

	Opens url with urllib.urlopen and returns everything read() yields.
	The handle is closed even if reading fails."""

	f = urllib.urlopen(url)
	try:
		return f.read()
	finally:
		# Close the handle on the error path too, so a failed read does
		# not leak the connection.
		f.close()

def find_html_links(url, data = None):
	"""Return a list of the link targets in an HTML page.

	url is the address of the page; every href found on an <a> tag is
	joined against it, so relative links come back as absolute URLs.
	data is the HTML text of the page; if it is None, the page is
	fetched from url with read_url()."""

	page = read_url(url) if data is None else data

	class AnchorCollector(HTMLParser.HTMLParser):
		"""Parser that records the href of each <a> start tag."""
		def __init__(self, base):
			HTMLParser.HTMLParser.__init__(self)
			self.base = base
			self.links = []

		def handle_starttag(self, tag, attrs):
			if tag != "a":
				return
			hrefs = [value for (name, value) in attrs if name == "href"]
			for href in hrefs:
				self.links.append(urlparse.urljoin(self.base, href))

	collector = AnchorCollector(url)
	collector.feed(page)
	return collector.links

def list_http_dir(url):
	"""Return a list of files and subdirectories from an HTTP directory listing.
	This is a bit of a hack, but it'll probably work in most cases.

	The result holds the part of each link that follows url, with one
	trailing "/" removed. Links that do not start with url are ignored,
	as are those whose remainder starts with "?"."""

	# Make sure we have a trailing /, and remove duplicated ones (except ://).
	url = re.sub(r'([^:])/+', r'\1/', url + "/")
	prefix_len = len(url)

	# Collect the links on the page and keep those below this directory.
	names = []
	for link in find_html_links(url):
		if not link.startswith(url):
			continue
		name = link[prefix_len:]
		if name.endswith("/"):
			name = name[:-1]
		if name.startswith("?"):
			continue
		names.append(name)

	return names

