"""
filter.py: utility functions for filtering data through a command,
with optional persistent caching.
Adam Sampson
"""

import os
import pickle
import select
import sys


def die(*args):
    """Write the space-joined arguments to stderr and exit with status 1.

    The helpers below call die() to report fatal errors, but the name was
    neither defined nor imported in this module; this definition makes the
    module self-contained.
    """
    sys.stderr.write(" ".join(str(a) for a in args) + "\n")
    sys.exit(1)


def filter_through(command, data):
    """Filter data through a command (an execvp-style list), and return
    the result. Die if the command fails.

    command -- sequence of program name and arguments, passed to os.execvp
    data    -- byte string fed to the command's standard input

    Returns everything the command wrote to its standard output, as a
    byte string.  Calls die() if the command closes its output before all
    the input has been written, if it is killed by a signal, or if it
    exits with a non-zero status.
    """
    # childi/childo become the child's stdin/stdout; src/dest are our ends.
    (childi, src) = os.pipe()
    (dest, childo) = os.pipe()

    pid = os.fork()
    if pid == 0:
        # Child.  Whatever happens (including execvp raising because the
        # program does not exist), never fall back into the parent's code.
        try:
            os.close(src)
            os.close(dest)
            os.dup2(childi, 0)
            os.dup2(childo, 1)
            os.execvp(command[0], command)
        finally:
            os._exit(1)

    os.close(childi)
    os.close(childo)

    output = []
    pos = 0
    total = len(data)
    chunk = 4096

    if total == 0:
        # Nothing to send: give the child EOF straight away, so a child
        # that exits without reading is not mistaken for a failure.
        os.close(src)
        src = None

    # Interleave reads and writes so neither pipe can fill up and deadlock.
    while True:
        writers = [] if src is None else [src]
        (readable, writable, _) = select.select([dest], writers, [])
        if dest in readable:
            d = os.read(dest, chunk)
            if not d:
                # Empty read means EOF ("" on Python 2, b"" on Python 3).
                os.close(dest)
                break
            output.append(d)
        if src is not None and src in writable:
            pos += os.write(src, data[pos:pos + chunk])
            if pos == total:
                os.close(src)
                src = None

    if src is not None:
        # Tidy up before dying, in case a caller catches SystemExit.
        os.close(src)
        os.waitpid(pid, 0)
        die("output EOF before input EOF:", command)

    (pid, estat) = os.waitpid(pid, 0)
    # Success means "exited normally AND with status 0"; anything else
    # (non-zero exit, death by signal) is a failure.
    if not (os.WIFEXITED(estat) and os.WEXITSTATUS(estat) == 0):
        die("command failed:", command)

    return b"".join(output)


class FilterCache:
    """Provide caching for filter_through in a given file, expiring
    items that haven't been used.

    self.cache maps (tuple(command), data) to a two-item list
    [used, result].  Every entry is marked unused when the cache is
    loaded, filter() marks the entries it touches as used, and close()
    drops whatever is still unused before saving.
    """

    def __init__(self, file):
        """Load the cache from file, starting empty if it can't be read."""
        self.file = file
        try:
            # NOTE: pickle.load can execute arbitrary code from the file;
            # only point this at a cache file you trust.
            with open(file, "rb") as f:
                self.cache = pickle.load(f)
        except IOError:
            self.cache = {}
        for k in self.cache:
            self.cache[k][0] = False

    def close(self):
        """Drop entries unused since loading, then save the cache.

        The cache is written to a temporary file which is then renamed
        over the real one, so a partial write never replaces the old cache.
        """
        # Collect the keys first: deleting while iterating over a dict
        # raises RuntimeError on Python 3.
        stale = [k for (k, v) in self.cache.items() if not v[0]]
        for k in stale:
            del self.cache[k]

        tempname = self.file + ".new-%d" % (os.getpid(),)
        with open(tempname, "wb") as f:
            pickle.dump(self.cache, f)
        os.rename(tempname, self.file)

    def filter(self, command, data):
        """Return filter_through(command, data), using the cache if
        possible, and mark the entry as used."""
        # command is normally a list, which can't be part of a dict key.
        key = (tuple(command), data)
        if key in self.cache:
            self.cache[key][0] = True
        else:
            self.cache[key] = [True, filter_through(command, data)]
        return self.cache[key][1]