#!/usr/bin/env python
"""
CGI interface to tidy and xsltproc

Fetches a document with curl, cleans it up with tidy, transforms it with
xsltproc and serves the (cached) result as text/xml.

Written for Python 2; the import fallbacks below also let the module load
and run under Python 3.

Share and Enjoy.
"""
import os
import subprocess
import sys
import time
import urllib
from stat import ST_MTIME

try:
    from hashlib import md5 as _new_md5
except ImportError:  # very old Python 2 only ships the standalone module
    from md5 import new as _new_md5

try:
    from urlparse import urlparse, urlunparse
except ImportError:  # Python 3 moved these into urllib.parse
    from urllib.parse import urlparse, urlunparse

# Configuration variables
XSLT = '/usr/bin/xsltproc'
TIDY = '/usr/local/bin/tidy'
CURL = '/usr/bin/curl'
cache_dir = '/www/decafbad.com/data/tidyxslt'
cache_time = 3 * 60 * 60  # 3 hours

# Only these URL schemes are handed to curl / xsltproc.  Anything else
# (file://, bare paths, strings starting with "-") could read local files
# or be mistaken for a command-line option.
_ALLOWED_SCHEMES = ('http', 'https', 'ftp')

# NOTE(review): this text is served as text/html but contains no markup or
# form controls; the original HTML appears to have been lost.  It is kept
# byte-for-byte here -- restore the real form (the request handler reads the
# fields ``xsl``, ``doc`` and ``cache``) from the original source if possible.
_FORM_PAGE = """ xsltproc service

Tidy and XSLT

Address of XSL:

Address of document to tidy:

"""


def _is_fresh(path):
    """Return True if *path* is a regular file no older than cache_time."""
    try:
        age = time.time() - os.stat(path)[ST_MTIME]
    except OSError:
        # Missing (or vanished between checks): treat as a cache miss.
        return False
    return os.path.isfile(path) and age <= cache_time


def cacheFunc(func, **kwargs):
    """Call ``func(**kwargs)`` through a file cache kept in ``cache_dir``.

    The cache file name is an MD5 digest of the function name and of every
    keyword argument except ``cache``.  Argument names are hashed together
    with their values so that different argument sets whose values merely
    concatenate to the same text do not share a cache entry.

    ``cache='0'`` forces a fresh call (the result still replaces the cached
    copy); any other value, or omitting it, allows a cached copy no older
    than ``cache_time`` seconds to be returned instead.

    Returns whatever ``func`` returned (expected to be a string).  Exceptions
    raised by ``func`` propagate and nothing is written to the cache.
    """
    digest = _new_md5()
    digest.update(repr(getattr(func, '__name__', '')).encode('utf-8'))
    for name in sorted(kwargs):
        # The cache switch must not influence which entry is used.
        if name == 'cache':
            continue
        # repr() of the (name, value) pair gives unambiguous boundaries.
        digest.update(repr((name, str(kwargs[name]))).encode('utf-8'))
    fn = os.path.join(cache_dir, digest.hexdigest())

    if kwargs.get('cache', '1') != '0' and _is_fresh(fn):
        # The cache file exists and has not expired, so serve it up.
        fin = open(fn, 'r')
        try:
            return fin.read()
        finally:
            fin.close()

    # Caching is disabled, there's no cached output, or the cache has
    # expired, so call the function and store its result.
    out = func(**kwargs)
    fout = open(fn, 'w')
    try:
        fout.write(out)
    finally:
        fout.close()
    return out


def _checked_url(address, label):
    """Return *address* normalised by urlparse, or raise ValueError.

    Only absolute URLs with a scheme in ``_ALLOWED_SCHEMES`` and a network
    location are accepted; *label* names the offending field in the error.
    """
    parts = urlparse(address)
    if parts[0].lower() not in _ALLOWED_SCHEMES or not parts[1]:
        raise ValueError(
            '%s must be an absolute http, https or ftp URL' % label)
    return urlunparse(parts)


def _pipeline(commands):
    """Run *commands* (argv lists) as a pipeline without a shell.

    Each command's stdout feeds the next command's stdin.  Returns the raw
    stdout of the last command.  Exit statuses are not checked; OSError from
    a missing executable propagates.
    """
    procs = []
    source = None
    for argv in commands:
        proc = subprocess.Popen(argv, stdin=source, stdout=subprocess.PIPE)
        if source is not None:
            # Drop our copy of the pipe so the upstream process is the only
            # writer and sees a broken pipe if the consumer exits early.
            source.close()
        source = proc.stdout
        procs.append(proc)
    output = procs[-1].communicate()[0]
    for proc in procs[:-1]:
        proc.wait()
    return output


def _as_text(data):
    """Return subprocess output as the native ``str`` type.

    On Python 3 the bytes are decoded as latin-1, which maps every byte to
    one character so the data survives a later latin-1 encode unchanged.
    """
    if isinstance(data, str):
        return data
    return data.decode('latin-1')


def tidyxslt(**kwargs):
    """Fetch ``docAddr``, tidy it into XHTML and apply the ``xslAddr`` XSLT.

    Keyword arguments:
        docAddr -- URL of the document to fetch and tidy.
        xslAddr -- URL of the stylesheet passed to xsltproc.
    Any other keyword arguments (such as ``cache``) are ignored.

    Returns a complete CGI response: a ``Content-Type: text/xml`` header,
    a blank line, and the xsltproc output.

    Raises ValueError if either address is not an http/https/ftp URL.  Both
    addresses come straight from the request, so they are validated and then
    passed as single argv entries -- never through a shell.
    """
    docAddr = _checked_url(kwargs['docAddr'], 'doc')
    xslAddr = _checked_url(kwargs['xslAddr'], 'xsl')

    result = _pipeline([
        [CURL, '--compressed', '-s', docAddr],
        [TIDY, '--indent', 'yes', '--doctype', 'strict',
         '--output-encoding', 'latin1', '--force-output', 'yes',
         '-asxml', '-q', '-f', '/dev/null'],
        [XSLT, xslAddr, '-'],
    ])
    return "Content-Type: text/xml\n\n" + _as_text(result)


def _write_response(text):
    """Write *text* to standard output as the CGI response."""
    raw = getattr(sys.stdout, 'buffer', None)
    if raw is None:
        # Python 2: str is already a byte string.
        sys.stdout.write(text)
    else:
        # Python 3: undo the latin-1 decoding done in _as_text.
        raw.write(text.encode('latin-1', 'replace'))
        raw.flush()


def serveRequest():
    """Handle one CGI request.

    Without a ``doc`` field the plain form page is served.  Otherwise the
    (possibly cached) transformation of ``doc`` by ``xsl`` is served; an
    unacceptable address yields a ``400 Bad Request`` plain-text response.
    """
    # Imported here so the helpers above stay importable on interpreters
    # without the cgi module (removed from the standard library in 3.13).
    import cgi

    fields = cgi.FieldStorage()
    if 'doc' not in fields:
        # Serve up a simple form interface
        _write_response("Content-Type: text/html\n\n" + _FORM_PAGE + "\n")
        return

    try:
        # getfirst() always yields a single string, even when a field is
        # repeated in the query (getvalue() would return a list).
        body = cacheFunc(tidyxslt,
                         docAddr=fields.getfirst('doc', ''),
                         xslAddr=fields.getfirst('xsl', ''),
                         cache=fields.getfirst('cache', '1'))
    except ValueError:
        problem = sys.exc_info()[1]
        _write_response("Status: 400 Bad Request\n"
                        "Content-Type: text/plain\n\n%s\n" % problem)
        return
    _write_response(body + "\n")


if __name__ == '__main__':
    # Only act when run by a web server as a CGI program.
    if 'SCRIPT_NAME' in os.environ:
        serveRequest()