Mercurial > if-archive-json-mirror
changeset 2:27b872aee1a7
Added mirror.py.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Tue, 27 May 2008 23:45:16 -0700 |
parents | 85c3f7bf09d6 |
children | f6395490cd7d |
files | generate_index.py mirror.py xhr_proxy.py |
diffstat | 3 files changed, 25 insertions(+), 47 deletions(-) [+] |
line wrap: on
line diff
--- a/generate_index.py Tue May 27 23:30:34 2008 -0700
+++ b/generate_index.py Tue May 27 23:45:16 2008 -0700
@@ -2,6 +2,7 @@
 import urllib2
 import re
 import os
+import cPickle
 
 import json
 
@@ -13,6 +14,7 @@
 INDEX_URL = "http://www.ifarchive.org/indexes/Master-Index.xml"
 XML_FILENAME = "Master-Index.xml"
 JSON_FILENAME = "if-archive.js"
+PICKLE_FILENAME = "if-archive.pickle"
 
 if __name__ == "__main__":
     if not os.path.exists(XML_FILENAME):
@@ -53,3 +55,6 @@
     dicts.sort(key = getdesc)
 
     open(JSON_FILENAME, "w").write("stories = " + json.write(dicts))
+
+    print "Writing %s" % PICKLE_FILENAME
+    cPickle.dump(dicts, open(PICKLE_FILENAME, "w"))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirror.py Tue May 27 23:45:16 2008 -0700
@@ -0,0 +1,20 @@
+import os
+import cPickle
+import urllib2
+import distutils.dir_util
+
+import generate_index
+
+BASE_URL = "http://ifarchive.flavorplex.com"
+
+if __name__ == "__main__":
+    dicts = cPickle.load(open(generate_index.PICKLE_FILENAME, "r"))
+    for info in dicts:
+        path = info['path']
+        if not os.path.exists(path):
+            url = "%s/%s" % (BASE_URL, path)
+            print "Fetching %s..." % url
+            fileobj = urllib2.urlopen(url)
+            contents = fileobj.read()
+            distutils.dir_util.mkpath(os.path.dirname(path))
+            open(path, "wb").write(contents)
--- a/xhr_proxy.py Tue May 27 23:30:34 2008 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#! /usr/bin/env python
-
-import re
-import os
-import cgi
-import cgitb
-import urllib2
-import distutils.dir_util
-
-cgitb.enable()
-
-form = cgi.FieldStorage()
-if form.has_key("file"):
-    path = form["file"].value
-else:
-    path = ""
-
-__main__ = __import__("__main__")
-
-mydir = os.path.dirname(__main__.__file__)
-
-ROOT_DIR = os.path.abspath(os.path.join(mydir, "..", "if-archive"))
-ZCODE_REGEXP = r".*\.z([1-8]|blorb)$"
-
-localpath = os.path.normpath(os.path.join(ROOT_DIR, path))
-
-result = None
-
-try:
-    if not re.match(ZCODE_REGEXP, path):
-        result = "ERROR: File does not appear to be a zcode file."
-    if not localpath.startswith(ROOT_DIR):
-        result = "ERROR: Security violation: can't retrieve file below root dir."
-    elif os.path.exists(localpath):
-        result = "SUCCESS: Path exists."
-    else:
-        fileobj = urllib2.urlopen("http://www.ifarchive.org/if-archive/%s" % path)
-        contents = fileobj.read()
-        distutils.dir_util.mkpath(os.path.dirname(localpath))
-        open(localpath, "wb").write(contents)
-        result = "SUCCESS: File retrieved."
-except Exception, e:
-    result = "ERROR: Unexpected exception: %s" % e
-
-print "Content-Type: text/plain"
-print
-print result