changeset 2:27b872aee1a7

Added mirror.py.
author Atul Varma <varmaa@toolness.com>
date Tue, 27 May 2008 23:45:16 -0700
parents 85c3f7bf09d6
children f6395490cd7d
files generate_index.py mirror.py xhr_proxy.py
diffstat 3 files changed, 25 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/generate_index.py	Tue May 27 23:30:34 2008 -0700
+++ b/generate_index.py	Tue May 27 23:45:16 2008 -0700
@@ -2,6 +2,7 @@
 import urllib2
 import re
 import os
+import cPickle
 
 import json
 
@@ -13,6 +14,7 @@
 INDEX_URL = "http://www.ifarchive.org/indexes/Master-Index.xml"
 XML_FILENAME = "Master-Index.xml"
 JSON_FILENAME = "if-archive.js"
+PICKLE_FILENAME = "if-archive.pickle"
 
 if __name__ == "__main__":
     if not os.path.exists(XML_FILENAME):
@@ -53,3 +55,6 @@
     dicts.sort(key = getdesc)
 
     open(JSON_FILENAME, "w").write("stories = " + json.write(dicts))
+
+    print "Writing %s" % PICKLE_FILENAME
+    cPickle.dump(dicts, open(PICKLE_FILENAME, "w"))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirror.py	Tue May 27 23:45:16 2008 -0700
@@ -0,0 +1,20 @@
+import os
+import cPickle
+import urllib2
+import distutils.dir_util
+
+import generate_index
+
+BASE_URL = "http://ifarchive.flavorplex.com"
+
+if __name__ == "__main__":
+    dicts = cPickle.load(open(generate_index.PICKLE_FILENAME, "r"))
+    for info in dicts:
+        path = info['path']
+        if not os.path.exists(path):
+            url = "%s/%s" % (BASE_URL, path)
+            print "Fetching %s..." % url
+            fileobj = urllib2.urlopen(url)
+            contents = fileobj.read()
+            distutils.dir_util.mkpath(os.path.dirname(path))
+            open(path, "wb").write(contents)
--- a/xhr_proxy.py	Tue May 27 23:30:34 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#! /usr/bin/env python
-
-import re
-import os
-import cgi
-import cgitb
-import urllib2
-import distutils.dir_util
-
-cgitb.enable()
-
-form = cgi.FieldStorage()
-if form.has_key("file"):
-    path = form["file"].value
-else:
-    path = ""
-
-__main__ = __import__("__main__")
-
-mydir = os.path.dirname(__main__.__file__)
-
-ROOT_DIR = os.path.abspath(os.path.join(mydir, "..", "if-archive"))
-ZCODE_REGEXP = r".*\.z([1-8]|blorb)$"
-
-localpath = os.path.normpath(os.path.join(ROOT_DIR, path))
-
-result = None
-
-try:
-    if not re.match(ZCODE_REGEXP, path):
-        result = "ERROR: File does not appear to be a zcode file."
-    if not localpath.startswith(ROOT_DIR):
-        result = "ERROR: Security violation: can't retrieve file below root dir."
-    elif os.path.exists(localpath):
-        result = "SUCCESS: Path exists."
-    else:
-        fileobj = urllib2.urlopen("http://www.ifarchive.org/if-archive/%s" % path)
-        contents = fileobj.read()
-        distutils.dir_util.mkpath(os.path.dirname(localpath))
-        open(localpath, "wb").write(contents)
-        result = "SUCCESS: File retrieved."
-except Exception, e:
-    result = "ERROR: Unexpected exception: %s" % e
-
-print "Content-Type: text/plain"
-print
-print result