annotate generate_index.py @ 2:27b872aee1a7

Added mirror.py.
author Atul Varma <varmaa@toolness.com>
date Tue, 27 May 2008 23:45:16 -0700
parents 30b149f2cdf1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
from __future__ import with_statement  # no-op on Python >= 2.6

import cPickle
import json  # python-json (json.write) on old setups; stdlib json elsewhere
import os
import re
import urllib2
from contextlib import closing

import elementtree.ElementTree as ET

# Builds a JSON (and pickled) index of the Z-code story files listed in the
# IF Archive master index.

# File names that count as story files: *.z1 ... *.z8 and *.zblorb.
ZCODE_REGEXP = r".*\.z([1-8]|blorb)$"

# Archive directories left out of the index.  The joined pattern is used
# with re.match(), so each entry acts as a prefix of the file's path.
EXCLUDED_DIRS = ["if-archive/infocom",
                 "if-archive/solutions",
                 "if-archive/starters"]
EXCLUDED_DIRS_REGEXP = "|".join(EXCLUDED_DIRS)

INDEX_URL = "http://www.ifarchive.org/indexes/Master-Index.xml"
XML_FILENAME = "Master-Index.xml"
JSON_FILENAME = "if-archive.js"
# Pickled copy of the story list; added in changeset 27b872aee1a7
# ("Added mirror.py.") -- presumably read by mirror.py, confirm there.
PICKLE_FILENAME = "if-archive.pickle"


def clean_description(desc):
    """Return *desc* stripped, without a trailing "(...)" or "[...]" part.

    ``None`` and empty strings yield ``""``.  A closing bracket with no
    matching opener leaves the text untouched instead of raising.  The text
    in front of the removed bracket is kept as-is (including a trailing
    space), so existing output does not change.
    """
    desc = (desc or "").strip()
    for opener, closer in (("(", ")"), ("[", "]")):
        if desc.endswith(closer):
            start = desc.rfind(opener)
            if start != -1:
                desc = desc[:start]
            break
    return desc


def is_story_file(filenode):
    """Tell whether a <file> node of the master index should be indexed.

    A node qualifies when its name matches ZCODE_REGEXP, it has a
    <description> element, and its path is non-empty and does not start
    with one of EXCLUDED_DIRS.  Nodes lacking <name> or <path> are skipped
    rather than crashing the whole run.
    """
    name = filenode.findtext("name", "")
    path = filenode.findtext("path", "")
    if not path or filenode.find("description") is None:
        return False
    if re.match(ZCODE_REGEXP, name) is None:
        return False
    return re.match(EXCLUDED_DIRS_REGEXP, path) is None


def getdesc(a):
    """Sort key: the "desc" entry of a story dict."""
    return a["desc"]


def build_story_dicts(files):
    """Turn <file> nodes into a list of story dicts sorted by description.

    Each dict has a "path" (taken verbatim from the index) and a "desc"
    (cleaned description, UTF-8 encoded).
    """
    dicts = []
    for filenode in files:
        if not is_story_file(filenode):
            continue
        desc = clean_description(filenode.findtext("description", ""))
        dicts.append(
            {"path": filenode.findtext("path", ""),
             "desc": desc.encode("utf-8")}
        )
    dicts.sort(key=getdesc)
    return dicts


def dump_json(obj):
    """Serialize *obj* to a JSON string.

    Uses ``json.write`` when the imported ``json`` module provides it (the
    call the script was written against) and falls back to ``json.dumps``
    otherwise, so the script does not die with AttributeError when
    ``import json`` resolves to the standard-library module.
    """
    writer = getattr(json, "write", None)
    if writer is not None:
        return writer(obj)
    return json.dumps(obj)


def fetch_index(url, filename):
    """Download *url* and store the raw bytes in *filename*."""
    with closing(urllib2.urlopen(url)) as response:
        data = response.read()
    # Binary mode: the payload is written exactly as received.
    with open(filename, "wb") as outfile:
        outfile.write(data)


def main():
    """Fetch the master index if needed, then write the JSON and pickle files."""
    # The downloaded index is cached on disk; delete it to force a refresh.
    if not os.path.exists(XML_FILENAME):
        print("Fetching %s." % INDEX_URL)
        fetch_index(INDEX_URL, XML_FILENAME)

    print("Scanning files.")
    page = ET.ElementTree(file=XML_FILENAME)
    dicts = build_story_dicts(page.findall("file"))

    print("Writing %s" % JSON_FILENAME)
    with open(JSON_FILENAME, "w") as outfile:
        # Emitted as a JavaScript assignment, not bare JSON.
        outfile.write("stories = " + dump_json(dicts))

    print("Writing %s" % PICKLE_FILENAME)
    # File mode kept as in the original so existing readers see the same file.
    with open(PICKLE_FILENAME, "w") as outfile:
        cPickle.dump(dicts, outfile)


if __name__ == "__main__":
    main()