changeset 9:9d6306907982

added url_cache.py
author Atul Varma <varmaa@toolness.com>
date Thu, 31 Dec 2009 15:43:25 -0800
parents 7b6bec689805
children b6259e713533
files url_cache.py
diffstat 1 files changed, 23 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/url_cache.py	Thu Dec 31 15:43:25 2009 -0800
@@ -0,0 +1,44 @@
+import urllib2
+import logging
+
+class UrlCache(object):
+    """Cache of URL bodies that revalidates entries with conditional GETs.
+
+    Entries live in the dict-like storage object handed to the
+    constructor, keyed by URL.  Each entry is a dict holding the response
+    body under 'data' and, when the server sent a Last-Modified header,
+    that header's value under 'lastmod'.
+    """
+
+    def __init__(self, storage):
+        """Keep storage as the mapping that holds the cached entries."""
+        self.urls = storage
+
+    def refresh(self, url):
+        """Fetch url and store its body, unless the cached copy is current.
+
+        When the cached entry has a 'lastmod' value, it is sent as the
+        If-Modified-Since request header.  An HTTP 304 reply leaves the
+        cached entry untouched; any other urllib2.HTTPError propagates.
+        """
+        req = urllib2.Request(url)
+        if url in self.urls and 'lastmod' in self.urls[url]:
+            req.add_header('If-Modified-Since', self.urls[url]['lastmod'])
+        logging.debug('fetching %s.', url)
+        try:
+            response = urllib2.urlopen(req)
+        except urllib2.HTTPError, e:
+            if e.code == 304:
+                logging.debug('url not modified.')
+                return
+            raise
+        # Close the response explicitly, even if reading the body fails,
+        # instead of relying on it being cleaned up implicitly.
+        try:
+            info = {'data': response.read()}
+            headers = response.info()
+            if 'Last-Modified' in headers:
+                info['lastmod'] = headers['Last-Modified']
+        finally:
+            response.close()
+        self.urls[url] = info