Mercurial > daily-edition
changeset 9:9d6306907982
added url_cache.py
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Thu, 31 Dec 2009 15:43:25 -0800 |
parents | 7b6bec689805 |
children | b6259e713533 |
files | url_cache.py |
diffstat | 1 files changed, 23 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# url_cache.py -- added in changeset 9:9d6306907982.
import urllib2
import logging


class UrlCache(object):
    """Keep fetched URL bodies in a mapping, revalidating on refresh.

    ``storage`` is any object supporting ``url in storage``,
    ``storage[url]`` and ``storage[url] = value``.  Each stored value is a
    dict with a ``'data'`` key (the response body) and, when the server
    sent a ``Last-Modified`` header, a ``'lastmod'`` key holding that
    header's value.
    """

    def __init__(self, storage):
        """Use ``storage`` as the backing mapping for cached entries."""
        self.urls = storage

    def refresh(self, url):
        """Fetch ``url`` and store its body, unless it is unchanged.

        If a previous fetch recorded a ``Last-Modified`` value for ``url``,
        it is sent back as ``If-Modified-Since`` so the server may answer
        304 instead of resending the body.  On a 304 the stored entry is
        left untouched.

        Returns ``None``.

        Raises:
            urllib2.HTTPError: for any HTTP error status other than 304.
        """
        req = urllib2.Request(url)
        if url in self.urls:
            # Read the entry once; the storage may be more expensive than a
            # plain dict lookup.
            cached = self.urls[url]
            if 'lastmod' in cached:
                req.add_header('If-Modified-Since', cached['lastmod'])

        logging.debug('fetching %s.', url)
        # Only the open call is guarded: it is what reports the HTTP status.
        try:
            response = urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            if e.code == 304:
                logging.debug('url not modified.')
                return
            raise

        # Always release the connection, even if reading the body fails.
        try:
            info = {'data': response.read()}
            headers = response.info()
            if 'Last-Modified' in headers:
                info['lastmod'] = headers['Last-Modified']
        finally:
            response.close()

        self.urls[url] = info