Mercurial > bzapi
view bzapi.py @ 28:ce19838a318d
we now split large batches of bugs into small segments of 10 each
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Thu, 24 Dec 2009 11:54:11 -0800 |
parents | f717ecd3ede1 |
children | 9a7052db1045 |
line wrap: on
line source
import logging
import urllib
import urllib2
from datetime import datetime

import pymongo
import simplejson as json


def split_seq(seq, size):
    """Split ``seq`` into consecutive slices of at most ``size`` items.

    The last slice may be shorter than ``size``; an empty ``seq`` yields
    an empty list.
    """
    # Taken from http://code.activestate.com/recipes/425044/
    return [seq[i:i + size] for i in range(0, len(seq), size)]


def open_url(url, headers, query_args=None, urllib2=urllib2):
    """Open ``url`` and return whatever ``urllib2.urlopen`` returns.

    ``query_args``, when non-empty, is URL-encoded and appended as the
    query string.  Every entry of ``headers`` is added to the request.
    The ``urllib2`` parameter lets callers substitute another object
    providing ``Request`` and ``urlopen``.
    """
    if query_args:
        full_url = "%s?%s" % (url, urllib.urlencode(query_args))
    else:
        full_url = url

    # NOTE(review): query_args may carry 'username'/'password' (see
    # BugzillaApi.get), so this debug line can write credentials to the log.
    logging.debug('retrieving %s', full_url)

    request = urllib2.Request(full_url)
    for name, value in headers.items():
        request.add_header(name, value)
    return urllib2.urlopen(request)


def normalize_bug(bug):
    """Prepare a bug dict, in place, for storage.

    Converts the 'last_change_time' and 'creation_time' ISO strings to
    datetime objects and copies 'id' into '_id'.
    """
    for name in ['last_change_time', 'creation_time']:
        bug[name] = datetime_from_iso(bug[name])
    bug['_id'] = bug['id']


def datetime_from_rfc1123(timestamp):
    """Parse a timestamp such as 'Thu, 24 Dec 2009 19:54:11 GMT'."""
    return datetime.strptime(timestamp, '%a, %d %b %Y %H:%M:%S GMT')


def datetime_to_iso(dt):
    """Format ``dt`` as 'YYYY-MM-DDTHH:MM:SSZ', dropping microseconds."""
    return "%sZ" % (dt.replace(microsecond=0).isoformat('T'))


def datetime_from_iso(timestamp):
    """Parse a 'YYYY-MM-DDTHH:MM:SSZ' timestamp into a datetime."""
    return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')


def _sanitize_key(name):
    """Return ``name`` with every '.' replaced by '_DOT_'."""
    return name.replace('.', '_DOT_')


def sanitize(obj):
    """Recursively rename, in place, dict keys that contain a '.'.

    Walks nested dicts and lists; every key containing '.' is replaced
    by the same key with '.' rewritten as '_DOT_'.  Other values are
    left untouched.  Presumably needed because the result is stored in
    a MongoDB collection, which restricts '.' in field names.
    """
    if isinstance(obj, dict):
        # Collect the offending keys first so the dict is not resized
        # while it is being iterated.
        for name in [name for name in obj if "." in name]:
            obj[_sanitize_key(name)] = obj.pop(name)
        for value in obj.values():
            sanitize(value)
    elif isinstance(obj, list):
        for item in obj:
            sanitize(item)


class CachedSearch(object):
    """A bug search whose results are cached in a collection.

    ``api`` must provide ``get(url, **params)`` returning a dict with
    'data' and 'date' entries (see BugzillaApi.get); ``collection`` is
    used through ``find``, ``find_one`` and ``save``.  Extra keyword
    arguments are sent as search parameters on every update.
    """

    # Maximum number of bugs requested in full by a single API call.
    MAX_BUG_BATCH_SIZE = 10

    def __init__(self, api, collection, **kwargs):
        self.observers = []
        self.options = kwargs
        self.bugs = collection
        self.api = api
        self._update_last_update()

    def add_observer(self, observer):
        """Register an object whose ``notify`` is called per updated bug."""
        self.observers.append(observer)

    def _update_last_update(self):
        """Set ``last_update`` from the cached bugs (None if there are none).

        NOTE(review): the sort is ASCENDING, so this is the *oldest*
        'retrieved_time' in the collection.  That is the conservative
        choice for 'changed_after'; confirm it is intentional.
        """
        bugs = self.bugs.find().sort("retrieved_time",
                                     pymongo.ASCENDING).limit(1)
        if bugs.count() == 0:
            self.last_update = None
        else:
            self.last_update = bugs[0]['retrieved_time']

    def _retrieve_full_bugs(self, bug_ids):
        """Fetch the given bugs with comments and history, and store them.

        Each stored bug is marked as no longer needing a full update and
        every observer is notified with ``{'bug': <id>}``.
        """
        if not bug_ids:
            return

        # Ids are stringified so the join works whether the stored ids
        # are integers or strings.
        params = {'id': ','.join(str(bug_id) for bug_id in bug_ids),
                  'id_mode': 'include',
                  'comments': '1',
                  'history': '1'}
        response = self.api.get('/bug', **params)
        bugs = response['data']['bugs']
        for bug in bugs:
            logging.debug('updating bug %s', bug['id'])
            normalize_bug(bug)
            bug['retrieved_time'] = response['date']
            bug['needs_full_update'] = False
            self.bugs.save(bug)
            for observer in self.observers:
                observer.notify({'bug': bug['id']})

    def update(self):
        """Refresh the cache with bugs changed since ``last_update``.

        Runs the search (restricted with 'changed_after' when a previous
        update time is known), flags new or more recently changed bugs
        as needing a full update, then fetches every flagged bug in
        segments of at most MAX_BUG_BATCH_SIZE.
        """
        params = {}
        params.update(self.options)
        if self.last_update:
            params['changed_after'] = self.last_update
        response = self.api.get('/bug', **params)
        bugs = response['data']['bugs']
        for bug in bugs:
            normalize_bug(bug)
            old_bug = self.bugs.find_one({'id': bug['id']})
            if ((old_bug is None) or
                (bug['last_change_time'] > old_bug['last_change_time'])):
                if not old_bug:
                    old_bug = bug
                # Only flag the record here; the full data is fetched
                # below so that an interrupted run can be resumed.
                old_bug['needs_full_update'] = True
                old_bug['retrieved_time'] = response['date']
                self.bugs.save(old_bug)

        bugs_to_update = self.bugs.find({'needs_full_update': True})
        bug_ids = [bug['id'] for bug in bugs_to_update]
        if bug_ids:
            for segment in split_seq(bug_ids, self.MAX_BUG_BATCH_SIZE):
                self._retrieve_full_bugs(segment)
        self._update_last_update()


class BugzillaApi(object):
    """Minimal client for a Bugzilla REST API rooted at ``base_url``.

    The server configuration is read from ``collection`` when it holds
    a document; otherwise it is fetched from '/configuration',
    sanitized and inserted into the collection.
    """

    def __init__(self, base_url, collection, username=None, password=None,
                 open_url=open_url):
        self._open_url = open_url
        self.base_url = base_url
        self.username = username
        self.password = password

        config = collection.find_one()
        if not config:
            config = self.get('/configuration')['data']
            sanitize(config)
            collection.insert(config)
        self.config = config

    def _validate_component(self, product, component=None):
        """Raise ValueError if product/component is not in the config.

        The configuration keys have been through ``sanitize``, so the
        names are looked up in their sanitized form; otherwise a name
        containing '.' would never be found.
        """
        products = self.config['product']
        product_key = _sanitize_key(product)
        if product_key not in products:
            msg = 'product %s not in configuration' % repr(product)
            raise ValueError(msg)
        if component:
            components = products[product_key]['component']
            if _sanitize_key(component) not in components:
                msg = 'component %s of product %s not in configuration' % (
                    repr(component),
                    repr(product)
                    )
                raise ValueError(msg)

    def get(self, url, **kwargs):
        """GET ``base_url + url`` and return ``{'data': ..., 'date': ...}``.

        Keyword arguments become query parameters; datetime values are
        sent in ISO form.  'username' and 'password' are added when both
        are configured.  'data' is the decoded JSON body and 'date' is
        the local UTC time taken just before the request.

        Raises ValueError when a 'product' (and optional 'component')
        parameter is not in the server configuration.
        """
        now = datetime.utcnow().replace(microsecond=0)

        params = {}
        if self.username and self.password:
            params.update({'username': self.username,
                           'password': self.password})
        for name, value in kwargs.items():
            if isinstance(value, datetime):
                value = datetime_to_iso(value)
            params[name] = value

        if 'product' in params:
            self._validate_component(params['product'],
                                     params.get('component'))

        response = self._open_url(
            url=self.base_url + url,
            query_args=params,
            headers={'Accept': 'application/json',
                     'Content-Type': 'application/json'},
            )
        try:
            data = json.loads(response.read())
        finally:
            # Release the connection; injected test doubles may not
            # provide close(), so look it up defensively.
            close = getattr(response, 'close', None)
            if close is not None:
                close()

        # TODO: instead of 'now', we'd like to use the 'Date'
        # HTTP header, but it's actually completely wrong in
        # the case of bugzilla.mozilla.org, so we'll assume
        # our timekeeping is better.
        #'date': datetime_from_rfc1123(response.info()['Date'])}
        return {'data': data, 'date': now}