Mercurial > bzapi
view bzapi.py @ 37:c6b41464c021 default tip
factored async web server into separate file
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Thu, 24 Dec 2009 18:34:51 -0800 |
parents | 6b0a17f31342 |
children |
line wrap: on
line source
"""
    bzapi - Access to the Bugzilla REST API w/ MongoDB integration

    This module provides access to the Bugzilla REST API with fast
    caching of search queries, configuration information and so forth
    via MongoDB.

    For information on the Bugzilla REST API, see:

      https://wiki.mozilla.org/Bugzilla:REST_API

    For information on MongoDB, see:

      http://www.mongodb.org/

    This module requires pymongo, which can be found here:

      http://pypi.python.org/pypi/pymongo/
"""

import logging
import urllib2
import urllib
from datetime import datetime

try:
    import json
except ImportError:
    import simplejson as json

import pymongo


def split_seq(seq, size):
    """
    Split up the given sequence into pieces of the given size. The
    last piece is shorter if len(seq) is not a multiple of size.

    Taken from http://code.activestate.com/recipes/425044/.
    """

    return [seq[i:i + size] for i in range(0, len(seq), size)]


def open_url(url, headers, query_args=None, urllib2=urllib2):
    """
    Open the given URL with the given request headers dictionary and
    optional querystring arguments dictionary. Returns whatever
    urllib2.urlopen() returns for the request.

    The urllib2 parameter lets callers substitute a replacement for
    the urllib2 module.
    """

    if query_args:
        full_url = "%s?%s" % (url, urllib.urlencode(query_args))
    else:
        full_url = url

    # NOTE(review): BugzillaApi.get() passes the username and password
    # as query arguments, so they are part of full_url and end up in
    # the debug log.
    logging.debug('retrieving %s', full_url)

    request = urllib2.Request(full_url)
    for name, value in headers.items():
        request.add_header(name, value)
    return urllib2.urlopen(request)


def normalize_bug(bug):
    """
    Converts all ISO-formatted date strings in the Bugzilla JSON bug
    object [1] to Python datetime objects, and sets the MongoDB
    primary key of the bug to the bug id number.

    The bug is modified in place; nothing is returned.

    [1] https://wiki.mozilla.org/Bugzilla:REST_API:Objects#Bug
    """

    for name in ['last_change_time', 'creation_time']:
        bug[name] = datetime_from_iso(bug[name])
    bug['_id'] = bug['id']


def datetime_from_rfc1123(timestamp):
    """
    Converts a rfc 1123-formatted date string to a Python datetime
    object.
    """

    return datetime.strptime(timestamp, '%a, %d %b %Y %H:%M:%S GMT')


def datetime_to_iso(dt):
    """
    Converts a Python datetime object to an ISO 8601 formatted
    string, also with a 'Z' at the end so that Bugzilla likes it.
    Microseconds are dropped.
    """

    return "%sZ" % (dt.replace(microsecond=0).isoformat('T'))


def datetime_from_iso(timestamp):
    """
    Converts an ISO 8601 formatted string with a 'Z' at the end of it
    (as provided by Bugzilla) to a Python datetime object.
    """

    return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')


def _sanitize_key(name):
    """
    Returns the given key name with every '.' replaced by '_DOT_'.
    """

    return name.replace('.', '_DOT_')


def _find_config_name(names, name):
    """
    Looks up the given name in a container taken from the sanitized
    configuration. Returns the name as it is actually spelled in the
    container (either unchanged or in its '_DOT_' form), or None if
    neither spelling is present.
    """

    if name in names:
        return name
    # Only string-like names can have a sanitized spelling.
    if hasattr(name, 'replace'):
        sanitized = _sanitize_key(name)
        if sanitized in names:
            return sanitized
    return None


def sanitize_for_mongodb(obj):
    """
    Sanitizes all dictionaries in the given Python object for
    insertion into MongoDB, replacing any '.' in key names in them
    with '_DOT_'.

    This function works 'deeply', recursing through sub-objects, but
    it assumes that the given object doesn't contain reference
    cycles. The object is modified in place; nothing is returned.
    """

    if isinstance(obj, dict):
        # Collect the offending keys first so the dictionary is not
        # resized while it is being iterated.
        bad_names = [name for name in obj if "." in name]
        for name in bad_names:
            obj[_sanitize_key(name)] = obj.pop(name)
        for name in obj:
            sanitize_for_mongodb(obj[name])
    elif isinstance(obj, list):
        for item in obj:
            sanitize_for_mongodb(item)


class CachedSearch(object):
    """
    A Bugzilla bug search whose results are cached in a MongoDB
    collection.

    Keyword arguments given to the constructor are sent as query
    parameters with every '/bug' search request made by update().
    Observers registered with add_observer() have their notify()
    method called with {'bug': <bug id>} for each bug that is fully
    re-fetched.
    """

    # Maximum number of bug ids requested in one full-bug retrieval.
    MAX_BUG_BATCH_SIZE = 10

    def __init__(self, api, collection, **kwargs):
        self.observers = []
        self.options = kwargs
        self.bugs = collection
        self.api = api
        self._update_last_update()

    def add_observer(self, observer):
        """
        Registers an object whose notify() method is called whenever
        a bug has been fully updated.
        """

        self.observers.append(observer)

    def _update_last_update(self):
        """
        Sets self.last_update to the smallest 'retrieved_time' of any
        bug in the collection, or to None if the collection is empty.
        """

        bugs = self.bugs.find().sort("retrieved_time",
                                     pymongo.ASCENDING).limit(1)
        if bugs.count() == 0:
            self.last_update = None
        else:
            self.last_update = bugs[0]['retrieved_time']

    def _retrieve_full_bugs(self, bug_ids):
        """
        Fetches the bugs with the given ids, including comments and
        history, saves them to the collection with
        'needs_full_update' cleared, and notifies the observers.
        """

        # str() so that the ids may be either numbers or strings.
        params = {'id': ','.join(str(bug_id) for bug_id in bug_ids),
                  'id_mode': 'include',
                  'comments': '1',
                  'history': '1'}
        response = self.api.get('/bug', **params)
        bugs = response['data']['bugs']
        for bug in bugs:
            logging.debug('updating bug %s', bug['id'])
            normalize_bug(bug)
            bug['retrieved_time'] = response['date']
            bug['needs_full_update'] = False
            self.bugs.save(bug)
            # NOTE(review): observers are notified once per saved
            # bug; nesting reconstructed from the use of `bug` below.
            for observer in self.observers:
                observer.notify({'bug': bug['id']})

    def update(self):
        """
        Brings the cached collection up to date.

        Runs the search (restricted to bugs changed after
        self.last_update, when that is set), flags every new or
        more recently changed bug as needing a full update, then
        fetches all flagged bugs in batches of at most
        MAX_BUG_BATCH_SIZE and refreshes self.last_update.
        """

        params = {}
        params.update(self.options)
        if self.last_update:
            params['changed_after'] = self.last_update
        response = self.api.get('/bug', **params)
        bugs = response['data']['bugs']
        for bug in bugs:
            normalize_bug(bug)
            old_bug = self.bugs.find_one({'id': bug['id']})
            if ((old_bug is None) or
                (bug['last_change_time'] > old_bug['last_change_time'])):
                if not old_bug:
                    old_bug = bug
                # Only flag the bug here; the full data is fetched
                # below, so a flagged bug left over from an earlier
                # run is picked up as well.
                old_bug['needs_full_update'] = True
                old_bug['retrieved_time'] = response['date']
                self.bugs.save(old_bug)

        bugs_to_update = self.bugs.find({'needs_full_update': True})
        bug_ids = [bug['id'] for bug in bugs_to_update]
        if bug_ids:
            for segment in split_seq(bug_ids, self.MAX_BUG_BATCH_SIZE):
                self._retrieve_full_bugs(segment)
        self._update_last_update()


class BugzillaApi(object):
    """
    Client for the Bugzilla REST API rooted at base_url.

    The Bugzilla configuration is read from the first document of the
    given MongoDB collection; if the collection is empty it is
    fetched from '/configuration', sanitized with
    sanitize_for_mongodb() and inserted into the collection.

    The open_url parameter lets callers substitute the function used
    to perform HTTP requests.
    """

    def __init__(self, base_url, collection, username=None, password=None,
                 open_url=open_url):
        self._open_url = open_url
        self.base_url = base_url
        self.username = username
        self.password = password

        config = collection.find_one()
        if not config:
            config = self.get('/configuration')['data']
            sanitize_for_mongodb(config)
            collection.insert(config)
        self.config = config

    def _validate_component(self, product, component=None):
        """
        Raises ValueError if the given product, or the given
        component of that product, is not in the configuration.
        """

        products = self.config['product']
        # The configuration has been through sanitize_for_mongodb(),
        # so a name containing '.' is stored under its '_DOT_' form;
        # both spellings have to be tried.
        product_key = _find_config_name(products, product)
        if product_key is None:
            msg = 'product %s not in configuration' % repr(product)
            raise ValueError(msg)
        if component:
            components = products[product_key]['component']
            if _find_config_name(components, component) is None:
                msg = 'component %s of product %s not in configuration' % (
                    repr(component),
                    repr(product)
                    )
                raise ValueError(msg)

    def get(self, url, **kwargs):
        """
        Performs a GET request on base_url + url with the keyword
        arguments as query parameters, and returns a dictionary with
        two keys: 'data', the decoded JSON response body, and 'date',
        the UTC time (to the second) taken just before the request
        was made.

        datetime values among the keyword arguments are sent in ISO
        8601 form. The username and password are added to the query
        when both are set. If a 'product' parameter is present, it
        and any 'component' parameter are validated against the
        configuration first, raising ValueError if unknown.
        """

        now = datetime.utcnow().replace(microsecond=0)

        params = {}
        if self.username and self.password:
            params.update({'username': self.username,
                           'password': self.password})
        # Explicit keyword arguments take precedence over credentials.
        for name, value in kwargs.items():
            if isinstance(value, datetime):
                value = datetime_to_iso(value)
            params[name] = value

        if 'product' in params:
            self._validate_component(params['product'],
                                     params.get('component'))

        response = self._open_url(
            url=self.base_url + url,
            query_args=params,
            headers={'Accept': 'application/json',
                     'Content-Type': 'application/json'},
            )

        try:
            data = json.loads(response.read())
        finally:
            # Release the connection; a substituted open_url may
            # return an object that has no close() method.
            close = getattr(response, 'close', None)
            if close is not None:
                close()

        # TODO: instead of 'now', we'd like to use the 'Date'
        # HTTP header, but it's actually completely wrong in
        # the case of bugzilla.mozilla.org, so we'll assume
        # our timekeeping is better.
        #'date': datetime_from_rfc1123(response.info()['Date'])}
        return {'data': data,
                'date': now}