view bzapi.py @ 37:c6b41464c021 default tip

factored async web server into separate file
author Atul Varma <varmaa@toolness.com>
date Thu, 24 Dec 2009 18:34:51 -0800
parents 6b0a17f31342
children
line wrap: on
line source

"""
    bzapi - Access to the Bugzilla REST API w/ MongoDB integration

    This module provides access to the Bugzilla REST API with fast
    caching of search queries, configuration information and so forth
    via MongoDB.

    For information on the Bugzilla REST API, see:

      https://wiki.mozilla.org/Bugzilla:REST_API

    For information on MongoDB, see:

      http://www.mongodb.org/

    This module requires pymongo, which can be found here:

      http://pypi.python.org/pypi/pymongo/
"""

import logging
import urllib2
import urllib
from datetime import datetime

try:
    import json
except ImportError:
    import simplejson as json

import pymongo

def split_seq(seq, size):
    """
    Chop the given sequence into consecutive slices holding at most
    `size` items each.

    The slices are returned, in order, as a list; the final slice
    holds whatever is left over and may be shorter than `size`.

    Taken from http://code.activestate.com/recipes/425044/.
    """

    pieces = []
    for start in range(0, len(seq), size):
        pieces.append(seq[start:start + size])
    return pieces

def open_url(url, headers, query_args=None, urllib2=urllib2):
    """
    Request the given URL and return the opened response.

    `headers` is a dictionary of request headers to send.  When
    `query_args` is a non-empty dictionary, it is URL-encoded and
    appended to the URL as its querystring.  `urllib2` is the module
    used to build and open the request; passing a different object
    allows the network access to be replaced.

    Returns whatever urllib2.urlopen() returns.
    """

    full_url = url
    if query_args:
        full_url = "%s?%s" % (url, urllib.urlencode(query_args))

    logging.debug('retrieving %s' % full_url)

    request = urllib2.Request(full_url)
    for header_name, header_value in headers.items():
        request.add_header(header_name, header_value)

    response = urllib2.urlopen(request)
    return response

def normalize_bug(bug):
    """
    Prepares a Bugzilla JSON bug object [1] for storage, in place.

    The ISO-formatted date strings stored under 'last_change_time'
    and 'creation_time' are replaced by Python datetime objects, and
    the bug id number is copied to '_id' so that it becomes the
    MongoDB primary key of the bug.

    [1] https://wiki.mozilla.org/Bugzilla:REST_API:Objects#Bug
    """

    bug['last_change_time'] = datetime_from_iso(bug['last_change_time'])
    bug['creation_time'] = datetime_from_iso(bug['creation_time'])
    bug['_id'] = bug['id']

def datetime_from_rfc1123(timestamp):
    """
    Parses an RFC 1123-formatted date string, such as
    'Thu, 24 Dec 2009 18:34:51 GMT', into a Python datetime object.

    The returned datetime carries no timezone information.
    """

    rfc1123_format = '%a, %d %b %Y %H:%M:%S GMT'
    return datetime.strptime(timestamp, rfc1123_format)

def datetime_to_iso(dt):
    """
    Formats a Python datetime object as an ISO 8601 string.

    Microseconds are dropped, and a 'Z' is appended to the result
    so that Bugzilla likes it.
    """

    whole_seconds = dt.replace(microsecond=0)
    return whole_seconds.isoformat('T') + "Z"

def datetime_from_iso(timestamp):
    """
    Parses an ISO 8601 formatted string ending in 'Z' (the form in
    which Bugzilla provides its dates) into a Python datetime object.

    The returned datetime carries no timezone information.
    """

    bugzilla_iso_format = '%Y-%m-%dT%H:%M:%SZ'
    return datetime.strptime(timestamp, bugzilla_iso_format)

def sanitize_for_mongodb(obj):
    """
    Sanitizes all dictionaries in the given Python object for
    insertion into MongoDB, replacing any '.' in key names
    in them with '_DOT_'.

    The object is modified in place and nothing is returned.
    Dictionaries and lists -- including instances of their
    subclasses, such as collections.OrderedDict -- are processed
    'deeply', recursing through sub-objects; any other kind of
    object is left untouched.

    If a sanitized key name is already present in a dictionary,
    the existing entry is overwritten.

    This function assumes that the given object doesn't contain
    reference cycles.
    """

    # isinstance() rather than an exact type comparison, so that
    # dict and list subclasses are not silently skipped.
    if isinstance(obj, dict):
        # Collect the offending names up front: a dictionary must not
        # gain or lose keys while it is being iterated over.
        bad_names = [name for name in obj
                     if "." in name]
        for name in bad_names:
            obj[name.replace('.', '_DOT_')] = obj.pop(name)
        for value in obj.values():
            sanitize_for_mongodb(value)
    elif isinstance(obj, list):
        for item in obj:
            sanitize_for_mongodb(item)

class CachedSearch(object):
    """
    A Bugzilla bug search whose results are cached in a MongoDB
    collection.

    Any keyword arguments given to the constructor are sent as
    search parameters with every '/bug' query made by update().

    Observers registered through add_observer() have their notify()
    method called with {'bug': <bug id>} each time a bug has been
    retrieved in full and saved to the collection.
    """

    # Maximum number of bugs requested in full by a single API call.
    MAX_BUG_BATCH_SIZE = 10

    def __init__(self, api, collection, **kwargs):
        """
        `api` is the object whose get() method is used to query
        Bugzilla, and `collection` is the MongoDB collection in which
        the bugs are stored.
        """

        self.observers = []
        self.options = kwargs

        self.bugs = collection
        self.api = api
        self._update_last_update()

    def add_observer(self, observer):
        """
        Registers an object to be notified whenever a bug is updated.
        """

        self.observers.append(observer)

    def _update_last_update(self):
        """
        Sets self.last_update from the 'retrieved_time' values stored
        in the collection, or to None if the collection is empty.
        """

        # NOTE(review): sorting in ascending order selects the oldest
        # 'retrieved_time' in the collection; confirm that this, and
        # not the most recent one, is what update() should use as
        # its 'changed_after' value.
        bugs = self.bugs.find().sort("retrieved_time",
                                     pymongo.ASCENDING).limit(1)
        if bugs.count() == 0:
            self.last_update = None
        else:
            self.last_update = bugs[0]['retrieved_time']

    def _retrieve_full_bugs(self, bug_ids):
        """
        Fetches the bugs with the given ids, including their comments
        and history, saves them to the collection and notifies the
        observers about each of them.
        """

        # Bug ids may be numbers rather than strings, which
        # str.join() would reject, so convert them explicitly.
        params = {'id': ','.join([str(bug_id) for bug_id in bug_ids]),
                  'id_mode': 'include',
                  'comments': '1',
                  'history': '1'}
        response = self.api.get('/bug', **params)
        bugs = response['data']['bugs']
        for bug in bugs:
            logging.debug('updating bug %s', bug['id'])
            normalize_bug(bug)
            bug['retrieved_time'] = response['date']
            bug['needs_full_update'] = False
            self.bugs.save(bug)
            for observer in self.observers:
                observer.notify({'bug': bug['id']})

    def update(self):
        """
        Brings the cached search results up to date.

        The search is run first (restricted to bugs changed after
        self.last_update, when that is set) and every bug that is new
        or has changed since it was stored is flagged as needing a
        full update.  All flagged bugs are then retrieved in full, in
        batches of at most MAX_BUG_BATCH_SIZE.
        """

        params = dict(self.options)
        if self.last_update:
            params['changed_after'] = self.last_update
        response = self.api.get('/bug', **params)

        for bug in response['data']['bugs']:
            normalize_bug(bug)
            stored_bug = self.bugs.find_one({'id': bug['id']})
            if stored_bug is None:
                # Never seen before: store the search result itself
                # until the full bug has been retrieved.
                stored_bug = bug
                stored_bug['needs_full_update'] = True
            elif bug['last_change_time'] > stored_bug['last_change_time']:
                stored_bug['needs_full_update'] = True
            stored_bug['retrieved_time'] = response['date']
            self.bugs.save(stored_bug)

        # Query the collection rather than relying on the loop above,
        # so that bugs flagged earlier but never retrieved in full
        # are picked up as well.
        bugs_to_update = self.bugs.find({'needs_full_update': True})
        bug_ids = [bug['id'] for bug in bugs_to_update]
        if bug_ids:
            for segment in split_seq(bug_ids, self.MAX_BUG_BATCH_SIZE):
                self._retrieve_full_bugs(segment)
            self._update_last_update()

class BugzillaApi(object):
    """
    Minimal client for the Bugzilla REST API.

    The Bugzilla configuration is cached in a MongoDB collection: if
    the collection already holds a document, that document is used as
    the configuration; otherwise the configuration is fetched from
    '/configuration', sanitized for MongoDB and inserted into the
    collection.  It is available as the `config` attribute.
    """

    def __init__(self, base_url, collection, username=None, password=None,
                 open_url=open_url):
        """
        `base_url` is prepended to every URL passed to get().
        `username` and `password`, when both are given, are sent with
        every request.  `open_url` is the function used to perform
        the HTTP request; it is called with `url`, `query_args` and
        `headers` keyword arguments.
        """

        self._open_url = open_url

        self.base_url = base_url
        self.username = username
        self.password = password

        config = collection.find_one()
        if not config:
            config = self.get('/configuration')['data']
            sanitize_for_mongodb(config)
            collection.insert(config)
        self.config = config

    @staticmethod
    def _config_key(container, name):
        """
        Returns the key under which `name` is present in the given
        part of the configuration, or None if it isn't there.

        Key names in the configuration have had every '.' replaced
        by '_DOT_' for MongoDB's sake, so both the name as given and
        its sanitized form are tried.
        """

        if name in container:
            return name
        replace = getattr(name, 'replace', None)
        if replace is not None:
            sanitized = replace('.', '_DOT_')
            if sanitized in container:
                return sanitized
        return None

    def _validate_component(self, product, component=None):
        """
        Raises ValueError if the given product, or the given component
        of that product, is not part of the Bugzilla configuration.
        """

        products = self.config['product']
        product_key = self._config_key(products, product)
        if product_key is None:
            msg = 'product %s not in configuration' % repr(product)
            raise ValueError(msg)
        if component:
            components = products[product_key]['component']
            if self._config_key(components, component) is None:
                msg = 'component %s of product %s not in configuration' % (
                    repr(component),
                    repr(product)
                    )
                raise ValueError(msg)

    def get(self, url, **kwargs):
        """
        Performs a GET request on the given API URL (relative to the
        base URL), passing the keyword arguments as querystring
        parameters.  datetime values are converted to ISO 8601
        strings first.

        Returns a dictionary with the decoded JSON response under
        'data' and the UTC time at which the request was started
        under 'date'.

        Raises ValueError if a 'product' parameter (and 'component',
        if also present) is not part of the Bugzilla configuration.
        """

        now = datetime.utcnow().replace(microsecond=0)

        # NOTE: the credentials travel in the querystring, and the
        # default open_url() logs the full URL at debug level.
        params = {}
        if self.username and self.password:
            params.update({'username': self.username,
                           'password': self.password})
        for name, value in kwargs.items():
            if isinstance(value, datetime):
                value = datetime_to_iso(value)
            params[name] = value

        if 'product' in params:
            self._validate_component(params['product'],
                                     params.get('component'))

        response = self._open_url(
            url=self.base_url + url,
            query_args=params,
            headers={'Accept': 'application/json',
                     'Content-Type': 'application/json'},
            )

        # TODO: instead of 'now', we'd like to use the 'Date'
        # HTTP header, but it's actually completely wrong in
        # the case of bugzilla.mozilla.org, so we'll assume
        # our timekeeping is better.

        #'date': datetime_from_rfc1123(response.info()['Date'])}

        try:
            data = json.loads(response.read())
        finally:
            # Release the connection; a replacement open_url() may
            # return an object that has no close() method.
            close = getattr(response, 'close', None)
            if close is not None:
                close()

        return {'data': data,
                'date': now}