Mercurial > enso_core
changeset 45:d7c68ba81215
Added enso.utils.decorators and enso.utils.memoize.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Mon, 25 Feb 2008 09:15:44 -0600 |
parents | 78bb785c992e |
children | e37e56835647 |
files | enso/utils/decorators.py enso/utils/memoize.py tests/test_decorators.py tests/test_memoize.py |
diffstat | 4 files changed, 557 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/enso/utils/decorators.py Mon Feb 25 09:15:44 2008 -0600 @@ -0,0 +1,101 @@ +# Copyright (c) 2008, Humanized, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of Enso nor the names of its contributors may +# be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY Humanized, Inc. ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL Humanized, Inc. BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# ---------------------------------------------------------------------------- +# +# enso.utils.decorators +# +# ---------------------------------------------------------------------------- + +""" + Contains utility functions for decorators. Note that this module + does not contain actual decorators, but rather *utilities* for + decorators to use. +""" + +# ---------------------------------------------------------------------------- +# Imports +# ---------------------------------------------------------------------------- + +import inspect +import sys + + +# ---------------------------------------------------------------------------- +# Functionality +# ---------------------------------------------------------------------------- + +def finalizeWrapper( origFunc, wrappedFunc, decoratorName ): + """ + Makes some final modifications to the decorated or 'wrapped' + version of a function by making the wrapped version's name and + docstring match those of the original function. + + 'decoratorName' is the string name for the decorator, + e.g. 'Synchronized'. + + Assuming that the original function was of the form 'myFunc( self, + foo = 1 )' and the decorator name is 'Synchronized', the new + docstring for the wrapped function will be of the form: + + Synchronized wrapper for: + myFunc( self, foo = 1 ) + + <myFunc's docstring> + + Returns the wrapped function. + """ + + if sys.modules.has_key( "pychecker" ): + # If pychecker is in sys.modules, then we can assume that our + # code is being checked by pychecker. If this is the case, + # then we just want to return the original function, because + # pychecker doesn't like decorators. + return origFunc + + # Get a prettified representation of the argument list. + args, varargs, varkw, defaults = inspect.getargspec( origFunc ) + argspec = inspect.formatargspec( + args, + varargs, + varkw, + defaults + ) + + callspec = "%s%s" % ( origFunc.__name__, argspec ) + + # Generate a new docstring. + newDocString = "%s wrapper for:\n%s\n\n%s" % \ + ( decoratorName, callspec, origFunc.__doc__ ) + + # Set the appropriate attributes on the wrapped function and pass + # it back. + wrappedFunc.__doc__ = newDocString + wrappedFunc.__name__ = origFunc.__name__ + wrappedFunc.__module__ = origFunc.__module__ + return wrappedFunc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/enso/utils/memoize.py Mon Feb 25 09:15:44 2008 -0600 @@ -0,0 +1,283 @@ +# Copyright (c) 2008, Humanized, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of Enso nor the names of its contributors may +# be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY Humanized, Inc. ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL Humanized, Inc. BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# ---------------------------------------------------------------------------- +# +# enso.utils.memoize +# +# ---------------------------------------------------------------------------- + +""" + A memoizing decorator for caching the results of a function. +""" + +# ---------------------------------------------------------------------------- +# Imports +# ---------------------------------------------------------------------------- + +import inspect + +from enso.utils.decorators import finalizeWrapper + + +# ---------------------------------------------------------------------------- +# Memoized Decorator +# ---------------------------------------------------------------------------- + +_memoizedFunctions = [] + +class _MemoizedFunction: + """ + Encapsulates all information about a function that is memoized. + """ + + def __init__( self, function ): + self.function = function + self.cache = {} + _memoizedFunctions.append( self ) + + +def _generateArgWrapper( function, wrappedFunction ): + """ + Given any function and a wrappedFunction of the form + wrappedFunction(*args), creates an argument-wrapper function that + guarantees that function's arguments will be passed into + wrappedFunction() as a single tuple with no keyword arguments; + furthermore, the default values of any absent arguments will be + inserted into the tuple in their appopriate positions. + + The function is very useful for creating wrappers that memoize + functions, because it ensures that the wrapped function's + arguments are always passed in to the memoizing wrapper as a + hashable data structure (a tuple) in a very consistent way. See + the following primitive example: + + >>> timesCalled = 0 + >>> cachedResults = {} + + >>> def myFunction( a, b=1 ): + ... 'Simple function that returns the sum of the two arguments.' + ... global timesCalled + ... timesCalled += 1 + ... return a+b + + >>> def memoizedMyFunction( *args ): + ... 'Simple wrapper that memoizes myFunction().' + ... if not cachedResults.has_key( args ): + ... cachedResults[args] = myFunction( *args ) + ... return cachedResults[args] + + Note that memoizedFunction() isn't very flexible; for instance, + calling it in two different ways that are semantically identical + erroneously causes myFunction() to be called twice: + + >>> memoizedMyFunction( 1 ) + 2 + >>> memoizedMyFunction( 1, 1 ) + 2 + >>> timesCalled + 2 + + Note further that the function can't be used with keyword + arguments, partly because allowing memoizedMyFunction() to take + keyword arguments would require us to convert a **kwargs dictionary + into a hashable object, which is inefficient. See the following: + + >>> memoizedMyFunction( 1, b=5 ) + Traceback (most recent call last): + ... + TypeError: memoizedMyFunction() got an unexpected keyword argument 'b' + + To solve these problems, let's try using _generateArgWrapper() on + our memoized function: + + >>> f = _generateArgWrapper( myFunction, memoizedMyFunction ) + + Now the function 'f' can be used just like myFunction(): + + >>> timesCalled = 0 + >>> cachedResults = {} + + >>> f( 1, 1 ) + 2 + >>> f( a=1 ) + 2 + >>> f( 1 ) + 2 + >>> f( a=1, b=1 ) + 2 + + Furthermore, note that myFunction() has still only been called + once: + + >>> timesCalled + 1 + """ + + args, varargs, varkw, defaults = inspect.getargspec( function ) + + assert varkw == None, "Memoized functions cannot take ** arguments." + + argspecString = inspect.formatargspec( args, varargs, None, defaults ) + argspecStringNoDefaults = inspect.formatargspec( args, + varargs, + None, + None ) + + codeString = "\n".join( [ + "def argWrapperGenerator( wrappedFunction ):", + " def argWrappedFunction%(argspecString)s:", + " return wrappedFunction%(argspecStringNoDefaults)s", + " return argWrappedFunction", + ] ) + + codeString = codeString % { + "argspecString" : argspecString, + "argspecStringNoDefaults" : argspecStringNoDefaults, + } + + fakeFileName = "<Memoize-generated code for '%s'>" % function.__name__ + + codeObj = compile( + codeString, + fakeFileName, + "exec" + ) + + localsDict = {} + globalsDict = function.func_globals + + exec codeObj in globalsDict, localsDict + + argWrapperGenerator = localsDict["argWrapperGenerator"] + + return argWrapperGenerator( wrappedFunction ) + + +def memoized( function ): + """ + 'Memoizes' the function, causing its results to be cached based on + the called arguments. When subsequent calls to function are made + using the same arguments, the cached value is returned rather than + calling the function to re-compute the result. For instance: + + >>> timesCalled = 0 + >>> @memoized + ... def addNumbers( a, b ): + ... global timesCalled + ... timesCalled += 1 + ... return a + b + + We can show that the above function is only called once for each + unique pair of arguments like so: + + >>> addNumbers( 50, 20 ) + 70 + >>> timesCalled + 1 + >>> addNumbers( a=50, b=20 ) + 70 + >>> timesCalled + 1 + + Using different arguments calls the function again, of course: + + >>> addNumbers( 20, 50 ) + 70 + >>> timesCalled + 2 + + The memoized function cannot take any arguments that are + non-hashable; this means that the memoized function cannot take + dicts or lists, among others. For instance: + + >>> @memoized + ... def myFunc( myDict ): + ... myDict.update( {'bar':2} ) + ... return myDict + >>> myFunc( {'foo':1} ) + Traceback (most recent call last): + ... + TypeError: dict objects are unhashable + + The memoized function also cannot take a ** argument, since this + constitutes a dict. + + This decorator should only be used on functions which have a small + number of relatively simple input arguments, and which are called + a fantastic number of times. + + The memoized decorator is most effective in helping performance + when used on factory functions, as the instantiated object + (assuming that it should only be instantiated once) can be reused + rather than re-instantiated (effectively providing the services of + a flyweight pool). + """ + + mfWrap = _MemoizedFunction( function ) + + # For efficiency purposes, let's make it as easy to look up + # mfWrap.cache as possible. + cache = mfWrap.cache + + def memoizedFunctionWrapper( *args ): + # We're using a try-except clause here instead of testing + # whether the dictionary has a key because we believe that it + # is more efficient; it's preferable to speed up the most + # common scenario where a cached value already exists by + # simply assuming that it *does* exist. + + try: + return cache[args] + except KeyError: + cache[args] = function( *args ) + return cache[args] + + finalWrapper = _generateArgWrapper( function, memoizedFunctionWrapper ) + + return finalizeWrapper( function, + finalWrapper, + "Memoized" ) + + +def getMemoizeStats(): + """ + Returns a string describing the memoize usage dictionary. + """ + + STAT_STRING = \ + "Number of functions which used memoizing: %(numFuncs)s\n" \ + "Number of unique function values recorded: %(numValues)s" + + info = STAT_STRING % dict( + numFuncs = len( _memoizedFunctions ), + numValues = sum( [ len(i.cache.keys()) \ + for i in _memoizedFunctions ] ), + ) + + return info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_decorators.py Mon Feb 25 09:15:44 2008 -0600 @@ -0,0 +1,83 @@ +""" + Unit tests for enso.utils.decorators. +""" + +# ---------------------------------------------------------------------------- +# Imports +# ---------------------------------------------------------------------------- + +import unittest + +from enso.utils.decorators import finalizeWrapper + + +# ---------------------------------------------------------------------------- +# Example Functions +# ---------------------------------------------------------------------------- + +def myDec( oldFunc ): + """ + A decorator. + """ + def newFunc( *args, **kwargs ): + return "newFunc", oldFunc( *args, **kwargs ) + return finalizeWrapper( oldFunc, newFunc, "myDec" ) + +def a(): + "a doc" + return "a" +def b(): + "b doc" + return "b" +def c(): + "c doc" + return "c" +def d(): + "d doc" + return "d" + +class SomeClass: + def classFunc( self ): + "classFunc doc" + return "classFunc" + + +FUNCS = { + "a":a, + "b":b, + "c":c, + "d":d, + "classFunc":SomeClass().classFunc, + } + + +# ---------------------------------------------------------------------------- +# Unit Tests +# ---------------------------------------------------------------------------- + +class FinaleWrapperTests( unittest.TestCase ): + + def testAll( self ): + for funcName in FUNCS: + func = FUNCS[funcName] + self.failUnlessEqual( funcName, func() ) + self.failUnlessEqual( funcName, func.__name__ ) + + newFunc = myDec( func ) + # The new function has a different return value, though + # the old value should be contained in the new value. + self.failIfEqual( funcName, newFunc() ) + self.failUnless( func() in newFunc() ) + # The new function should have the same name. + self.failUnlessEqual( funcName, newFunc.__name__ ) + # The docstring of the old function should be + # contained in the docstring of the new function + self.failUnless( func.__doc__ in newFunc.__doc__ ) + + +# ---------------------------------------------------------------------------- +# Script +# ---------------------------------------------------------------------------- + +if __name__ == "__main__": + unittest.main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_memoize.py Mon Feb 25 09:15:44 2008 -0600 @@ -0,0 +1,90 @@ +""" + Unit tests for enso.utils.memoize. +""" + +# ---------------------------------------------------------------------------- +# Imports +# ---------------------------------------------------------------------------- + +import unittest + +from enso.utils.memoize import memoized + + +# ---------------------------------------------------------------------------- +# Imports +# ---------------------------------------------------------------------------- + +class TestMemoize( unittest.TestCase ): + def setUp( self ): + self.calledCount = 0 + + def tearDown( self ): + pass + + @memoized + def add( self, a, b ): + self.calledCount += 1 + return a + b + + def _testPair( self, a, b ): + for i in range( 100 ): + self.failUnlessEqual( self.add( a, b ), a + b ) + self.failUnlessEqual( self.calledCount, 1 ) + for i in range( 100 ): + self.failUnlessEqual( self.add( *[a,b] ), a + b ) + self.failUnlessEqual( self.calledCount, 1 ) + for i in range( 100 ): + self.failUnlessEqual( self.add( a=a, b=b ), a + b ) + self.failUnlessEqual( self.calledCount, 1 ) + for i in range( 100 ): + self.failUnlessEqual( self.add( **{"a":a,"b":b} ), a + b ) + self.failUnlessEqual( self.calledCount, 1 ) + for i in range( 100 ): + self.failUnlessEqual( self.add( a, b=b ), a + b ) + self.failUnlessEqual( self.calledCount, 1 ) + for i in range( 100 ): + self.failUnlessEqual( self.add( a, **{ "b":b } ), a + b ) + self.failUnlessEqual( self.calledCount, 1 ) + + def testNumbers( self ): + self._testPair( 1, 1 ) + + def testTuples( self ): + self._testPair( (1,) , (2,) ) + + @memoized + def something( self, a ): + self.calledCount += 1 + return [ a, 1 ] + + def testKwargs( self ): + def foo(**kwargs): + pass + self.assertRaises( + AssertionError, + memoized, + foo + ) + + def testTypes( self ): + types = [ + "a", + 1, + 1.2, + ] + + called = 0 + for t in types: + called += 1 + for i in range( 100 ): + self.failUnlessEqual( self.something( t ), [t,1] ) + self.failUnlessEqual( self.calledCount, called ) + + +# ---------------------------------------------------------------------------- +# Script +# ---------------------------------------------------------------------------- + +if __name__ == "__main__": + unittest.main()