changeset 45:d7c68ba81215

Added enso.utils.decorators and enso.utils.memoize.
author Atul Varma <varmaa@toolness.com>
date Mon, 25 Feb 2008 09:15:44 -0600
parents 78bb785c992e
children e37e56835647
files enso/utils/decorators.py enso/utils/memoize.py tests/test_decorators.py tests/test_memoize.py
diffstat 4 files changed, 557 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/enso/utils/decorators.py	Mon Feb 25 09:15:44 2008 -0600
@@ -0,0 +1,101 @@
+# Copyright (c) 2008, Humanized, Inc.
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#    1. Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#    2. Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+#    3. Neither the name of Enso nor the names of its contributors may
+#       be used to endorse or promote products derived from this
+#       software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY Humanized, Inc. ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Humanized, Inc. BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ----------------------------------------------------------------------------
+#
+#   enso.utils.decorators
+#
+# ----------------------------------------------------------------------------
+
+"""
+    Contains utility functions for decorators.  Note that this module
+    does not contain actual decorators, but rather *utilities* for
+    decorators to use.
+"""
+
+# ----------------------------------------------------------------------------
+# Imports
+# ----------------------------------------------------------------------------
+
+import inspect
+import sys
+
+
+# ----------------------------------------------------------------------------
+# Functionality
+# ----------------------------------------------------------------------------
+
+def finalizeWrapper( origFunc, wrappedFunc, decoratorName ):
+    """
+    Makes some final modifications to the decorated or 'wrapped'
+    version of a function by making the wrapped version's name and
+    docstring match those of the original function.
+
+    'decoratorName' is the string name for the decorator,
+    e.g. 'Synchronized'.
+
+    Assuming that the original function was of the form 'myFunc( self,
+    foo = 1 )' and the decorator name is 'Synchronized', the new
+    docstring for the wrapped function will be of the form:
+
+        Synchronized wrapper for:
+        myFunc( self, foo = 1 )
+
+        <myFunc's docstring>
+    
+    Returns the wrapped function.
+    """
+
+    if sys.modules.has_key( "pychecker" ):
+        # If pychecker is in sys.modules, then we can assume that our
+        # code is being checked by pychecker.  If this is the case,
+        # then we just want to return the original function, because
+        # pychecker doesn't like decorators.
+        return origFunc
+
+    # Get a prettified representation of the argument list.
+    args, varargs, varkw, defaults = inspect.getargspec( origFunc )
+    argspec = inspect.formatargspec(
+        args,
+        varargs,
+        varkw,
+        defaults
+        )
+
+    callspec = "%s%s" % ( origFunc.__name__, argspec )
+
+    # Generate a new docstring.
+    newDocString = "%s wrapper for:\n%s\n\n%s" % \
+                   ( decoratorName, callspec, origFunc.__doc__ )
+
+    # Set the appropriate attributes on the wrapped function and pass
+    # it back.
+    wrappedFunc.__doc__ = newDocString
+    wrappedFunc.__name__ = origFunc.__name__
+    wrappedFunc.__module__ = origFunc.__module__
+    return wrappedFunc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/enso/utils/memoize.py	Mon Feb 25 09:15:44 2008 -0600
@@ -0,0 +1,283 @@
+# Copyright (c) 2008, Humanized, Inc.
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#    1. Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#    2. Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+#    3. Neither the name of Enso nor the names of its contributors may
+#       be used to endorse or promote products derived from this
+#       software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY Humanized, Inc. ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Humanized, Inc. BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ----------------------------------------------------------------------------
+#
+#   enso.utils.memoize
+#
+# ----------------------------------------------------------------------------
+
+"""
+    A memoizing decorator for caching the results of a function.
+"""
+
+# ----------------------------------------------------------------------------
+# Imports
+# ----------------------------------------------------------------------------
+
+import inspect
+
+from enso.utils.decorators import finalizeWrapper
+
+
+# ----------------------------------------------------------------------------
+# Memoized Decorator
+# ----------------------------------------------------------------------------
+
+_memoizedFunctions = []
+
+class _MemoizedFunction:
+    """
+    Encapsulates all information about a function that is memoized.
+    """
+    
+    def __init__( self, function ):
+        self.function = function
+        self.cache = {}
+        _memoizedFunctions.append( self )
+
+
+def _generateArgWrapper( function, wrappedFunction ):
+    """
+    Given any function and a wrappedFunction of the form
+    wrappedFunction(*args), creates an argument-wrapper function that
+    guarantees that function's arguments will be passed into
+    wrappedFunction() as a single tuple with no keyword arguments;
+    furthermore, the default values of any absent arguments will be
+    inserted into the tuple in their appopriate positions.
+
+    The function is very useful for creating wrappers that memoize
+    functions, because it ensures that the wrapped function's
+    arguments are always passed in to the memoizing wrapper as a
+    hashable data structure (a tuple) in a very consistent way. See
+    the following primitive example:
+
+      >>> timesCalled = 0
+      >>> cachedResults = {}
+
+      >>> def myFunction( a, b=1 ):
+      ...   'Simple function that returns the sum of the two arguments.'
+      ...   global timesCalled
+      ...   timesCalled += 1
+      ...   return a+b
+
+      >>> def memoizedMyFunction( *args ):
+      ...   'Simple wrapper that memoizes myFunction().'
+      ...   if not cachedResults.has_key( args ):
+      ...     cachedResults[args] = myFunction( *args )
+      ...   return cachedResults[args]
+
+    Note that memoizedFunction() isn't very flexible; for instance,
+    calling it in two different ways that are semantically identical
+    erroneously causes myFunction() to be called twice:
+
+      >>> memoizedMyFunction( 1 )
+      2
+      >>> memoizedMyFunction( 1, 1 )
+      2
+      >>> timesCalled
+      2
+
+    Note further that the function can't be used with keyword
+    arguments, partly because allowing memoizedMyFunction() to take
+    keyword arguments would require us to convert a **kwargs dictionary
+    into a hashable object, which is inefficient.  See the following:
+
+      >>> memoizedMyFunction( 1, b=5 )
+      Traceback (most recent call last):
+      ...
+      TypeError: memoizedMyFunction() got an unexpected keyword argument 'b'
+
+    To solve these problems, let's try using _generateArgWrapper() on
+    our memoized function:
+    
+      >>> f = _generateArgWrapper( myFunction, memoizedMyFunction )
+
+    Now the function 'f' can be used just like myFunction():
+
+      >>> timesCalled = 0
+      >>> cachedResults = {}
+
+      >>> f( 1, 1 )
+      2
+      >>> f( a=1 )
+      2
+      >>> f( 1 )
+      2
+      >>> f( a=1, b=1 )
+      2
+
+    Furthermore, note that myFunction() has still only been called
+    once:
+
+      >>> timesCalled
+      1
+    """
+
+    args, varargs, varkw, defaults = inspect.getargspec( function )
+
+    assert varkw == None, "Memoized functions cannot take ** arguments."
+
+    argspecString = inspect.formatargspec( args, varargs, None, defaults )
+    argspecStringNoDefaults = inspect.formatargspec( args,
+                                                     varargs,
+                                                     None,
+                                                     None )
+
+    codeString = "\n".join( [
+        "def argWrapperGenerator( wrappedFunction ):",
+        "    def argWrappedFunction%(argspecString)s:",
+        "        return wrappedFunction%(argspecStringNoDefaults)s",
+        "    return argWrappedFunction",
+        ] )
+
+    codeString = codeString % {
+        "argspecString" : argspecString,
+        "argspecStringNoDefaults" : argspecStringNoDefaults,
+        }
+
+    fakeFileName = "<Memoize-generated code for '%s'>" % function.__name__
+
+    codeObj = compile(
+        codeString,
+        fakeFileName,
+        "exec"
+    )
+
+    localsDict = {}
+    globalsDict = function.func_globals
+
+    exec codeObj in globalsDict, localsDict
+
+    argWrapperGenerator = localsDict["argWrapperGenerator"]
+
+    return argWrapperGenerator( wrappedFunction )
+
+
+def memoized( function ):
+    """
+    'Memoizes' the function, causing its results to be cached based on
+    the called arguments.  When subsequent calls to function are made
+    using the same arguments, the cached value is returned rather than
+    calling the function to re-compute the result.  For instance:
+
+      >>> timesCalled = 0
+      >>> @memoized
+      ... def addNumbers( a, b ):
+      ...   global timesCalled
+      ...   timesCalled += 1
+      ...   return a + b
+
+    We can show that the above function is only called once for each
+    unique pair of arguments like so:
+
+      >>> addNumbers( 50, 20 )
+      70
+      >>> timesCalled
+      1
+      >>> addNumbers( a=50, b=20 )
+      70
+      >>> timesCalled
+      1
+
+    Using different arguments calls the function again, of course:
+
+      >>> addNumbers( 20, 50 )
+      70
+      >>> timesCalled
+      2
+
+    The memoized function cannot take any arguments that are
+    non-hashable; this means that the memoized function cannot take
+    dicts or lists, among others.  For instance:
+
+      >>> @memoized
+      ... def myFunc( myDict ):
+      ...   myDict.update( {'bar':2} )
+      ...   return myDict
+      >>> myFunc( {'foo':1} )
+      Traceback (most recent call last):
+      ...
+      TypeError: dict objects are unhashable
+
+    The memoized function also cannot take a ** argument, since this
+    constitutes a dict.
+
+    This decorator should only be used on functions which have a small
+    number of relatively simple input arguments, and which are called
+    a fantastic number of times.
+
+    The memoized decorator is most effective in helping performance
+    when used on factory functions, as the instantiated object
+    (assuming that it should only be instantiated once) can be reused
+    rather than re-instantiated (effectively providing the services of
+    a flyweight pool).
+    """
+
+    mfWrap = _MemoizedFunction( function )
+
+    # For efficiency purposes, let's make it as easy to look up
+    # mfWrap.cache as possible.
+    cache = mfWrap.cache
+
+    def memoizedFunctionWrapper( *args ):
+        # We're using a try-except clause here instead of testing
+        # whether the dictionary has a key because we believe that it
+        # is more efficient; it's preferable to speed up the most
+        # common scenario where a cached value already exists by
+        # simply assuming that it *does* exist.
+
+        try:
+            return cache[args]
+        except KeyError:
+            cache[args] = function( *args )
+            return cache[args]
+    
+    finalWrapper = _generateArgWrapper( function, memoizedFunctionWrapper )
+
+    return finalizeWrapper( function,
+                            finalWrapper,
+                            "Memoized" )
+
+
+def getMemoizeStats():
+    """
+    Returns a string describing the memoize usage dictionary.
+    """
+
+    STAT_STRING = \
+        "Number of functions which used memoizing:  %(numFuncs)s\n" \
+        "Number of unique function values recorded: %(numValues)s"
+
+    info = STAT_STRING % dict(
+        numFuncs = len( _memoizedFunctions ),
+        numValues = sum( [ len(i.cache.keys()) \
+                           for i in _memoizedFunctions ] ),
+        )
+
+    return info
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_decorators.py	Mon Feb 25 09:15:44 2008 -0600
@@ -0,0 +1,83 @@
+"""
+    Unit tests for enso.utils.decorators.
+"""
+
+# ----------------------------------------------------------------------------
+# Imports
+# ----------------------------------------------------------------------------
+
+import unittest
+
+from enso.utils.decorators import finalizeWrapper
+
+
+# ----------------------------------------------------------------------------
+# Example Functions
+# ----------------------------------------------------------------------------
+
+def myDec( oldFunc ):
+    """
+    A decorator.
+    """
+    def newFunc( *args, **kwargs ):
+        return "newFunc", oldFunc( *args, **kwargs )
+    return finalizeWrapper( oldFunc, newFunc, "myDec" )
+
+def a():
+    "a doc"
+    return "a"
+def b():
+    "b doc"
+    return "b"
+def c():
+    "c doc"
+    return "c"
+def d():
+    "d doc"
+    return "d"
+
+class SomeClass:
+    def classFunc( self ):
+        "classFunc doc"
+        return "classFunc"
+
+
+FUNCS = {
+    "a":a,
+    "b":b,
+    "c":c,
+    "d":d,
+    "classFunc":SomeClass().classFunc,
+    }
+
+
+# ----------------------------------------------------------------------------
+# Unit Tests
+# ----------------------------------------------------------------------------
+
+class FinaleWrapperTests( unittest.TestCase ):
+
+    def testAll( self ):
+        for funcName in FUNCS:
+            func = FUNCS[funcName]
+            self.failUnlessEqual( funcName, func() )
+            self.failUnlessEqual( funcName, func.__name__ )
+
+            newFunc = myDec( func )
+            # The new function has a different return value, though
+            # the old value should be contained in the new value.
+            self.failIfEqual( funcName, newFunc() )
+            self.failUnless( func() in newFunc() )
+            # The new function should have the same name.
+            self.failUnlessEqual( funcName, newFunc.__name__ )
+            # The docstring of the old function should be
+            # contained in the docstring of the new function
+            self.failUnless( func.__doc__ in newFunc.__doc__ )
+
+
+# ----------------------------------------------------------------------------
+# Script
+# ----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    unittest.main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_memoize.py	Mon Feb 25 09:15:44 2008 -0600
@@ -0,0 +1,90 @@
+"""
+    Unit tests for enso.utils.memoize.
+"""
+
+# ----------------------------------------------------------------------------
+# Imports
+# ----------------------------------------------------------------------------
+
+import unittest
+
+from enso.utils.memoize import memoized
+
+
+# ----------------------------------------------------------------------------
+# Imports
+# ----------------------------------------------------------------------------
+
+class TestMemoize( unittest.TestCase ):
+    def setUp( self ):
+        self.calledCount = 0
+
+    def tearDown( self ):
+        pass
+
+    @memoized
+    def add( self, a, b ):
+        self.calledCount += 1
+        return a + b
+
+    def _testPair( self, a, b ):
+        for i in range( 100 ):
+            self.failUnlessEqual( self.add( a, b ), a + b )
+            self.failUnlessEqual( self.calledCount, 1 )
+        for i in range( 100 ):
+            self.failUnlessEqual( self.add( *[a,b] ), a + b )
+            self.failUnlessEqual( self.calledCount, 1 )
+        for i in range( 100 ):
+            self.failUnlessEqual( self.add( a=a, b=b ), a + b )
+            self.failUnlessEqual( self.calledCount, 1 )
+        for i in range( 100 ):
+            self.failUnlessEqual( self.add( **{"a":a,"b":b} ), a + b )
+            self.failUnlessEqual( self.calledCount, 1 )
+        for i in range( 100 ):
+            self.failUnlessEqual( self.add( a, b=b ), a + b )
+            self.failUnlessEqual( self.calledCount, 1 )
+        for i in range( 100 ):
+            self.failUnlessEqual( self.add( a, **{ "b":b } ), a + b )
+            self.failUnlessEqual( self.calledCount, 1 )
+
+    def testNumbers( self ):
+        self._testPair( 1, 1 )
+
+    def testTuples( self ):
+        self._testPair( (1,) , (2,) )
+
+    @memoized
+    def something( self, a ):
+        self.calledCount += 1
+        return [ a, 1 ]
+
+    def testKwargs( self ):
+        def foo(**kwargs):
+            pass
+        self.assertRaises(
+            AssertionError,
+            memoized,
+            foo
+            )
+
+    def testTypes( self ):
+        types = [
+            "a",
+            1,
+            1.2,
+            ]
+
+        called = 0
+        for t in types:
+            called += 1
+            for i in range( 100 ):
+                self.failUnlessEqual( self.something( t ), [t,1] )
+                self.failUnlessEqual( self.calledCount, called )
+
+
+# ----------------------------------------------------------------------------
+# Script
+# ----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    unittest.main()