Mercurial > kiritsu
view UpdateFeeds.py @ 8:4d61c56473c2 default tip
Fixed a problem where some feeds would have unpickleable expatreaders.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Fri, 18 Apr 2008 17:13:02 -0700 |
parents | 56bd30b89166 |
children |
line wrap: on
line source
import subprocess
import time
import feedparser
import FeedSources
import Serializer


class SubprocessPool( object ):
    """
    Launches child processes while keeping at most maxChildren of
    them tracked (and therefore running) at any one time.
    """

    def __init__( self, maxChildren ):
        """
        maxChildren -- upper bound on concurrently tracked children;
                       must be greater than zero.
        """

        assert maxChildren > 0
        self._maxChildren = maxChildren
        self._pool = []

    def add( self, cmdLine ):
        """
        Starts cmdLine (passed straight to subprocess.Popen) as a new
        child.  If the pool is already full, this blocks until at
        least one existing child has exited.
        """

        if len( self._pool ) >= self._maxChildren:
            self.waitToEnd( 1 )
        popen = subprocess.Popen( cmdLine )
        self._pool.append( popen )

    def waitToEnd( self, numChildren=None ):
        """
        Blocks until at least numChildren tracked children have
        exited, then forgets every child that has exited.

        numChildren -- how many exits to wait for; None (or 0) means
                       wait for every tracked child.
        """

        if not numChildren:
            numChildren = len( self._pool )
        else:
            # Never wait for more children than are being tracked;
            # otherwise the loop below could never terminate.
            numChildren = min( numChildren, len( self._pool ) )

        while True:
            # Poll each child exactly once per pass so that the
            # "enough have finished" decision and the rebuilt pool
            # are based on the same snapshot.
            running = [ popen for popen in self._pool
                        if popen.poll() is None ]
            if len( self._pool ) - len( running ) >= numChildren:
                break
            time.sleep( 0.1 )

        # TODO: Raise errors or inform user if subprocesses exited
        # with nonzero return code.
        self._pool = running


def main():
    """
    Runs "python FeedSources.py <name>" for every feed named in
    FeedSources.FEED_INFO (five at a time), then parses each feed's
    file with feedparser and hands a trimmed-down copy of its entries
    to the Serializer.
    """

    # Single-argument print( ... ) produces identical output under the
    # Python 2 print statement and the Python 3 print function.
    print( "Starting updates..." )
    pool = SubprocessPool( maxChildren = 5 )
    for feedName in FeedSources.FEED_INFO:
        pool.add( ["python", "FeedSources.py", feedName] )

    print( "Waiting for updates to finish..." )
    pool.waitToEnd()

    print( "Processing and serializing feeds..." )
    # Only these keys are copied out of each parsed entry; presumably
    # this keeps the stored data to plain values -- confirm against
    # the Serializer implementation.
    ENTRY_KEYS = ["title", "summary", "content", "updated_parsed", "link"]
    serializer = Serializer.Serializer()
    for feed in FeedSources.FEED_INFO.values():
        try:
            fileObj = open( feed["filename"], "r" )
            try:
                origFeedInfo = feedparser.parse( fileObj )
            finally:
                # Always release the file handle, even if parsing
                # raises.
                fileObj.close()
        except Exception:
            import traceback
            print( "Ignoring the following error:" )
            traceback.print_exc()
            # TODO: Really ignore error here?
            continue

        feedInfo = {}
        feedInfo["entries"] = []
        for origEntry in origFeedInfo["entries"]:
            entry = {}
            for key in ENTRY_KEYS:
                # Missing keys are stored as None.
                entry[key] = origEntry.get( key )
            feedInfo["entries"].append( entry )
        feedInfo["config_name"] = feed["name"]

        print( "  %s" % feed["name"] )
        serializer.store( feedInfo )

    serializer.finalize()


if __name__ == "__main__":
    main()