view UpdateFeeds.py @ 8:4d61c56473c2 default tip

Fixed a problem where some feeds would have unpickleable expatreaders.
author Atul Varma <varmaa@toolness.com>
date Fri, 18 Apr 2008 17:13:02 -0700
parents 56bd30b89166
children
line wrap: on
line source

import subprocess
import time
import feedparser
import FeedSources
import Serializer

class SubprocessPool( object ):
    """
    Runs external commands as child processes while keeping at most
    `maxChildren` of them tracked in the pool at any one time.
    """

    def __init__( self, maxChildren ):
        """
        Creates an empty pool.

        `maxChildren` is the maximum number of children the pool
        holds before add() blocks; it must be greater than zero.
        """
        assert maxChildren > 0
        self._maxChildren = maxChildren
        # Popen objects for children that have been started and not
        # yet pruned by waitToEnd().
        self._pool = []

    def add( self, cmdLine ):
        """
        Starts `cmdLine` (passed unchanged to subprocess.Popen) as a
        new child.  If the pool is already full, this first blocks
        until at least one existing child has exited.
        """
        # '>=' rather than '==' so that a pool which somehow grew past
        # its limit still throttles instead of growing without bound.
        if len( self._pool ) >= self._maxChildren:
            self.waitToEnd( 1 )
        self._pool.append( subprocess.Popen( cmdLine ) )

    def waitToEnd( self, numChildren=None ):
        """
        Blocks until at least `numChildren` of the pooled children
        have exited, then drops every exited child from the pool.

        If `numChildren` is None or 0, waits for all children.  A
        request for more children than the pool currently holds is
        treated as a request for all of them; without that clamp the
        loop below could never be satisfied and would spin forever.
        """
        poolSize = len( self._pool )
        if not numChildren:
            numChildren = poolSize
        else:
            numChildren = min( numChildren, poolSize )

        while True:
            # poll() returns None while the child is still running.
            running = [ popen for popen in self._pool
                        if popen.poll() is None ]
            if poolSize - len( running ) >= numChildren:
                break
            time.sleep( 0.1 )
        # TODO: Raise errors or inform user if subprocesses exited
        # with nonzero return code.
        self._pool = running

def main():
    print "Starting updates..."
    pool = SubprocessPool( maxChildren = 5 )
    for feedName in FeedSources.FEED_INFO:
        pool.add( ["python", "FeedSources.py", feedName] )
    print "Waiting for updates to finish..."
    pool.waitToEnd()

    print "Processing and serializing feeds..."
    serializer = Serializer.Serializer()

    for feed in FeedSources.FEED_INFO.values():
        try:
            fileObj = open( feed["filename"], "r" )
            origFeedInfo = feedparser.parse( fileObj )
        except Exception, e:
            import traceback
            print "Ignoring the following error:"
            traceback.print_exc()
            # TODO: Really ignore error here?
            continue

        feedInfo = {}
        feedInfo["entries"] = []
        ENTRY_KEYS = ["title", "summary", "content", "updated_parsed",
                      "link"];
        for origEntry in origFeedInfo["entries"]:
            entry = {}
            for key in ENTRY_KEYS:
                entry[key] = origEntry.get( key )
            feedInfo["entries"].append( entry )

        feedInfo["config_name"] = feed["name"]
        print "  %s" % feed["name"]
        serializer.store( feedInfo )
    serializer.finalize()

# Run the update only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()