view GenerateHtml.py @ 0:2110430f4d7f

Origination. Two critical files for any of this to work, Config.py and LocalAuth.py, aren't included here because they contain sensitive information, so this is effectively broken at the moment. I'll have to do a bit of refactoring and make a second commit.
author Atul Varma <varmaa@toolness.com>
date Fri, 08 Feb 2008 23:53:26 -0600
parents
children 919b98450387
line wrap: on
line source

import HTMLParser
import time

import Serializer
import Config

class Detagger( HTMLParser.HTMLParser ):
    """
    Strips the markup from an HTML fragment and shortens the remaining
    text to about maxLength characters.

    When a piece of text does not fit, it is cut at the last whitespace
    inside the remaining budget and " ..." is appended to show that
    something was left out, so the result may run a few characters past
    maxLength.  Text that arrives after the budget has been used up
    exactly is also replaced by a single " ...".

    >>> d = Detagger( 10 )
    >>> d.process( '<p>12345 <i>6</i> 78</p>' )
    '12345 6 78'
    >>> d.process( '<p>12345 <i>6</i> 789</p>' )
    '12345 6 ...'
    >>> d.process( '<p>12345 <i>6</i> 7 89</p>' )
    '12345 6 7 ...'
    >>> d.process( '<p>123 5 oiawejgoaiewgj</p><p>a</p>' )
    '123 5 ...'
    >>> d.process( '<p>1234567890</p><p>more</p>' )
    '1234567890 ...'
    """

    def __init__( self, maxLength ):
        """
        maxLength -- number of text characters to keep before truncating.
        """
        # The base constructor calls reset(), which sets up the
        # per-document state used by handle_data().
        HTMLParser.HTMLParser.__init__( self )
        self.maxLength = maxLength

    def handle_data( self, data ):
        """
        Parser callback for a run of text between tags; collects it
        until the length budget is spent.
        """
        if self._ellipsified:
            # Already truncated: everything that follows is dropped.
            return

        maxChunkLength = self.maxLength - self._dataChars
        if maxChunkLength <= 0:
            # Earlier chunks used the whole budget.  Dropping real text
            # here without a marker would hide the truncation, so flag
            # it once; whitespace-only chunks are ignored silently.
            if data.strip():
                self._data.append( " ..." )
                self._ellipsified = True
            return

        if len( data ) > maxChunkLength:
            # Cut at the last whitespace within the budget.  If there is
            # none, the break stays at 0 and the whole chunk is replaced
            # by the ellipsis.
            lastGoodWordBreak = 0
            for i in range( maxChunkLength ):
                if data[i].isspace():
                    lastGoodWordBreak = i
            data = data[:lastGoodWordBreak] + " ..."
            self._ellipsified = True
        self._data.append( data )
        self._dataChars += len( data )

    def reset( self ):
        """
        Resets the parser and clears the text collected so far.
        """
        HTMLParser.HTMLParser.reset( self )
        self._data = []            # text chunks kept, in document order
        self._dataChars = 0        # total length of the chunks in _data
        self._ellipsified = False  # True once " ..." has been emitted

    def process( self, data ):
        """
        Returns the detagged, possibly truncated, text of the HTML
        string data.  The instance can be reused for further calls.
        """
        self.reset()
        self.feed( data )
        return "".join( self._data )

def makeRows( feedInfo, detagger ):
    """
    Builds one display dict per entry of a feed.

    feedInfo -- mapping providing "entries", "config" and "config_name".
    detagger -- object whose process() turns a summary into plain text.

    Each returned dict has the keys source, url, title, summary,
    timestamp and isUrgent.  An entry without a "summary" falls back to
    the value of its first "content" item, and to "" if it has neither.
    """
    result = []

    for item in feedInfo["entries"]:
        heading = item.get( "title", "Untitled" )

        rawSummary = item.get( "summary" )
        if not rawSummary:
            rawSummary = ""
            if item.get( "content" ):
                rawSummary = item["content"][0].value
        plainSummary = detagger.process( rawSummary )

        # NOTE(review): time.mktime() interprets the tuple as local time;
        # if "updated_parsed" is in UTC, calendar.timegm() would be the
        # matching conversion -- confirm against the feed source.
        updated = time.mktime( item["updated_parsed"] )
        link = item["link"]

        importance = feedInfo["config"].get( "importance",
                                             Config.DEFAULT_IMPORTANCE )
        # An importance of exactly 1 marks the row as urgent.
        result.append( { "source": feedInfo["config_name"],
                         "url": link,
                         "title": heading,
                         "summary": plainSummary,
                         "timestamp": updated,
                         "isUrgent": importance == 1 } )
    return result

def writePage( viewInfo, rows, outFile ):
    """
    Writes one view as a complete HTML document to outFile.

    viewInfo -- mapping whose "name" entry becomes the page heading.
    rows     -- sequence of dicts with the entries "source", "url",
                "title", "summary" and "isUrgent".
    outFile  -- object with a write() method.

    Rows alternate between the "odd" and "even" CSS classes.  An urgent
    row gets the "urgent" class instead, but still takes its turn in
    the alternation.
    """
    # The heading belongs inside <body>, not between </head> and <body>.
    outFile.write( "<html><head><link href=\"styles.css\" rel=\"stylesheet\" type=\"text/css\"></head><body><h1>%s</h1>" % viewInfo["name"] )
    outFile.write( "<table cellspacing=\"0\" cellpadding=\"0\">" )
    rowClass = "odd"
    for row in rows:
        if row["isUrgent"]:
            finalRowClass = "urgent"
        else:
            finalRowClass = rowClass
        outFile.write( "<tr class=\"%s\">" % finalRowClass )
        # NOTE(review): the row values are interpolated as-is, without
        # HTML escaping -- confirm they are safe markup by this point.
        line = "<td>%(source)s</td><td><a href=\"%(url)s\">%(title)s</a> &mdash; %(summary)s</td>" % row
        # Non-ASCII characters are written as numeric character
        # references so the output stays pure ASCII.
        outFile.write( line.encode( "ascii", "xmlcharrefreplace" ) )
        outFile.write( "</tr>" )
        if rowClass == "odd":
            rowClass = "even"
        else:
            rowClass = "odd"
    outFile.write( "</table>" )
    outFile.write( "</body></html>" )

def getFeedConfig( name ):
    """
    Returns the first entry of Config.FEEDS whose "name" equals name.

    Raises IndexError when no feed carries that name.
    """
    matches = []
    for candidate in Config.FEEDS:
        if candidate["name"] == name:
            matches.append( candidate )
    # Indexing an empty list is what signals an unknown feed name.
    return matches[0]

def makeViewHtmlPage( viewInfo, detagger ):
    """
    Generates the HTML page for one view.

    viewInfo -- mapping with the entries "name" and "tags".
    detagger -- passed through to makeRows() to clean up summaries.

    Collects rows from every feed returned by Serializer.iterFeeds()
    that shares at least one tag with the view, sorts them newest
    first, and writes them to "<lower-cased view name>.html" in the
    current directory.
    """
    wantedTags = set( viewInfo["tags"] )

    rows = []
    for feedInfo in Serializer.iterFeeds():
        feedConfig = getFeedConfig( feedInfo["config_name"] )
        # makeRows() reads the feed's configuration from feedInfo.
        feedInfo["config"] = feedConfig
        if wantedTags.intersection( feedConfig["tags"] ):
            rows.extend( makeRows(feedInfo, detagger) )

    rows.sort( key = lambda row: row["timestamp"],
               reverse = True )

    fileObj = open( "%s.html" % viewInfo["name"].lower(), "w" )
    try:
        writePage( viewInfo, rows, fileObj )
    finally:
        # Close the file even if writing the page fails.
        fileObj.close()

def main():
    """
    Generates the HTML page of every view listed in Config.VIEWS,
    announcing each one on standard output.
    """
    # One detagger is shared by all views; process() resets it per call.
    summarizer = Detagger( Config.MAX_SUMMARY_LENGTH )

    for view in Config.VIEWS:
        print( "Generating view for %(name)s" % view )
        makeViewHtmlPage( view, summarizer )

if __name__ == "__main__":
    # When run as a script: execute this module's doctests first, then
    # generate the pages.  The result of testmod() is not checked, so
    # main() runs whether or not the doctests pass.
    import doctest
    doctest.testmod()
    main()