view GenerateHtml.py @ 4:df9d4e704257

Made the architecture a little cleaner, but it's still pretty messy; I really need to turn some OO-like hacks into actual OO constructions. At least the Config.py structure is simpler.
author Atul Varma <varmaa@toolness.com>
date Sat, 16 Feb 2008 21:56:15 -0600
parents 919b98450387
children
line wrap: on
line source

import HTMLParser
import time

import Serializer
import Config
import FeedSources

class Detagger( HTMLParser.HTMLParser ):
    """
    HTML parser that keeps only the text passed to handle_data() and
    shortens it to roughly maxLength characters.

    When a chunk of text would push the total past maxLength, the chunk
    is cut at its last whitespace character that still fits, " ..." is
    appended, and all later text is ignored.  If no whitespace fits, the
    whole chunk is replaced by " ...".

    >>> d = Detagger( 10 )
    >>> d.process( '<p>12345 <i>6</i> 78</p>' )
    '12345 6 78'
    >>> d.process( '<p>12345 <i>6</i> 789</p>' )
    '12345 6 ...'
    >>> d.process( '<p>12345 <i>6</i> 7 89</p>' )
    '12345 6 7 ...'
    >>> d.process( '<p>123 5 oiawejgoaiewgj</p><p>a</p>' )
    '123 5 ...'

    A word that ends exactly at the limit is kept:

    >>> d.process( '<p>1234567890 abc</p>' )
    '1234567890 ...'
    """

    def __init__( self, maxLength ):
        """
        maxLength is the number of text characters allowed before the
        output is ellipsified.
        """

        # The base constructor calls reset(), which is what creates the
        # per-document state used by handle_data().
        HTMLParser.HTMLParser.__init__( self )
        self.maxLength = maxLength

    def handle_data( self, data ):
        """
        Parser callback: accumulate a chunk of text, truncating it if it
        does not fit in the remaining budget.
        """

        if self._ellipsified or self._dataChars >= self.maxLength:
            # Either already truncated or the budget is used up.
            return

        maxChunkLength = self.maxLength - self._dataChars
        if len( data ) > maxChunkLength:
            lastGoodWordBreak = 0
            # Scan up to and including index maxChunkLength (valid,
            # because len( data ) > maxChunkLength): whitespace at that
            # index means the preceding word fits exactly, so it must
            # not be discarded.
            for i in range( maxChunkLength + 1 ):
                if data[i].isspace():
                    lastGoodWordBreak = i
            data = data[:lastGoodWordBreak] + " ..."
            self._ellipsified = True
        self._data.append( data )
        self._dataChars += len( data )

    def reset( self ):
        """
        Reset the parser and discard any text collected so far.
        """

        HTMLParser.HTMLParser.reset( self )
        self._data = []            # text chunks collected so far
        self._dataChars = 0        # total length of those chunks
        self._ellipsified = False  # True once " ..." has been appended

    def process( self, data ):
        """
        Parse the markup in data from a clean state and return the
        collected (and possibly truncated) text as a single string.
        """

        self.reset()
        self.feed( data )
        return "".join( self._data )

def makeRows( feedInfo, detagger ):
    """
    Build one display row (a dict) for every entry of a feed.

    feedInfo is read for its "entries", "config" and "config_name"
    items.  detagger must provide a process() method; it is given each
    entry's summary markup and its return value becomes the row's
    summary.

    Every row has the keys source, url, title, summary, timestamp and
    isUrgent.  isUrgent is True exactly when the feed's configured
    "importance" (Config.DEFAULT_IMPORTANCE when not set) equals 1.
    """

    def buildRow( entry ):
        heading = entry.get( "title", "Untitled" )

        # Prefer the entry's own summary; otherwise fall back to the
        # first content element, and finally to an empty string.
        markup = entry.get( "summary" )
        if not markup:
            if entry.get( "content" ):
                markup = entry["content"][0].value
            else:
                markup = ""
        plainText = detagger.process( markup )

        # NOTE(review): mktime() interprets the time tuple as local
        # time -- confirm that is what "updated_parsed" holds.
        seconds = time.mktime( entry["updated_parsed"] )
        link = entry["link"]
        level = feedInfo["config"].get( "importance",
                                        Config.DEFAULT_IMPORTANCE )
        return {
            "source": feedInfo["config_name"],
            "url": link,
            "title": heading,
            "summary": plainText,
            "timestamp": seconds,
            "isUrgent": ( level == 1 ),
        }

    return [buildRow( entry ) for entry in feedInfo["entries"]]

def writePage( viewInfo, rows, outFile ):
    """
    Write the HTML page for one view to outFile.

    viewInfo["name"] becomes the page heading.  rows is a sequence of
    dicts with the keys source, url, title, summary and isUrgent; each
    one becomes a table row.  outFile only needs a write() method.

    Rows get the CSS class "urgent" when isUrgent is true; otherwise
    they get "odd" or "even".  The odd/even state flips after every
    row, urgent ones included.

    NOTE(review): values are inserted into the markup without HTML
    escaping -- confirm that callers only pass text that is safe to
    embed.
    """

    # The heading belongs inside <body>, not between </head> and <body>.
    outFile.write( "<html><head><link href=\"styles.css\" rel=\"stylesheet\" type=\"text/css\"></head><body><h1>%s</h1>" % viewInfo["name"] )
    outFile.write( "<table cellspacing=\"0\" cellpadding=\"0\">" )
    rowClass = "odd"
    for row in rows:
        if row["isUrgent"]:
            finalRowClass = "urgent"
        else:
            finalRowClass = rowClass
        outFile.write( "<tr class=\"%s\">" % finalRowClass )
        line = "<td>%(source)s</td><td><a href=\"%(url)s\">%(title)s</a> &mdash; %(summary)s</td>" % row
        # Non-ASCII characters are emitted as numeric character
        # references so the output stays pure ASCII.
        outFile.write( line.encode( "ascii", "xmlcharrefreplace" ) )
        outFile.write( "</tr>" )
        if rowClass == "odd":
            rowClass = "even"
        else:
            rowClass = "odd"
    outFile.write( "</table>" )
    outFile.write( "</body></html>" )

def getFeedConfig( name ):
    """
    Return the entry stored under name in FeedSources.FEED_INFO.
    """

    feedTable = FeedSources.FEED_INFO
    return feedTable[name]

def cmpRowUrgencyThenTimestamp( a, b ):
    """
    cmp()-style comparison of two rows: order by the "isUrgent" value
    first, and fall back to "timestamp" only when the urgency of both
    rows compares equal.
    """

    # A non-zero urgency result is truthy and wins; a zero result falls
    # through to the timestamp comparison.
    return ( cmp( a["isUrgent"], b["isUrgent"] )
             or cmp( a["timestamp"], b["timestamp"] ) )

def makeViewHtmlPage( viewInfo, detagger ):
    """
    Generate the HTML file for one view.

    Every feed yielded by Serializer.iterFeeds() whose configured tags
    share at least one tag with viewInfo["tags"] contributes its rows.
    The rows are sorted urgent-first and then newest-first, and written
    to "<name>.html", where <name> is viewInfo["name"] lower-cased.

    As a side effect, each feedInfo gets its feed configuration stored
    under the "config" key.  detagger is passed through to makeRows().
    """

    rows = []
    for feedInfo in Serializer.iterFeeds():
        feedConfig = getFeedConfig( feedInfo["config_name"] )
        # makeRows() reads the configuration back from feedInfo.
        feedInfo["config"] = feedConfig
        matchingTags = set( viewInfo["tags"] ).intersection(
            feedConfig["tags"]
            )
        if matchingTags:
            rows.extend( makeRows(feedInfo, detagger) )

    # Reversed so that urgent rows and larger timestamps come first.
    rows.sort( cmp = cmpRowUrgencyThenTimestamp, reverse = True )

    fileObj = open( "%s.html" % viewInfo["name"].lower(), "w" )
    try:
        writePage( viewInfo, rows, fileObj )
    finally:
        # Close the file even if writing the page fails.
        fileObj.close()

def main():
    """
    Generate one HTML page for each view listed in Config.VIEWS,
    announcing each view on standard output first.
    """

    # A single detagger is shared by all views; its summary length
    # limit comes from Config.MAX_SUMMARY_LENGTH.
    summaryStripper = Detagger( Config.MAX_SUMMARY_LENGTH )

    for view in Config.VIEWS:
        print( "Generating view for %(name)s" % view )
        makeViewHtmlPage( view, summaryStripper )

if __name__ == "__main__":
    # When run as a script, first run the doctests embedded in this
    # module's docstrings, then generate the HTML pages.  The result of
    # testmod() is not checked, so generation goes ahead either way.
    import doctest
    doctest.testmod()
    main()