Mercurial > kiritsu
view GenerateHtml.py @ 8:4d61c56473c2 default tip
Fixed a problem where some feeds would have unpickleable expatreaders.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Fri, 18 Apr 2008 17:13:02 -0700 |
parents | df9d4e704257 |
children |
line wrap: on
line source
import HTMLParser
import time

import Serializer
import Config
import FeedSources

class Detagger( HTMLParser.HTMLParser ):
    """
    Strips markup from an HTML fragment and truncates the remaining
    text to roughly maxLength characters.

    When a text chunk would push the output past maxLength, the chunk
    is cut at the last whitespace character found inside the remaining
    budget (or dropped entirely if there is none), " ..." is appended,
    and all subsequent text is ignored.

    >>> d = Detagger( 10 )
    >>> d.process( '<p>12345 <i>6</i> 78</p>' )
    '12345 6 78'
    >>> d.process( '<p>12345 <i>6</i> 789</p>' )
    '12345 6 ...'
    >>> d.process( '<p>12345 <i>6</i> 7 89</p>' )
    '12345 6 7 ...'
    >>> d.process( '<p>123 5 oiawejgoaiewgj</p><p>a</p>' )
    '123 5 ...'
    """

    def __init__( self, maxLength ):
        """
        maxLength is the character budget for the text collected
        before the ellipsis is appended.
        """

        # The base-class constructor calls reset(), which also
        # initializes the per-document state defined below.
        HTMLParser.HTMLParser.__init__( self )
        self.maxLength = maxLength

    def handle_data( self, data ):
        """
        Parser callback for a run of text between tags; collects it
        until the budget is used up or an ellipsis has been emitted.
        """

        if self._ellipsified or self._dataChars >= self.maxLength:
            return

        maxChunkLength = self.maxLength - self._dataChars
        if len( data ) > maxChunkLength:
            # Find the last whitespace inside the remaining budget so
            # that the cut never lands in the middle of a word.  If
            # there is none, the index stays 0 and the whole chunk is
            # replaced by the ellipsis.
            lastGoodWordBreak = 0
            for i in range( maxChunkLength ):
                if data[i].isspace():
                    lastGoodWordBreak = i
            data = data[:lastGoodWordBreak] + " ..."
            self._ellipsified = True
        self._data.append( data )
        self._dataChars += len( data )

    def reset( self ):
        """
        Resets the parser and clears the collected text so that the
        instance can be reused for another document.
        """

        HTMLParser.HTMLParser.reset( self )
        self._data = []
        self._dataChars = 0
        self._ellipsified = False

    def process( self, data ):
        """
        Returns the detagged, possibly truncated text of the HTML
        fragment 'data'.
        """

        self.reset()
        self.feed( data )
        return "".join( self._data )

def makeRows( feedInfo, detagger ):
    """
    Builds one display dictionary per entry in feedInfo["entries"].

    feedInfo must also provide "config_name" and "config" (a mapping
    that may contain "importance").  Each entry must have "link" and
    "updated_parsed"; "title", "summary" and "content" are optional.

    Each returned dictionary has the keys source, url, title, summary,
    timestamp and isUrgent.
    """

    rows = []
    for entry in feedInfo["entries"]:
        title = entry.get( "title", "Untitled" )

        # Prefer the summary; fall back to the first content item, and
        # finally to an empty string.
        summary = entry.get( "summary" )
        if not summary:
            if entry.get( "content" ):
                summary = entry["content"][0].value
            else:
                summary = ""
        summary = detagger.process( summary )

        # NOTE(review): mktime() interprets the time tuple as local
        # time; presumably fine because the value is only used for
        # ordering rows -- confirm if it is ever displayed.
        timestamp = time.mktime( entry["updated_parsed"] )
        url = entry["link"]
        importance = feedInfo["config"].get( "importance",
                                             Config.DEFAULT_IMPORTANCE )
        displayDict = dict( source = feedInfo["config_name"],
                            url = url,
                            title = title,
                            summary = summary,
                            timestamp = timestamp,
                            isUrgent = (importance == 1) )
        rows.append( displayDict )
    return rows

def writePage( viewInfo, rows, outFile ):
    """
    Writes a complete HTML page for the view to outFile.

    viewInfo["name"] becomes the page heading, and each item of rows
    (as produced by makeRows()) becomes one table row.  Urgent rows
    get the CSS class "urgent"; all other rows get "odd" or "even"
    according to their position in the table.
    """

    # The heading is emitted inside <body>; markup between </head> and
    # <body> is not valid HTML.
    outFile.write( "<html><head><link href=\"styles.css\" rel=\"stylesheet\" type=\"text/css\"></head><body><h1>%s</h1>" % viewInfo["name"] )
    outFile.write( "<table cellspacing=\"0\" cellpadding=\"0\">" )
    rowClass = "odd"
    for row in rows:
        if row["isUrgent"]:
            finalRowClass = "urgent"
        else:
            finalRowClass = rowClass
        outFile.write( "<tr class=\"%s\">" % finalRowClass )
        # NOTE(review): the row values are interpolated without HTML
        # escaping; feed-supplied titles and URLs are presumably
        # sanitized upstream -- confirm.
        # The dash is written as an entity to keep this source ASCII.
        line = "<td>%(source)s</td><td><a href=\"%(url)s\">%(title)s</a> &mdash; %(summary)s</td>" % row
        # Non-ASCII characters become numeric character references so
        # the page is plain ASCII regardless of the feed's encoding.
        outFile.write( line.encode( "ascii", "xmlcharrefreplace" ) )
        outFile.write( "</tr>" )
        # The stripe alternates on every row, urgent or not.
        if rowClass == "odd":
            rowClass = "even"
        else:
            rowClass = "odd"
    outFile.write( "</table>" )
    outFile.write( "</body></html>" )

def getFeedConfig( name ):
    """
    Returns the FeedSources.FEED_INFO entry for the feed called
    'name'.
    """

    return FeedSources.FEED_INFO[name]

def cmpRowUrgencyThenTimestamp( a, b ):
    """
    Comparison function for rows that first compares the urgency
    level of the item, then compares the timestamp.
    """

    urgentCmp = cmp( a["isUrgent"], b["isUrgent"] )
    if urgentCmp:
        return urgentCmp
    else:
        return cmp( a["timestamp"], b["timestamp"] )

def makeViewHtmlPage( viewInfo, detagger ):
    """
    Generates "<lowercased view name>.html" in the current directory.

    The page contains the entries of every feed from
    Serializer.iterFeeds() that shares at least one tag with
    viewInfo["tags"], with urgent rows first and newer rows before
    older ones.
    """

    viewTags = set( viewInfo["tags"] )
    rows = []
    for feedInfo in Serializer.iterFeeds():
        feedConfig = getFeedConfig( feedInfo["config_name"] )
        # makeRows() reads the configuration back from the feed info.
        feedInfo["config"] = feedConfig
        if viewTags.intersection( feedConfig["tags"] ):
            rows.extend( makeRows( feedInfo, detagger ) )
    rows.sort( cmp = cmpRowUrgencyThenTimestamp, reverse = True )

    fileObj = open( "%s.html" % viewInfo["name"].lower(), "w" )
    try:
        writePage( viewInfo, rows, fileObj )
    finally:
        # Close the file even if writing the page fails.
        fileObj.close()

def main():
    """
    Generates one HTML page for every view listed in Config.VIEWS.
    """

    detagger = Detagger( Config.MAX_SUMMARY_LENGTH )
    for viewConfig in Config.VIEWS:
        print( "Generating view for %(name)s" % viewConfig )
        makeViewHtmlPage( viewConfig, detagger )

if __name__ == "__main__":
    import doctest

    # Run the doctests as a sanity check before generating anything.
    doctest.testmod()
    main()