Mercurial > kiritsu
view GenerateHtml.py @ 0:2110430f4d7f
Origination. Two critical files for any of this to work, Config.py and LocalAuth.py, aren't included here because they contain sensitive information, so this is effectively broken at the moment. I'll have to do a bit of refactoring and make a second commit.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Fri, 08 Feb 2008 23:53:26 -0600 |
parents | |
children | 919b98450387 |
line wrap: on
line source
"""Generate one static HTML page per configured view from the stored feeds.

For every view in Config.VIEWS, the entries of all feeds sharing at least
one tag with the view are collected, sorted newest-first and written to
"<view name, lowercased>.html" in the current directory.
"""

import HTMLParser
import time
from xml.sax.saxutils import escape

import Serializer
import Config


class Detagger( HTMLParser.HTMLParser ):
    """
    Strips markup from an HTML fragment and truncates the remaining text.

    Text is accumulated chunk by chunk until about maxLength characters
    have been collected.  The chunk that would exceed the limit is cut at
    the last whitespace character found inside the remaining budget and
    " ..." is appended; all later text is dropped.

    >>> d = Detagger( 10 )
    >>> d.process( '<p>12345 <i>6</i> 78</p>' )
    '12345 6 78'
    >>> d.process( '<p>12345 <i>6</i> 789</p>' )
    '12345 6 ...'
    >>> d.process( '<p>12345 <i>6</i> 7 89</p>' )
    '12345 6 7 ...'
    >>> d.process( '<p>123 5 oiawejgoaiewgj</p><p>a</p>' )
    '123 5 ...'
    """

    def __init__( self, maxLength ):
        """Create a detagger that keeps roughly maxLength text characters."""
        HTMLParser.HTMLParser.__init__( self )
        self.maxLength = maxLength

    def handle_data( self, data ):
        """Collect one run of text, truncating it if the budget runs out."""
        # NOTE(review): only handle_data is overridden, so entity and
        # character references in the input are presumably not collected
        # as text -- confirm whether that is intended.
        if self._ellipsified or self._dataChars >= self.maxLength:
            return

        maxChunkLength = self.maxLength - self._dataChars
        if len( data ) > maxChunkLength:
            # Cut at the last whitespace inside the remaining budget.  If
            # there is none, the whole chunk is replaced by the ellipsis.
            lastGoodWordBreak = 0
            for i in range( maxChunkLength ):
                if data[i].isspace():
                    lastGoodWordBreak = i
            data = data[:lastGoodWordBreak] + " ..."
            self._ellipsified = True
        self._data.append( data )
        self._dataChars += len( data )

    def reset( self ):
        """Reset the parser and clear the text collected so far."""
        HTMLParser.HTMLParser.reset( self )
        self._data = []
        self._dataChars = 0
        self._ellipsified = False

    def process( self, data ):
        """Return the detagged, possibly truncated text of the HTML in data."""
        self.reset()
        self.feed( data )
        return "".join( self._data )


def makeRows( feedInfo, detagger ):
    """
    Build one display dict per entry of a feed.

    feedInfo must provide "entries", "config" and "config_name".  Each
    returned dict has the keys source, url, title, summary, timestamp and
    isUrgent.  Entries without a title are shown as "Untitled"; a missing
    summary falls back to the first content item, then to "".
    """
    # These values are the same for every entry of the feed.
    importance = feedInfo["config"].get( "importance",
                                         Config.DEFAULT_IMPORTANCE )
    isUrgent = ( importance == 1 )
    source = feedInfo["config_name"]

    rows = []
    for entry in feedInfo["entries"]:
        summary = entry.get( "summary" )
        if not summary:
            if entry.get( "content" ):
                summary = entry["content"][0].value
            else:
                summary = ""

        # NOTE(review): time.mktime() interprets the time tuple as local
        # time.  If updated_parsed is UTC, calendar.timegm() would be the
        # exact conversion; the value is only used for ordering here.
        rows.append( dict(
            source = source,
            url = entry["link"],
            title = entry.get( "title", "Untitled" ),
            summary = detagger.process( summary ),
            timestamp = time.mktime( entry["updated_parsed"] ),
            isUrgent = isUrgent
            ) )
    return rows


def writePage( viewInfo, rows, outFile ):
    """
    Write the HTML page for one view to outFile.

    rows are display dicts as produced by makeRows().  Rows alternate
    between the CSS classes "odd" and "even"; urgent rows get the class
    "urgent" instead, without interrupting the alternation.
    """
    outFile.write(
        "<html><head><link href=\"styles.css\" rel=\"stylesheet\" "
        "type=\"text/css\"></head><body><h1>%s</h1>" % viewInfo["name"] )
    outFile.write( "<table cellspacing=\"0\" cellpadding=\"0\">" )

    rowClass = "odd"
    for row in rows:
        if row["isUrgent"]:
            finalRowClass = "urgent"
        else:
            finalRowClass = rowClass
        outFile.write( "<tr class=\"%s\">" % finalRowClass )

        # The URL comes from the feed and is placed inside an attribute,
        # so markup characters and double quotes must be escaped.
        # NOTE(review): title and summary are written as-is because they
        # may already contain HTML entities -- confirm against the data.
        cells = dict( row )
        cells["url"] = escape( row["url"], {"\"": "&quot;"} )

        line = ( "<td>%(source)s</td><td><a href=\"%(url)s\">%(title)s</a>"
                 " &mdash; %(summary)s</td>" % cells )
        outFile.write( line.encode( "ascii", "xmlcharrefreplace" ) )
        outFile.write( "</tr>" )

        if rowClass == "odd":
            rowClass = "even"
        else:
            rowClass = "odd"

    outFile.write( "</table>" )
    outFile.write( "</body></html>" )


def getFeedConfig( name ):
    """
    Return the first entry of Config.FEEDS whose "name" equals name.

    Raises IndexError if no such feed is configured.
    """
    for feed in Config.FEEDS:
        if feed["name"] == name:
            return feed
    raise IndexError( "no feed named %r in Config.FEEDS" % (name,) )


def makeViewHtmlPage( viewInfo, detagger ):
    """
    Generate "<view name, lowercased>.html" for one view.

    Every feed from Serializer.iterFeeds() that shares at least one tag
    with the view contributes its entries; rows are sorted by timestamp,
    newest first.  As a side effect each feedInfo gets its configuration
    stored under the "config" key.
    """
    viewTags = set( viewInfo["tags"] )

    rows = []
    for feedInfo in Serializer.iterFeeds():
        feedConfig = getFeedConfig( feedInfo["config_name"] )
        feedInfo["config"] = feedConfig
        if viewTags.intersection( feedConfig["tags"] ):
            rows.extend( makeRows( feedInfo, detagger ) )

    rows.sort( key = lambda row: row["timestamp"], reverse = True )

    fileObj = open( "%s.html" % viewInfo["name"].lower(), "w" )
    try:
        writePage( viewInfo, rows, fileObj )
    finally:
        # Close the file even if writing the page fails.
        fileObj.close()


def main():
    """Generate the HTML page of every view listed in Config.VIEWS."""
    detagger = Detagger( Config.MAX_SUMMARY_LENGTH )
    for viewConfig in Config.VIEWS:
        print( "Generating view for %(name)s" % viewConfig )
        makeViewHtmlPage( viewConfig, detagger )


if __name__ == "__main__":
    import doctest
    doctest.testmod()

    main()