# HG changeset patch
# User Atul Varma
# Date 1202536406 21600
# Node ID 2110430f4d7f59de1f1d59feda587824fc0b1b96
Origination. Two files that are critical for any of this to work, Config.py and LocalAuth.py, aren't included here because they contain sensitive information, so this is effectively broken at the moment. I'll have to do a bit of refactoring and make a second commit.

diff -r 000000000000 -r 2110430f4d7f GenerateHtml.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GenerateHtml.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,126 @@
+import HTMLParser
+import time
+
+import Serializer
+import Config
+
+class Detagger( HTMLParser.HTMLParser ):
+    """
+    >>> d = Detagger( 10 )
+    >>> d.process( '<p>12345 6 78</p>' )
+    '12345 6 78'
+    >>> d.process( '<p>12345 6 789</p>' )
+    '12345 6 ...'
+    >>> d.process( '<p>12345 6 7 89</p>' )
+    '12345 6 7 ...'
+    >>> d.process( '<p>123 5 oiawejgoaiewgj</p><p>a</p>' )
+    '123 5 ...'
+    """
+
+    def __init__( self, maxLength ):
+        HTMLParser.HTMLParser.__init__( self )
+        self.maxLength = maxLength
+
+    def handle_data( self, data ):
+        if ( not self._ellipsified ) and self._dataChars < self.maxLength:
+            maxChunkLength = self.maxLength - self._dataChars
+            if len( data ) > maxChunkLength:
+                lastGoodWordBreak = 0
+                for i in range( maxChunkLength ):
+                    if data[i].isspace():
+                        lastGoodWordBreak = i
+                data = data[:lastGoodWordBreak] + " ..."
+                self._ellipsified = True
+            self._data.append( data )
+            self._dataChars += len( data )
+
+    def reset( self ):
+        HTMLParser.HTMLParser.reset( self )
+        self._data = []
+        self._dataChars = 0
+        self._ellipsified = False
+
+    def process( self, data ):
+        self.reset()
+        self.feed( data )
+        return "".join( self._data )
+
+def makeRows( feedInfo, detagger ):
+    rows = []
+
+    for entry in feedInfo["entries"]:
+        title = entry.get( "title", "Untitled" )
+        summary = entry.get( "summary" )
+        if not summary:
+            if entry.get( "content" ):
+                summary = entry["content"][0].value
+            else:
+                summary = ""
+        summary = detagger.process( summary )
+        timestamp = time.mktime( entry["updated_parsed"] )
+        url = entry["link"]
+        importance = feedInfo["config"].get( "importance",
+                                             Config.DEFAULT_IMPORTANCE )
+        displayDict = dict( source = feedInfo["config_name"],
+                            url = url,
+                            title = title,
+                            summary = summary,
+                            timestamp = timestamp,
+                            isUrgent = (importance == 1) )
+        rows.append( displayDict )
+    return rows
+
+def writePage( viewInfo, rows, outFile ):
+    outFile.write( "<h1>%s</h1>" % viewInfo["name"] )
+    outFile.write( "<table>" )
+    rowClass = "odd"
+    for row in rows:
+        if row["isUrgent"]:
+            finalRowClass = "urgent"
+        else:
+            finalRowClass = rowClass
+        outFile.write( '<tr class="%s">' % finalRowClass )
+        #line = '<td>%(source)s</td><td><a href="%(url)s">%(title)s</a></td><td>%(summary)s</td>' % row
+        line = '<td>%(source)s</td><td><a href="%(url)s">%(title)s</a> &mdash; %(summary)s</td>' % row
+        outFile.write( line.encode( "ascii", "xmlcharrefreplace" ) )
+        outFile.write( "</tr>" )
+        if rowClass == "odd":
+            rowClass = "even"
+        else:
+            rowClass = "odd"
+    outFile.write( "</table>" )
+
+def getFeedConfig( name ):
+    return [ feed for feed in Config.FEEDS
+             if feed["name"] == name ][0]
+
+def makeViewHtmlPage( viewInfo, detagger ):
+    rows = []
+    for feedInfo in Serializer.iterFeeds():
+        feedConfig = getFeedConfig( feedInfo["config_name"] )
+        feedInfo["config"] = feedConfig
+        matchingTags = set( viewInfo["tags"] ).intersection(
+            feedConfig["tags"]
+            )
+        if matchingTags:
+            rows.extend( makeRows(feedInfo, detagger) )
+
+    rows.sort( key = lambda row: row["timestamp"],
+               reverse = True )
+
+    fileObj = open( "%s.html" % viewInfo["name"].lower(), "w" )
+    writePage( viewInfo, rows, fileObj )
+    fileObj.close()
+
+def main():
+    detagger = Detagger( Config.MAX_SUMMARY_LENGTH )
+
+    for viewConfig in Config.VIEWS:
+        print "Generating view for %(name)s" % viewConfig
+        makeViewHtmlPage( viewConfig, detagger )
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
+    main()
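Since Config.py is withheld from this commit, the attributes the code above pulls from it (MAX_SUMMARY_LENGTH, DEFAULT_IMPORTANCE, FEEDS, VIEWS, plus FILENAME in Serializer.py) have to be inferred from their call sites. A minimal sketch of what Config.py presumably looks like; the attribute names come from the code, but every value below is an invented placeholder, not the real configuration:

    # Hypothetical Config.py sketch -- the real file is not in this commit.
    MAX_SUMMARY_LENGTH = 100     # char budget Detagger enforces per summary
    DEFAULT_IMPORTANCE = 2       # importance == 1 flags a row as "urgent"
    FILENAME = "feeds.cache"     # pickle stream read/written by Serializer

    # Each feed needs "name", "url", and "tags"; "importance" is optional.
    FEEDS = [
        dict( name = "Example Blog",
              url = "http://example.com/feed.rss",
              tags = [ "friends" ],
              importance = 1 ),
        ]

    # Each view becomes <name>.html and pulls in feeds with matching tags.
    VIEWS = [
        dict( name = "Friends",
              tags = [ "friends" ] ),
        ]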
diff -r 000000000000 -r 2110430f4d7f ImapFeed.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ImapFeed.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,80 @@
+import imaplib
+import time
+from email.feedparser import FeedParser
+from email.utils import parsedate
+import elementtree.ElementTree as ET
+
+from LocalAuth import IMAP_AUTH
+from Config import IMAP_FEEDS
+
+def getImapUnreadMailInfo( server, port, username, password,
+                           mailbox = "INBOX", isSsl = True ):
+    if isSsl:
+        imap = imaplib.IMAP4_SSL( server, port )
+    else:
+        imap = imaplib.IMAP4( server, port )
+
+    imap.login( username, password )
+    imap.select( mailbox )
+    typ, msgnums = imap.search( None, "UNSEEN" )
+    if typ != "OK":
+        raise IOError( "Error searching IMAP folder" )
+    msgnums = [ int( num ) for num in msgnums[0].split() ]
+    for num in msgnums:
+        typ, result = imap.fetch( str(num), "(BODY.PEEK[HEADER])" )
+        if typ != "OK":
+            raise IOError( "Error fetching IMAP messages" )
+        headers = result[0][1]
+        parser = FeedParser()
+        parser.feed( headers )
+        message = parser.close()
+        # TODO: Make sure that the timezone info is converted properly.
+        timestamp = time.mktime( parsedate(message["Date"]) )
+        yield dict( timestamp = timestamp,
+                    sender = message["From"],
+                    subject = message["Subject"] )
+    imap.close()
+    imap.logout()
+
+def _addTextNode( element, name, text ):
+    subElement = ET.SubElement( element, name )
+    subElement.text = text
+
+def makeFeedFromMailInfo( infoIterator, url ):
+    root = ET.Element( "rss" )
+    root.set( "version", "2.0" )
+    channel = ET.SubElement( root, "channel" )
+    for info in infoIterator:
+        item = ET.SubElement( channel, "item" )
+        _addTextNode( item, "title", info["subject"] )
+        _addTextNode( item, "pubDate",
+                      time.ctime( info["timestamp"] ) )
+        _addTextNode( item, "link", url )
+        _addTextNode( item, "description",
+                      "From %s" % info["sender"] )
+    tree = ET.ElementTree( root )
+
+    import StringIO
+    strFile = StringIO.StringIO()
+    tree.write( strFile )
+    return strFile.getvalue()
+
+def generateFeed( feed, auth ):
+    imapParams = dict( server = feed["server"],
+                       port = feed["port"],
+                       mailbox = feed["mailbox"],
+                       isSsl = feed["isSsl"] )
+    imapParams.update( auth[imapParams["server"]] )
+    open( feed["filename"], "w" ).write(
+        makeFeedFromMailInfo( getImapUnreadMailInfo(**imapParams),
+                              feed["url"] )
+        )
+
+def main():
+    for feed in IMAP_FEEDS:
+        print "Generating feed for %s (%s)" % ( feed["name"],
+                                                feed["filename"] )
+        generateFeed( feed, IMAP_AUTH )
+
+if __name__ == "__main__":
+    main()
diff -r 000000000000 -r 2110430f4d7f MakeEverything.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MakeEverything.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,8 @@
+import ImapFeed
+import UpdateFeeds
+import GenerateHtml
+
+if __name__ == "__main__":
+    ImapFeed.main()
+    UpdateFeeds.main()
+    GenerateHtml.main()
diff -r 000000000000 -r 2110430f4d7f Serializer.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Serializer.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,24 @@
+import cPickle as pickle
+import Config
+
+class Serializer( object ):
+    def __init__( self ):
+        self._fileObj = open( Config.FILENAME, "wb" )
+
+    def store( self, feedInfo ):
+        pickle.dump( feedInfo,
+                     self._fileObj,
+                     pickle.HIGHEST_PROTOCOL )
+
+    def finalize( self ):
+        self._fileObj.close()
+
+def iterFeeds():
+    fileObj = open( Config.FILENAME, "rb" )
+    while 1:
+        try:
+            feedInfo = pickle.load( fileObj )
+        except EOFError:
+            break
+        yield feedInfo
+    fileObj.close()
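Serializer simply appends consecutive pickles to one file, and iterFeeds() replays them until EOFError. A quick round-trip, assuming Config.FILENAME points somewhere writable (the dicts here are stand-ins for real feedparser results):

    import Serializer

    s = Serializer.Serializer()      # opens (and truncates) Config.FILENAME
    s.store( dict( config_name = "Example Blog", entries = [] ) )
    s.store( dict( config_name = "Example Mail", entries = [] ) )
    s.finalize()

    for feedInfo in Serializer.iterFeeds():
        print feedInfo["config_name"]  # yields the dicts back in order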
% feed["name"] + data = opener.open( feed["url"] ) + feedInfo = feedparser.parse( data ) + feedInfo["config_name"] = feed["name"] + serializer.store( feedInfo ) + serializer.finalize() + +if __name__ == "__main__": + main() diff -r 000000000000 -r 2110430f4d7f styles.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/styles.css Fri Feb 08 23:53:26 2008 -0600 @@ -0,0 +1,41 @@ +a:link { +color:#990000; +text-decoration:none; +} + +a:visited { +color:#990000; +text-decoration:none; +} + +a:hover { +color:#CC3300; +text-decoration:underline; +} + +a:active { +color:#990000; +text-decoration:none; +} + +body { + font-family: lucida grande,lucida sans unicode,arial,verdana,sans serif; +} + +tr.urgent { + background-color: #FFCCFF; +} + +tr.odd { +} + +tr.even { + background-color: #FBFBFB; +} + +td { + border-top: 1px solid black; + font-size: 10pt; + vertical-align: top; + padding: 4px; +}