changeset 0:2110430f4d7f

Origination. Two files that are critical for any of this to work, Config.py and LocalAuth.py, aren't included here because they contain sensitive information, so the code is effectively broken at the moment. I'll have to do a bit of refactoring and make a second commit.
author Atul Varma <varmaa@toolness.com>
date Fri, 08 Feb 2008 23:53:26 -0600
parents
children d5bc8acafca3
files GenerateHtml.py ImapFeed.py MakeEverything.py Serializer.py UpdateFeeds.py styles.css
diffstat 6 files changed, 324 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GenerateHtml.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,132 @@
+import HTMLParser
+import time
+
+import Serializer
+import Config
+
+class Detagger( HTMLParser.HTMLParser ):
+    """
+    >>> d = Detagger( 10 )
+    >>> d.process( '<p>12345 <i>6</i> 78</p>' )
+    '12345 6 78'
+    >>> d.process( '<p>12345 <i>6</i> 789</p>' )
+    '12345 6 ...'
+    >>> d.process( '<p>12345 <i>6</i> 7 89</p>' )
+    '12345 6 7 ...'
+    >>> d.process( '<p>123 5 oiawejgoaiewgj</p><p>a</p>' )
+    '123 5 ...'
+    """
+
+    def __init__( self, maxLength ):
+        HTMLParser.HTMLParser.__init__( self )
+        self.maxLength = maxLength
+
+    def handle_data( self, data ):
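+        # Accumulate text until maxLength characters have been collected,
+        # then truncate at the last word break and append an ellipsis.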
+        if ( not self._ellipsified ) and self._dataChars < self.maxLength:
+            maxChunkLength = self.maxLength - self._dataChars
+            if len( data ) > maxChunkLength:
+                lastGoodWordBreak = 0
+                for i in range( maxChunkLength ):
+                    if data[i].isspace():
+                        lastGoodWordBreak = i
+                data = data[:lastGoodWordBreak] + " ..."
+                self._ellipsified = True
+            self._data.append( data )
+            self._dataChars += len( data )
+
+    def reset( self ):
+        HTMLParser.HTMLParser.reset( self )
+        self._data = []
+        self._dataChars = 0
+        self._ellipsified = False
+
+    def process( self, data ):
+        self.reset()
+        self.feed( data )
+        return "".join( self._data )
+
+def makeRows( feedInfo, detagger ):
+    rows = []
+
+    for entry in feedInfo["entries"]:
+        title = entry.get( "title", "Untitled" )
+        summary = entry.get( "summary" )
+        if not summary:
+            if entry.get( "content" ):
+                summary = entry["content"][0].value
+            else:
+                summary = ""
+        summary = detagger.process( summary )
+        timestamp = time.mktime( entry["updated_parsed"] )
+        url = entry["link"]
+        importance = feedInfo["config"].get( "importance",
+                                             Config.DEFAULT_IMPORTANCE )
+        displayDict = dict( source = feedInfo["config_name"],
+                            url = url,
+                            title = title,
+                            summary = summary,
+                            timestamp = timestamp,
+                            isUrgent = (importance == 1) )
+        rows.append( displayDict )
+    return rows
+
+def writePage( viewInfo, rows, outFile ):
+    outFile.write( "<html><head><link href=\"styles.css\" rel=\"stylesheet\" type=\"text/css\"></head><h1>%s</h1><body>" % viewInfo["name"] )
+    outFile.write( "<table cellspacing=\"0\" cellpadding=\"0\">" )
+    rowClass = "odd"
+    for row in rows:
+        if row["isUrgent"]:
+            finalRowClass = "urgent"
+        else:
+            finalRowClass = rowClass
+        outFile.write( "<tr class=\"%s\">" % finalRowClass )
+        #line = "<td>%(source)s</td><td><a href=\"%(url)s\">%(title)s</a></td><td>%(summary)s</td>" % row
+        line = "<td>%(source)s</td><td><a href=\"%(url)s\">%(title)s</a> &mdash; %(summary)s</td>" % row
+        outFile.write( line.encode( "ascii", "xmlcharrefreplace" ) )
+        outFile.write( "</tr>" )            
+        if rowClass == "odd":
+            rowClass = "even"
+        else:
+            rowClass = "odd"
+    outFile.write( "</table" )
+    outFile.write( "</body>" )
+
+def getFeedConfig( name ):
+    return [ feed for feed in Config.FEEDS
+             if feed["name"] == name ][0]
+
+def makeViewHtmlPage( viewInfo, detagger ):
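+    # A feed's entries belong in this view when the feed shares at least
+    # one tag with the view.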
+    rows = []
+    for feedInfo in Serializer.iterFeeds():
+        feedConfig = getFeedConfig( feedInfo["config_name"] )
+        feedInfo["config"] = feedConfig
+        matchingTags = set( viewInfo["tags"] ).intersection(
+            feedConfig["tags"]
+            )
+        if matchingTags:
+            rows.extend( makeRows(feedInfo, detagger) )
+
+    rows.sort( key = lambda row: row["timestamp"],
+               reverse = True )
+
+    fileObj = open( "%s.html" % viewInfo["name"].lower(), "w" )
+    writePage( viewInfo, rows, fileObj )
+    fileObj.close()
+
+def main():
+    detagger = Detagger( Config.MAX_SUMMARY_LENGTH )
+
+    for viewConfig in Config.VIEWS:
+        print "Generating view for %(name)s" % viewConfig
+        makeViewHtmlPage( viewConfig, detagger )
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ImapFeed.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,85 @@
+import imaplib
+import time
+import StringIO
+from email.feedparser import FeedParser
+from email.utils import formatdate, parsedate
+import elementtree.ElementTree as ET
+
+from LocalAuth import IMAP_AUTH
+from Config import IMAP_FEEDS
+
+def getImapUnreadMailInfo( server, port, username, password,
+                           mailbox = "INBOX", isSsl = True ):
+    if isSsl:
+        imap = imaplib.IMAP4_SSL( server, port )
+    else:
+        imap = imaplib.IMAP4( server, port )
+
+    imap.login( username, password )
+    imap.select( mailbox )
+    typ, msgnums = imap.search( None, "UNSEEN" )
+    if typ != "OK":
+        raise IOError( "Error searching IMAP folder" )
+    msgnums = [ int( num ) for num in msgnums[0].split() ]
+    for num in msgnums:
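+        # BODY.PEEK fetches the headers without setting the \Seen flag,
+        # so building the feed does not mark any messages as read.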
+        typ, result = imap.fetch( str(num), "(BODY.PEEK[HEADER])" )
+        if typ != "OK":
+            raise IOError( "Error fetching IMAP messages" )
+        headers = result[0][1]
+        parser = FeedParser()
+        parser.feed( headers )
+        message = parser.close()
+        # TODO: Make sure that the timezone info is converted properly.
+        timestamp = time.mktime( parsedate(message["Date"]) )
+        yield dict( timestamp = timestamp,
+                    sender = message["From"],
+                    subject = message["Subject"] )
+    imap.close()
+    imap.logout()
+
+def _addTextNode( element, name, text ):
+    subElement = ET.SubElement( element, name )
+    subElement.text = text
+
+def makeFeedFromMailInfo( infoIterator, url ):
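+    # Build a minimal RSS 2.0 document with one <item> per unread message.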
+    root = ET.Element( "rss" )
+    root.set( "version", "2.0" )
+    channel = ET.SubElement( root, "channel" )
+    for info in infoIterator:
+        item = ET.SubElement( channel, "item" )
+        _addTextNode( item, "title", info["subject"] )
+        _addTextNode( item, "pubDate",
+                      time.ctime( info["timestamp"] ) )
+        _addTextNode( item, "link", url )
+        _addTextNode( item, "description",
+                      "From %s" % info["sender"] )
+    tree = ET.ElementTree( root )
+
+    strFile = StringIO.StringIO()
+    tree.write( strFile )
+    return strFile.getvalue()
+
+def generateFeed( feed, auth ):
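+    # Build the IMAP connection parameters, merge in the per-server
+    # credentials from LocalAuth, and write the resulting RSS feed to disk.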
+    imapParams = dict( server = feed["server"],
+                       port = feed["port"],
+                       mailbox = feed["mailbox"],
+                       isSsl = feed["isSsl"] )
+    imapParams.update( auth[imapParams["server"]] )
+    open( feed["filename"], "w" ).write(
+        makeFeedFromMailInfo( getImapUnreadMailInfo(**imapParams),
+                              feed["url"] )
+        )
+
+def main():
+    for feed in IMAP_FEEDS:
+        print "Generating feed for %s (%s)" % ( feed["name"],
+                                                feed["filename"] )
+        generateFeed( feed, IMAP_AUTH )
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MakeEverything.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,8 @@
+import ImapFeed
+import UpdateFeeds
+import GenerateHtml
+
+if __name__ == "__main__":
+    ImapFeed.main()
+    UpdateFeeds.main()
+    GenerateHtml.main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Serializer.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,26 @@
+import cPickle as pickle
+import Config
+
+class Serializer( object ):
+    def __init__( self ):
+        self._fileObj = open( Config.FILENAME, "wb" )
+
+    def store( self, feedInfo ):
+        pickle.dump( feedInfo,
+                     self._fileObj,
+                     pickle.HIGHEST_PROTOCOL )
+
+    def finalize( self ):
+        self._fileObj.close()
+
+def iterFeeds():
+    fileObj = open( Config.FILENAME, "rb" )
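+    # Feed dictionaries were appended with repeated pickle.dump() calls,
+    # so keep loading until pickle raises EOFError.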
+    while 1:
+        try:
+            feedInfo = pickle.load( fileObj )
+        except EOFError:
+            break
+        yield feedInfo
+    fileObj.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/UpdateFeeds.py	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,32 @@
+import feedparser
+import urllib2
+import Serializer
+import Config
+
+def getHandlers():
+    try:
+        import LocalAuth
+        handlers = LocalAuth.getAuthHandlers()
+    except ImportError:
+        print "No LocalAuth.py found, assuming no auth handlers."
+        handlers = []
+    return handlers
+
+def main():
+    serializer = Serializer.Serializer()
+
+    for feed in Config.FEEDS:
+        # Yes, we need to rebuild the opener and the handlers every
+        # time through this loop, or else things will fail on multiple
+        # Livejournal requests.
+        opener = urllib2.build_opener( *getHandlers() )
+
+        print "Fetching feed for %s..." % feed["name"]
+        data = opener.open( feed["url"] )
+        feedInfo = feedparser.parse( data )
+        feedInfo["config_name"] = feed["name"]
+        serializer.store( feedInfo )
+    serializer.finalize()
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/styles.css	Fri Feb 08 23:53:26 2008 -0600
@@ -0,0 +1,41 @@
+a:link {
+    color: #990000;
+    text-decoration: none;
+}
+
+a:visited {
+    color: #990000;
+    text-decoration: none;
+}
+
+a:hover {
+    color: #CC3300;
+    text-decoration: underline;
+}
+
+a:active {
+    color: #990000;
+    text-decoration: none;
+}
+
+body {
+    font-family: "lucida grande", "lucida sans unicode", arial, verdana, sans-serif;
+}
+
+tr.urgent {
+    background-color: #FFCCFF;
+}
+
+tr.odd {
+}
+
+tr.even {
+    background-color: #FBFBFB;
+}
+
+td {
+    border-top: 1px solid black;
+    font-size: 10pt;
+    vertical-align: top;
+    padding: 4px;
+}
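
Config.py and LocalAuth.py are omitted from this changeset, so the following
is a minimal sketch of the names the other modules import from them. The
attribute and key names come from the code above; every value is a
hypothetical placeholder rather than the real configuration:

    # Config.py (sketch; all values are placeholders)
    FILENAME = "feeds.pickle"       # pickle store used by Serializer
    MAX_SUMMARY_LENGTH = 300        # character budget for Detagger
    DEFAULT_IMPORTANCE = 2          # used when a feed omits "importance"
    FEEDS = [ dict( name = "Example Blog",
                    url = "http://example.com/feed.xml",
                    tags = [ "news" ],
                    importance = 1 ) ]
    VIEWS = [ dict( name = "News", tags = [ "news" ] ) ]
    IMAP_FEEDS = [ dict( name = "Example Mail",
                         server = "imap.example.com",
                         port = 993,
                         mailbox = "INBOX",
                         isSsl = True,
                         filename = "mail-feed.xml",
                         url = "https://mail.example.com/" ) ]

    # LocalAuth.py (sketch; placeholder credentials)
    IMAP_AUTH = { "imap.example.com": dict( username = "user",
                                            password = "secret" ) }

    def getAuthHandlers():
        # UpdateFeeds passes these to urllib2.build_opener(); return real
        # urllib2 auth handlers here if any feeds require authentication.
        return []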