Mercurial > kiritsu
changeset 1:d5bc8acafca3
Added a readme and sample configuration files.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Sat, 09 Feb 2008 14:24:37 -0600 |
parents | 2110430f4d7f |
children | 8311e7c2eb4b |
files | Config.py.sample LocalAuth.py.sample readme.txt |
diffstat | 3 files changed, 269 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Config.py.sample -- sample configuration for Kiritsu.
# Copy this file to Config.py and edit it as necessary (see readme.txt).

from os.path import abspath

# pathname2url moved to urllib.request in Python 3; fall back to the
# old urllib location so this sample also works under Python 2.
try:
    from urllib.request import pathname2url
except ImportError:
    from urllib import pathname2url

# File in which aggregated feed data is stored.
FILENAME = "feeds.dat"

# Importance is from 1 to 10; 1 is really important, 10 is not very
# important.
DEFAULT_IMPORTANCE = 10

# Maximum length, in characters, of an item summary.
MAX_SUMMARY_LENGTH = 140

# Each view groups feeds by tag; one static HTML page is generated per
# view (e.g. "work.html" for the "Work" view -- see readme.txt).
VIEWS = [
    dict( name = "Play",
          tags = ["friends", "entertainment", "news"] ),
    dict( name = "Work",
          tags = ["work", "mozilla"] )
]

# IMAP accounts to poll; each is converted into a local RSS file
# (given by 'filename') and prepended to FEEDS below.
IMAP_FEEDS = [
    dict( name = "Mozilla Email",
          tags = ["work"],
          filename = abspath("MozillaEmail.rss"),
          server = "mail.mozilla.com",
          port = 993,
          mailbox = "INBOX",
          isSsl = True,
          url = "https://mail.mozilla.com" ),
    dict( name = "Humanized Email",
          tags = ["work"],
          filename = abspath("HumanizedEmail.rss"),
          server = "imap.gmail.com",
          port = 993,
          # In Gmail's IMAP interface, labels are treated as
          # IMAP mailboxes.
          mailbox = "humanized",
          isSsl = True,
          url = "https://www.gmail.com" )
]

# RSS/Atom feeds to aggregate.  Feeds without an explicit 'importance'
# receive DEFAULT_IMPORTANCE.
FEEDS = [
    dict( name = "Mozilla Intranet Forum",
          tags = ["mozilla"],
          url = "https://intranet.mozilla.org/forum/?Feed=Atom" ),
    dict( name = "Frostedelves",
          tags = ["friends"],
          url = "http://frostedelves.livejournal.com/data/rss?auth=digest" ),
    dict( name = "Planet Mozilla",
          tags = ["mozilla"],
          url = "http://planet.mozilla.org/rss20.xml" ),
    dict( name = "Lonely Lion",
          tags = ["work", "friends"],
          url = "http://feeds.feedburner.com/LonelyLion",
          importance = 2 ),
    dict( name = "Subtraction (Khoi Vinh)",
          tags = ["work"],
          url = "http://feeds.feedburner.com/subtraction" ),
    dict( name = "Humanized",
          tags = ["work"],
          url = "http://feeds.feedburner.com/humanized/weblog" ),
    dict( name = "Humanized Comments",
          tags = ["work"],
          url = "http://www.humanized.com/weblog/comments/feed" ),
    dict( name = "Evil Brain Jono",
          tags = ["friends"],
          url = "http://evilbrainjono.net/~Jono/rss/jono.rss" ),
    dict( name = "Penny Arcade",
          tags = ["entertainment"],
          url = "http://feeds.penny-arcade.com/pa-mainsite" ),
    dict( name = "Metaplace",
          tags = ["entertainment", "work"],
          url = "http://feeds.feedburner.com/metaplace" ),
    dict( name = "The Economist - The internet",
          tags = ["work"],
          url = "http://www.economist.com/rss/the_internet_rss.xml" ),
    dict( name = "The Escapist - Featured Articles",
          tags = ["entertainment"],
          url = "http://www.escapistmagazine.com/rss/articles/issues" ),
    dict( name = "Productive Firefox",
          tags = ["work"],
          url = "http://feeds.feedburner.com/ProductiveFirefox" ),
]

# Prepend an entry for each IMAP feed, pointing at the local RSS file
# it generates (each insert at 0 means the last IMAP feed listed ends
# up first in FEEDS).
for feed in IMAP_FEEDS:
    info = dict( name = feed["name"],
                 tags = feed["tags"],
                 url = "file://" + pathname2url( feed["filename"] ),
                 # Email feeds are really important!
                 importance = 1 )
    FEEDS.insert( 0, info )
# LocalAuth.py.sample -- sample authentication configuration for Kiritsu.
# Copy this file to LocalAuth.py and fill in real credentials.
# NOTE(review): this file targets Python 2 (urllib2); it would need
# porting to urllib.request for Python 3.

import urllib2

class MyBasicAuthHandler( urllib2.HTTPBasicAuthHandler ):
    """HTTP Basic auth handler that tolerates single-quoted realm values.

    Overrides http_error_auth_reqed with a near-verbatim copy of the
    urllib2 original, adding a workaround for servers whose
    WWW-Authenticate header quotes the realm with single quotes.
    """

    def http_error_401(self, req, fp, code, msg, headers):
        # Delegates straight to the stock handler; kept as an explicit
        # override (a convenient single place to hook for debugging).
        result = urllib2.HTTPBasicAuthHandler.http_error_401(
            self, req, fp, code, msg, headers
            )
        return result

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # MODIFIED VERSION OF THE ORIGINAL METHOD TO FIX A BUG.

        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            # urllib2's realm-matching regex expects double-quoted realms.
            mo = urllib2.AbstractBasicAuthHandler.rx.search(authreq)
            if not mo:
                # HACK TO GET THINGS TO WORK IF THE AUTHREQ USES
                # SINGLE QUOTES INSTEAD OF DOUBLE QUOTES - A.V.
                authreq = authreq.replace( "'", "\"" )
                mo = urllib2.AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    # Retry the request with the credentials registered
                    # for this realm via add_password().
                    return self.retry_http_basic_auth(host, req, realm)

class MyDigestAuthHandler( urllib2.HTTPDigestAuthHandler ):
    """HTTP Digest auth handler; pure pass-through overrides kept as
    convenient hook points for debugging."""

    def get_authorization(self, req, chal):
        return urllib2.HTTPDigestAuthHandler.get_authorization(
            self, req, chal
            )

    def http_error_401(self, req, fp, code, msg, headers):
        return urllib2.HTTPDigestAuthHandler.http_error_401(
            self, req, fp, code, msg, headers
            )

def getAuthHandlers():
    """Return the list of urllib2 auth handlers (basic, then digest),
    pre-loaded with credentials for the feeds in Config.py.sample."""
    basicAuth = MyBasicAuthHandler()
    digestAuth = MyDigestAuthHandler()
    # add_password(realm, uri, user, passwd): realm must match the one
    # the server advertises in its WWW-Authenticate header.
    basicAuth.add_password( "Private",
                            "intranet.mozilla.org",
                            "my_mozilla_username",
                            "my_mozilla_password" )
    digestAuth.add_password( "lj",
                            ("frostedelves.livejournal.com",),
                            "my_lj_username",
                            "my_lj_password" )
    return [basicAuth, digestAuth]

# IMAP credentials keyed by server hostname; hostnames match the
# 'server' fields of IMAP_FEEDS in Config.py.sample.
IMAP_AUTH = {
    "mail.mozilla.com" : dict( username = "my_mozilla_username",
                               password = "my_mozilla_password" ),
    "imap.gmail.com" : dict( username = "my_gmail_username",
                             password = "my_gmail_password" )
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.txt Sat Feb 09 14:24:37 2008 -0600 @@ -0,0 +1,122 @@ +Kiritsu: A Humane Content Aggregator +==================================== + +Motivation +---------- + +The motivation for this project comes from two separate sources. + +The first is the idea that content aggregation should, to whatever +extent possible, be an automated thing. Despite the fact that RSS has +been around for years, for instance, most users don't know what it is, +and even those who do don't usually go through the additional work of +subscribing to feeds they want to read. Ideally, a trusted +observer--such as a Mozilla browser--would monitor the browsing habits +of its user and automatically present them with content that it knows +they want, keeping all statistics completely confidential (stored on +the client or in a Weave cloud). If the user visits Penny Arcade and +xkcd frequently, for instance, the browser can automatically subscribe +to the RSS feeds for those sites and display information about the +latest content from them on, say, the user's start page. If the user +eventually stops visiting a site, the browser should notice this and +stop subscribing to its feed. All of this is to say that a user +shouldn't have to even know what RSS or "web syndication" is for it to +help them transparently in a way that doesn't compromise their privacy. + +The second motivation for this project is that shortly after joining +Mozilla, I was a bit overloaded with information--a mental state that +some affectionately refer to as "drinking from a fire hose". While +this was, in some ways, really exciting and invigorating, it +eventually became a bit distracting. 
Not only did I have +to consume the information I had taken in before I joined, but I was +also faced with even more sources of it: I now found myself reading +planet.mozilla.org like it was a nervous twitch, checking Mail.app for +my mozilla email (in addition to checking Gmail for other email), and +constantly checking Mozilla's internal message boards as well. On top +of that, I had to log on to IRC and instant messenger. + +What I really needed was a solution that securely aggregated all of +this information in one place, sorted it by importance, and +compartmentalized different kinds of relevant information into views +that mapped directly to my daily activities (e.g., one view for work, +another for play, another for personal finances/chores). Furthermore, +I wanted to be able to "turn off" the flow of information so that it +wouldn't distract me and I could get my work done. Once I was ready +for more information, I could turn the flow back on, consuming it only +when I needed it, rather than having it constantly pushed at me. + +In the spirit of this, I decided to call this project "Kiritsu", which is +the Japanese word for "discipline". + +Prerequisites +------------- + +To use Kiritsu, you currently need: + + * Python 2.5 + * Mark Pilgrim's feedparser module: + http://www.feedparser.org/ + +Use +--- + +Presently, Kiritsu is in a rather nascent and inchoate state, and I +apologize in advance for any frustrations the reader may have in +getting it to work. In short, this is what has to be done: + + (1) Copy Config.py.sample to Config.py and edit it as + necessary. + (2) Copy LocalAuth.py.sample to LocalAuth.py and edit it as + necessary. + (3) Run "MakeEverything.py". If all goes well, a static HTML file + should have been created for each view--e.g. "work.html" is the + page for the "Work" view. 
+ +Implementation +-------------- + +Kiritsu operates entirely on RSS and Atom feeds; while other sources, +such as IMAP, XMPP, IRC, and so forth are (or will be) supported by +the framework, they are internally converted into RSS feeds and then +processed as such. My intent was to use pre-existing standards as +much as possible. + +Improvements +------------ + +As can plainly be seen, Kiritsu can use a lot of improvements, +especially given all the features mentioned in the "Motivation" +section of this document. In particular: + + * Kiritsu should eventually be able to automatically figure out what + feeds the end-user wants to read, to as great an extent as + possible. One potentially easy way to do this may be to create a + simple Firefox Extension that integrates with MeeTimer + (http://getmeetimer.com/) to determine what the end-user is most + frequently reading, and when they're reading it. Then it should + at least be possible to offer different views of information based + on what time of day (or day of the week) it is. There's obviously + lots of room for machine learning algorithms here; for example, + Bayesian filtering could be used to determine what kinds of + articles within a feed the user finds interesting, and only + present them with similar articles. The general goal is to + prevent information overload, as opposed to presenting the user + with more information. + + * Support for more information sources needs to be added, such as + XMPP and IRC. + +Other Considerations +-------------------- + +It should be noted that there are some interface features that are +traditionally considered to be humane which have intentionally been +left out of Kiritsu. For example, Humanized History [1] is humane +because it doesn't require the user to think about navigation. +Kiritsu, on the other hand, intentionally displays a very limited +amount of information to the user that should comfortably fit on one +page. 
The intent is to give the reader a definitive stopping point at +which they can stop consuming information and go back to doing +whatever it is they need to do. + +[1] http://www.humanized.com/weblog/2006/04/28/reading_humanized/