Mercurial > kiritsu
changeset 1:d5bc8acafca3
Added a readme and sample configuration files.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Sat, 09 Feb 2008 14:24:37 -0600 |
parents | 2110430f4d7f |
children | 8311e7c2eb4b |
files | Config.py.sample LocalAuth.py.sample readme.txt |
diffstat | 3 files changed, 269 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Config.py.sample -- sample configuration for Kiritsu.
# Copy this file to Config.py and edit it as necessary (see readme.txt).

from os.path import abspath

# pathname2url moved to urllib.request in Python 3; fall back to the
# old urllib location so this sample also works under Python 2.
try:
    from urllib.request import pathname2url
except ImportError:
    from urllib import pathname2url

# File in which aggregated feed data is stored.
FILENAME = "feeds.dat"

# Importance is from 1 to 10; 1 is really important, 10 is not very
# important.
DEFAULT_IMPORTANCE = 10

# Maximum length, in characters, of an item summary.
MAX_SUMMARY_LENGTH = 140

# Each view groups feeds by tag; one static HTML page is generated per
# view (e.g. "work.html" for the "Work" view -- see readme.txt).
VIEWS = [
    dict( name = "Play",
          tags = ["friends", "entertainment", "news"] ),
    dict( name = "Work",
          tags = ["work", "mozilla"] )
]

# IMAP accounts to poll; each is converted into a local RSS file
# (given by 'filename') and prepended to FEEDS below.
IMAP_FEEDS = [
    dict( name = "Mozilla Email",
          tags = ["work"],
          filename = abspath("MozillaEmail.rss"),
          server = "mail.mozilla.com",
          port = 993,
          mailbox = "INBOX",
          isSsl = True,
          url = "https://mail.mozilla.com" ),
    dict( name = "Humanized Email",
          tags = ["work"],
          filename = abspath("HumanizedEmail.rss"),
          server = "imap.gmail.com",
          port = 993,
          # In Gmail's IMAP interface, labels are treated as
          # IMAP mailboxes.
          mailbox = "humanized",
          isSsl = True,
          url = "https://www.gmail.com" )
]

# RSS/Atom feeds to aggregate.  Feeds without an explicit 'importance'
# receive DEFAULT_IMPORTANCE.
FEEDS = [
    dict( name = "Mozilla Intranet Forum",
          tags = ["mozilla"],
          url = "https://intranet.mozilla.org/forum/?Feed=Atom" ),
    dict( name = "Frostedelves",
          tags = ["friends"],
          url = "http://frostedelves.livejournal.com/data/rss?auth=digest" ),
    dict( name = "Planet Mozilla",
          tags = ["mozilla"],
          url = "http://planet.mozilla.org/rss20.xml" ),
    dict( name = "Lonely Lion",
          tags = ["work", "friends"],
          url = "http://feeds.feedburner.com/LonelyLion",
          importance = 2 ),
    dict( name = "Subtraction (Khoi Vinh)",
          tags = ["work"],
          url = "http://feeds.feedburner.com/subtraction" ),
    dict( name = "Humanized",
          tags = ["work"],
          url = "http://feeds.feedburner.com/humanized/weblog" ),
    dict( name = "Humanized Comments",
          tags = ["work"],
          url = "http://www.humanized.com/weblog/comments/feed" ),
    dict( name = "Evil Brain Jono",
          tags = ["friends"],
          url = "http://evilbrainjono.net/~Jono/rss/jono.rss" ),
    dict( name = "Penny Arcade",
          tags = ["entertainment"],
          url = "http://feeds.penny-arcade.com/pa-mainsite" ),
    dict( name = "Metaplace",
          tags = ["entertainment", "work"],
          url = "http://feeds.feedburner.com/metaplace" ),
    dict( name = "The Economist - The internet",
          tags = ["work"],
          url = "http://www.economist.com/rss/the_internet_rss.xml" ),
    dict( name = "The Escapist - Featured Articles",
          tags = ["entertainment"],
          url = "http://www.escapistmagazine.com/rss/articles/issues" ),
    dict( name = "Productive Firefox",
          tags = ["work"],
          url = "http://feeds.feedburner.com/ProductiveFirefox" ),
]

# Prepend an entry for each IMAP feed, pointing at the local RSS file
# it generates (each insert at 0 means the last IMAP feed listed ends
# up first in FEEDS).
for feed in IMAP_FEEDS:
    info = dict( name = feed["name"],
                 tags = feed["tags"],
                 url = "file://" + pathname2url( feed["filename"] ),
                 # Email feeds are really important!
                 importance = 1 )
    FEEDS.insert( 0, info )
# LocalAuth.py.sample -- sample authentication configuration for Kiritsu.
# Copy this file to LocalAuth.py and fill in real credentials.
# NOTE(review): this file targets Python 2 (urllib2); it would need
# porting to urllib.request for Python 3.

import urllib2

class MyBasicAuthHandler( urllib2.HTTPBasicAuthHandler ):
    """HTTP Basic auth handler that tolerates single-quoted realm values.

    Overrides http_error_auth_reqed with a near-verbatim copy of the
    urllib2 original, adding a workaround for servers whose
    WWW-Authenticate header quotes the realm with single quotes.
    """

    def http_error_401(self, req, fp, code, msg, headers):
        # Delegates straight to the stock handler; kept as an explicit
        # override (a convenient single place to hook for debugging).
        result = urllib2.HTTPBasicAuthHandler.http_error_401(
            self, req, fp, code, msg, headers
            )
        return result

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # MODIFIED VERSION OF THE ORIGINAL METHOD TO FIX A BUG.

        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            # urllib2's realm-matching regex expects double-quoted realms.
            mo = urllib2.AbstractBasicAuthHandler.rx.search(authreq)
            if not mo:
                # HACK TO GET THINGS TO WORK IF THE AUTHREQ USES
                # SINGLE QUOTES INSTEAD OF DOUBLE QUOTES - A.V.
                authreq = authreq.replace( "'", "\"" )
                mo = urllib2.AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    # Retry the request with the credentials registered
                    # for this realm via add_password().
                    return self.retry_http_basic_auth(host, req, realm)

class MyDigestAuthHandler( urllib2.HTTPDigestAuthHandler ):
    """HTTP Digest auth handler; pure pass-through overrides kept as
    convenient hook points for debugging."""

    def get_authorization(self, req, chal):
        return urllib2.HTTPDigestAuthHandler.get_authorization(
            self, req, chal
            )

    def http_error_401(self, req, fp, code, msg, headers):
        return urllib2.HTTPDigestAuthHandler.http_error_401(
            self, req, fp, code, msg, headers
            )

def getAuthHandlers():
    """Return the list of urllib2 auth handlers (basic, then digest),
    pre-loaded with credentials for the feeds in Config.py.sample."""
    basicAuth = MyBasicAuthHandler()
    digestAuth = MyDigestAuthHandler()
    # add_password(realm, uri, user, passwd): realm must match the one
    # the server advertises in its WWW-Authenticate header.
    basicAuth.add_password( "Private",
                            "intranet.mozilla.org",
                            "my_mozilla_username",
                            "my_mozilla_password" )
    digestAuth.add_password( "lj",
                            ("frostedelves.livejournal.com",),
                            "my_lj_username",
                            "my_lj_password" )
    return [basicAuth, digestAuth]

# IMAP credentials keyed by server hostname; hostnames match the
# 'server' fields of IMAP_FEEDS in Config.py.sample.
IMAP_AUTH = {
    "mail.mozilla.com" : dict( username = "my_mozilla_username",
                               password = "my_mozilla_password" ),
    "imap.gmail.com" : dict( username = "my_gmail_username",
                             password = "my_gmail_password" )
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.txt Sat Feb 09 14:24:37 2008 -0600 @@ -0,0 +1,122 @@ +Kiritsu: A Humane Content Aggregator +==================================== + +Motivation +---------- + +The motivation for this project comes from two separate sources. + +The first is the idea that content aggregation should, to whatever +extent possible, be an automated thing. Despite the fact that RSS has +been around for years, for instance, most users don't know what it is, +and even those who do don't usually go through the additional work of +subscribing to feeds they want to read. Ideally, a trusted +observer--such as a Mozilla browser--would monitor the browsing habits +of its user and automatically present them with content that it knows +they want, keeping all statistics completely confidential (stored on +the client or in a Weave cloud). If the user visits Penny Arcade and +xkcd frequently, for instance, the browser can automatically subscribe +to the RSS feeds for those sites and display information about the +latest content from them on, say, the user's start page. If the user +eventually stops visiting a site, the browser should notice this and +stop subscribing to its feed. All of this is to say that a user +shouldn't have to even know what RSS or "web syndication" is for it to +help them transparently in a way that doesn't compromise their privacy. + +The second motivation for this project is that shortly after joining +Mozilla, I was a bit overloaded with information--a mental state that +some affectionately refer to as "drinking from a fire hose". While +this was, in some ways, really exciting and invigorating, it +eventually became a bit distracting. 
Not only did I have +to consume the information I had taken in before I joined, but I was +also faced with even more sources of it: I now found myself reading +planet.mozilla.org like it was a nervous twitch, checking Mail.app for +my mozilla email (in addition to checking Gmail for other email), and +constantly checking Mozilla's internal message boards as well. On top +of that, I had to log on to IRC and instant messenger. + +What I really needed was a solution that securely aggregated all of +this information in one place, sorted it by importance, and +compartmentalized different kinds of relevant information into views +that mapped directly to my daily activities (e.g., one view for work, +another for play, another for personal finances/chores). Furthermore, +I wanted to be able to "turn off" the flow of information so that it +wouldn't distract me and I could get my work done. Once I was ready +for more information, I could turn the flow back on, consuming it only +when I needed it, rather than having it constantly pushed at me. + +In the spirit of this, I decided to call this project "Kiritsu", which is +the Japanese word for "discipline". + +Prerequisites +------------- + +To use Kiritsu, you currently need: + + * Python 2.5 + * Mark Pilgrim's feedparser module: + http://www.feedparser.org/ + +Use +--- + +Presently, Kiritsu is in a rather nascent and inchoate state, and I +apologize in advance for any frustrations the reader may have in +getting it to work. In short, this is what has to be done: + + (1) Copy Config.py.sample to Config.py and edit it as + necessary. + (2) Copy LocalAuth.py.sample to LocalAuth.py and edit it as + necessary. + (3) Run "MakeEverything.py". If all goes well, a static HTML file + should have been created for each view--e.g. "work.html" is the + page for the "Work" view. 
+ +Implementation +-------------- + +Kiritsu operates entirely on RSS and Atom feeds; while other sources, +such as IMAP, XMPP, IRC, and so forth are (or will be) supported by +the framework, they are internally converted into RSS feeds and then +processed as such. My intent was to use pre-existing standards as +much as possible. + +Improvements +------------ + +As can plainly be seen, Kiritsu can use a lot of improvements, +especially given all the features mentioned in the "Motivation" +section of this document. In particular: + + * Kiritsu should eventually be able to automatically figure out what + feeds the end-user wants to read, to as great an extent as + possible. One potentially easy way to do this may be to create a + simple Firefox Extension that integrates with MeeTimer + (http://getmeetimer.com/) to determine what the end-user is most + frequently reading, and when they're reading it. Then it should + at least be possible to offer different views of information based + on what time of day (or day of the week) it is. There's obviously + lots of room for machine learning algorithms here; for example, + Bayesian filtering could be used to determine what kinds of + articles within a feed the user finds interesting, and only + present them with similar articles. The general goal is to + prevent information overload, as opposed to presenting the user + with more information. + + * Support for more information sources needs to be added, such as + XMPP and IRC. + +Other Considerations +-------------------- + +It should be noted that there are some interface features that are +traditionally considered to be humane which have intentionally been +left out of Kiritsu. For example, Humanized History [1] is humane +because it doesn't require the user to think about navigation. +Kiritsu, on the other hand, intentionally displays a very limited +amount of information to the user that should comfortably fit on one +page. 
The intent is to give the reader a definitive stopping point at +which they can stop consuming information and go back to doing +whatever it is they need to do. + +[1] http://www.humanized.com/weblog/2006/04/28/reading_humanized/