changeset 14:4a2499602804

refactorings to publish_edition.py: move module-level script code into a publish_edition() function, with the UPDATE_* flags as keyword arguments
author Atul Varma <varmaa@toolness.com>
date Sun, 03 Jan 2010 00:27:38 -0800
parents 69fd13a4aef4
children f0dd39b7cbe1
files publish_edition.py
diffstat 1 files changed, 96 insertions(+), 95 deletions(-)
--- a/publish_edition.py	Sun Jan 03 00:19:49 2010 -0800
+++ b/publish_edition.py	Sun Jan 03 00:27:38 2010 -0800
@@ -14,12 +14,6 @@
 ARTICLES_FILENAME = 'articles.pickle'
 JSON_FILENAME = 'daily-edition.json'
 
-UPDATE_WHOISI = False
-UPDATE_URLS = False
-UPDATE_ARTICLES = False
-
-logging.basicConfig(level=logging.DEBUG)
-
 def load(filename, default):
     if os.path.exists(filename):    
         return pickle.load(open(filename, 'r'))
@@ -30,100 +24,107 @@
     pickle.dump(obj, f)
     f.close()
 
-names = [line.strip()
-         for line in open(AUTHORS_FILENAME, 'r').readlines()
-         if line and not line.startswith('#')]
-
-urls = load(URLS_FILENAME, {})
-ucache = UrlCache(urls)
-
-people = load(WHOISI_FILENAME, [])
-wiserver = WhoisiServer()
-wicache = WhoisiCache(wiserver, people)
-
-if UPDATE_WHOISI:
-    wicache.update()
-    save(people, WHOISI_FILENAME)
-
-#wicache.refresh_people([people.index(person)])
-
-following = [person for person in people
-             if person['name'] in names]
-
-feeds = {}
-
-for person in following:
-    person_feeds = []
-    for site in person['sites'].values():
-        if site['type'] == 'feed':
-            person_feeds.append(site['feed'])
-    feeds[person['name']] = person_feeds
-
-if UPDATE_URLS:
-    for feed_urls in feeds.values():
-        for url in feed_urls:
-            try:
-                ucache.refresh(url)
-            except Exception, e:
-                traceback.print_exc(e)
-    save(urls, URLS_FILENAME)
-
-articles = load(ARTICLES_FILENAME, {})
-
-if UPDATE_ARTICLES:
-    for author, feed_urls in feeds.items():
-        articles[author] = []
-        for url in feed_urls:
-            feed = feedparser.parse(urls[url]['data'])
-            for entry in feed['entries']:
-                updated = entry.get('updated_parsed')
-                updated = date(year=updated.tm_year,
-                               month=updated.tm_mon,
-                               day=updated.tm_mday)
-                content = entry.get('content', '')
-                summary = entry.get('summary', '')
-                summary_detail = entry.get('summary_detail', {})
-                if not content:
-                    if not (summary_detail and
-                            summary_detail.get('value')):
-                        if not summary:
-                            pass
-                        else:
-                            content = [{'type': 'text/plain',
-                                        'value': summary}]
-                    else:
-                        content = [summary_detail]
-                if content:
-                    article = {'url': entry.get('link'),
-                               'title': entry.get('title'),
-                               'pub_date': updated,
-                               'content': content}
-                    articles[author].append(article)
-    save(articles, ARTICLES_FILENAME)
-
 def to_date_tuple(dt):
     return (dt.year, dt.month, dt.day)
 
-max_date = date.today()
-min_date = max_date - timedelta(days=3)
+def publish_edition(update_whoisi=False,
+                    update_urls=False,
+                    update_articles=False):
+    names = [line.strip()
+             for line in open(AUTHORS_FILENAME, 'r').readlines()
+             if line and not line.startswith('#')]
+
+    urls = load(URLS_FILENAME, {})
+    ucache = UrlCache(urls)
+
+    people = load(WHOISI_FILENAME, [])
+    wiserver = WhoisiServer()
+    wicache = WhoisiCache(wiserver, people)
+
+    if update_whoisi:
+        wicache.update()
+        save(people, WHOISI_FILENAME)
+
+    #wicache.refresh_people([people.index(person)])
 
-published_authors = [author for author in names
-                     if author in articles]
+    following = [person for person in people
+                 if person['name'] in names]
+
+    feeds = {}
 
-filtered_articles = {}
+    for person in following:
+        person_feeds = []
+        for site in person['sites'].values():
+            if site['type'] == 'feed':
+                person_feeds.append(site['feed'])
+        feeds[person['name']] = person_feeds
+
+    if update_urls:
+        for feed_urls in feeds.values():
+            for url in feed_urls:
+                try:
+                    ucache.refresh(url)
+                except Exception, e:
+                    traceback.print_exc(e)
+        save(urls, URLS_FILENAME)
+
+    articles = load(ARTICLES_FILENAME, {})
 
-for author in published_authors:
-    filtered_articles[author] = [
-        {'url': article['url'],
-         'title': article['title'],
-         'content': article['content'],
-         'pubDate': to_date_tuple(article['pub_date'])}
-        for article in articles[author]
-        if (article['pub_date'] > min_date and
-            article['pub_date'] < max_date)
-        ]
+    if update_articles:
+        for author, feed_urls in feeds.items():
+            articles[author] = []
+            for url in feed_urls:
+                feed = feedparser.parse(urls[url]['data'])
+                for entry in feed['entries']:
+                    updated = entry.get('updated_parsed')
+                    updated = date(year=updated.tm_year,
+                                   month=updated.tm_mon,
+                                   day=updated.tm_mday)
+                    content = entry.get('content', '')
+                    summary = entry.get('summary', '')
+                    summary_detail = entry.get('summary_detail', {})
+                    if not content:
+                        if not (summary_detail and
+                                summary_detail.get('value')):
+                            if not summary:
+                                pass
+                            else:
+                                content = [{'type': 'text/plain',
+                                            'value': summary}]
+                        else:
+                            content = [summary_detail]
+                    if content:
+                        article = {'url': entry.get('link'),
+                                   'title': entry.get('title'),
+                                   'pub_date': updated,
+                                   'content': content}
+                        articles[author].append(article)
+        save(articles, ARTICLES_FILENAME)
 
-json.dump({'authors': names, 'articles': filtered_articles},
-          open(JSON_FILENAME, 'w'))
+    max_date = date.today()
+    min_date = max_date - timedelta(days=3)
+
+    published_authors = [author for author in names
+                         if author in articles]
+
+    filtered_articles = {}
 
-logging.info('wrote %s.' % JSON_FILENAME)
+    for author in published_authors:
+        filtered_articles[author] = [
+            {'url': article['url'],
+             'title': article['title'],
+             'content': article['content'],
+             'pubDate': to_date_tuple(article['pub_date'])}
+            for article in articles[author]
+            if (article['pub_date'] > min_date and
+                article['pub_date'] < max_date)
+            ]
+
+    json.dump({'authors': names, 'articles': filtered_articles},
+              open(JSON_FILENAME, 'w'))
+
+    logging.info('wrote %s.' % JSON_FILENAME)
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG)
+    publish_edition()
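
With the three UPDATE_* module constants replaced by keyword arguments, a caller can choose what to refresh on each run instead of editing the source, and importing the module no longer triggers a publish, since both logging.basicConfig() and the publish_edition() call now sit behind the __main__ guard. A minimal usage sketch, assuming the script is importable as a module named publish_edition:

    import logging
    import publish_edition  # assumption: the script is on sys.path as a module

    logging.basicConfig(level=logging.INFO)

    # Refresh all three caches before publishing; called with no arguments,
    # publish_edition() reuses the pickled data from the previous run.
    publish_edition.publish_edition(update_whoisi=True,
                                    update_urls=True,
                                    update_articles=True)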