changeset 19:7fe2efecc9c3

more refactoring
author Atul Varma <varmaa@toolness.com>
date Sun, 03 Jan 2010 04:31:28 -0800
parents 75e573ddc70e
children 5e7ecaf68b9a
files publish_edition.py
diffstat 1 file changed, 67 insertions(+), 55 deletions(-)
--- a/publish_edition.py	Sun Jan 03 04:17:09 2010 -0800
+++ b/publish_edition.py	Sun Jan 03 04:31:28 2010 -0800
@@ -28,6 +28,65 @@
 def to_date_tuple(dt):
     return (dt.year, dt.month, dt.day)
 
+def refresh_urls(feeds, urls):
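+    """Fetch every feed URL into the URL cache, printing a traceback
+    and moving on when an individual fetch fails."""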
+    ucache = UrlCache(urls)
+    for feed_urls in feeds.values():
+        for url in feed_urls:
+            try:
+                ucache.refresh(url)
+            except Exception:
+                # Log the failure but keep refreshing the remaining URLs.
+                traceback.print_exc()
+
+def refresh_articles(articles, feeds, urls):
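+    """Rebuild each author's article list from their cached feeds,
+    preferring an entry's full content and falling back to its
+    summary when no content is present."""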
+    for author, feed_urls in feeds.items():
+        articles[author] = []
+        for url in feed_urls:
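+            # Parse the raw feed data stored in the URL cache.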
+            feed = feedparser.parse(urls[url]['data'])
+            for entry in feed['entries']:
+                updated = entry.get('updated_parsed')
+                if updated is None:
+                    # Entries with no parsed date can't be assigned a
+                    # pub_date, so skip them.
+                    continue
+                updated = date(year=updated.tm_year,
+                               month=updated.tm_mon,
+                               day=updated.tm_mday)
+                content = entry.get('content', '')
+                summary = entry.get('summary', '')
+                summary_detail = entry.get('summary_detail', {})
+                if not content:
+                    # Fall back to summary_detail (which carries a
+                    # content type), then to the plain-text summary.
+                    if summary_detail and summary_detail.get('value'):
+                        content = [summary_detail]
+                    elif summary:
+                        content = [{'type': 'text/plain',
+                                    'value': summary}]
+                if content:
+                    article = {'url': entry.get('link'),
+                               'title': entry.get('title'),
+                               'pub_date': updated,
+                               'content': content}
+                    articles[author].append(article)
+
+def filter_articles(names, articles):
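+    """Return each named author's recent articles (newer than three
+    days ago, older than today), with publication dates flattened to
+    (year, month, day) tuples."""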
+    max_date = date.today()
+    min_date = max_date - timedelta(days=3)
+
+    published_authors = [author for author in names
+                         if author in articles]
+
+    filtered_articles = {}
+
+    for author in published_authors:
+        filtered_articles[author] = [
+            {'url': article['url'],
+             'title': article['title'],
+             'content': article['content'],
+             'pubDate': to_date_tuple(article['pub_date'])}
+            for article in articles[author]
+            if (article['pub_date'] > min_date and
+                article['pub_date'] < max_date)
+            ]
+    return filtered_articles
+
 def publish_edition(update_whoisi=False,
                     update_urls=False,
                     update_articles=False):
@@ -40,9 +99,6 @@
              for line in open(AUTHORS_FILENAME, 'r').readlines()
              if line and not line.startswith('#')]
 
-    urls = load(URLS_FILENAME, {})
-    ucache = UrlCache(urls)
-
     people = load(WHOISI_FILENAME, [])
     wiserver = WhoisiServer()
     wicache = WhoisiCache(wiserver, people)
@@ -68,66 +124,22 @@
                 person_feeds.append(site['feed'])
         feeds[person['name']] = person_feeds
 
+    urls = load(URLS_FILENAME, {})
+
     if update_urls:
-        for feed_urls in feeds.values():
-            for url in feed_urls:
-                try:
-                    ucache.refresh(url)
-                except Exception, e:
-                    traceback.print_exc(e)
+        refresh_urls(feeds=feeds, urls=urls)
         save(urls, URLS_FILENAME)
 
     articles = load(ARTICLES_FILENAME, {})
 
     if update_articles:
-        for author, feed_urls in feeds.items():
-            articles[author] = []
-            for url in feed_urls:
-                feed = feedparser.parse(urls[url]['data'])
-                for entry in feed['entries']:
-                    updated = entry.get('updated_parsed')
-                    updated = date(year=updated.tm_year,
-                                   month=updated.tm_mon,
-                                   day=updated.tm_mday)
-                    content = entry.get('content', '')
-                    summary = entry.get('summary', '')
-                    summary_detail = entry.get('summary_detail', {})
-                    if not content:
-                        if not (summary_detail and
-                                summary_detail.get('value')):
-                            if not summary:
-                                pass
-                            else:
-                                content = [{'type': 'text/plain',
-                                            'value': summary}]
-                        else:
-                            content = [summary_detail]
-                    if content:
-                        article = {'url': entry.get('link'),
-                                   'title': entry.get('title'),
-                                   'pub_date': updated,
-                                   'content': content}
-                        articles[author].append(article)
+        refresh_articles(articles=articles,
+                         feeds=feeds,
+                         urls=urls)
         save(articles, ARTICLES_FILENAME)
 
-    max_date = date.today()
-    min_date = max_date - timedelta(days=3)
-
-    published_authors = [author for author in names
-                         if author in articles]
-
-    filtered_articles = {}
-
-    for author in published_authors:
-        filtered_articles[author] = [
-            {'url': article['url'],
-             'title': article['title'],
-             'content': article['content'],
-             'pubDate': to_date_tuple(article['pub_date'])}
-            for article in articles[author]
-            if (article['pub_date'] > min_date and
-                article['pub_date'] < max_date)
-            ]
+    filtered_articles = filter_articles(names=names,
+                                        articles=articles)
 
     json.dump({'authors': names, 'articles': filtered_articles},
               open(JSON_FILENAME, 'w'))