# HG changeset patch
# User Atul Varma
# Date 1262521888 28800
# Node ID 7fe2efecc9c3da984867da696d1954c4d6bcf798
# Parent  75e573ddc70e203746aff9891b50ec7d4f59b34b
more refactoring

diff -r 75e573ddc70e -r 7fe2efecc9c3 publish_edition.py
--- a/publish_edition.py	Sun Jan 03 04:17:09 2010 -0800
+++ b/publish_edition.py	Sun Jan 03 04:31:28 2010 -0800
@@ -28,6 +28,66 @@
 def to_date_tuple(dt):
     return (dt.year, dt.month, dt.day)
 
+def refresh_urls(feeds, urls):
+    ucache = UrlCache(urls)
+    for feed_urls in feeds.values():
+        for url in feed_urls:
+            try:
+                ucache.refresh(url)
+            except Exception, e:
+                traceback.print_exc(e)
+
+def refresh_articles(articles, feeds, urls):
+    for author, feed_urls in feeds.items():
+        articles[author] = []
+        for url in feed_urls:
+            feed = feedparser.parse(urls[url]['data'])
+            for entry in feed['entries']:
+                updated = entry.get('updated_parsed')
+                updated = date(year=updated.tm_year,
+                               month=updated.tm_mon,
+                               day=updated.tm_mday)
+                content = entry.get('content', '')
+                summary = entry.get('summary', '')
+                summary_detail = entry.get('summary_detail', {})
+                if not content:
+                    if not (summary_detail and
+                            summary_detail.get('value')):
+                        if not summary:
+                            pass
+                        else:
+                            content = [{'type': 'text/plain',
+                                        'value': summary}]
+                    else:
+                        content = [summary_detail]
+                if content:
+                    article = {'url': entry.get('link'),
+                               'title': entry.get('title'),
+                               'pub_date': updated,
+                               'content': content}
+                    articles[author].append(article)
+
+def filter_articles(names, articles):
+    max_date = date.today()
+    min_date = max_date - timedelta(days=3)
+
+    published_authors = [author for author in names
+                         if author in articles]
+
+    filtered_articles = {}
+
+    for author in published_authors:
+        filtered_articles[author] = [
+            {'url': article['url'],
+             'title': article['title'],
+             'content': article['content'],
+             'pubDate': to_date_tuple(article['pub_date'])}
+            for article in articles[author]
+            if (article['pub_date'] > min_date and
+                article['pub_date'] < max_date)
+            ]
+    return filtered_articles
+
 def publish_edition(update_whoisi=False,
                     update_urls=False,
                     update_articles=False):
@@ -40,9 +100,6 @@
              for line in open(AUTHORS_FILENAME, 'r').readlines()
              if line and not line.startswith('#')]
 
-    urls = load(URLS_FILENAME, {})
-    ucache = UrlCache(urls)
-
     people = load(WHOISI_FILENAME, [])
     wiserver = WhoisiServer()
     wicache = WhoisiCache(wiserver, people)
@@ -68,66 +125,22 @@
                 person_feeds.append(site['feed'])
         feeds[person['name']] = person_feeds
 
+    urls = load(URLS_FILENAME, {})
+
     if update_urls:
-        for feed_urls in feeds.values():
-            for url in feed_urls:
-                try:
-                    ucache.refresh(url)
-                except Exception, e:
-                    traceback.print_exc(e)
+        refresh_urls(feeds=feeds, urls=urls)
         save(urls, URLS_FILENAME)
 
     articles = load(ARTICLES_FILENAME, {})
 
     if update_articles:
-        for author, feed_urls in feeds.items():
-            articles[author] = []
-            for url in feed_urls:
-                feed = feedparser.parse(urls[url]['data'])
-                for entry in feed['entries']:
-                    updated = entry.get('updated_parsed')
-                    updated = date(year=updated.tm_year,
-                                   month=updated.tm_mon,
-                                   day=updated.tm_mday)
-                    content = entry.get('content', '')
-                    summary = entry.get('summary', '')
-                    summary_detail = entry.get('summary_detail', {})
-                    if not content:
-                        if not (summary_detail and
-                                summary_detail.get('value')):
-                            if not summary:
-                                pass
-                            else:
-                                content = [{'type': 'text/plain',
-                                            'value': summary}]
-                        else:
-                            content = [summary_detail]
-                    if content:
-                        article = {'url': entry.get('link'),
-                                   'title': entry.get('title'),
-                                   'pub_date': updated,
-                                   'content': content}
-                        articles[author].append(article)
+        refresh_articles(articles=articles,
+                         feeds=feeds,
+                         urls=urls)
        save(articles, ARTICLES_FILENAME)
 
-    max_date = date.today()
-    min_date = max_date - timedelta(days=3)
-
-    published_authors = [author for author in names
-                         if author in articles]
-
-    filtered_articles = {}
-
-    for author in published_authors:
-        filtered_articles[author] = [
-            {'url': article['url'],
-             'title': article['title'],
-             'content': article['content'],
-             'pubDate': to_date_tuple(article['pub_date'])}
-            for article in articles[author]
-            if (article['pub_date'] > min_date and
-                article['pub_date'] < max_date)
-            ]
+    filtered_articles = filter_articles(names=names,
+                                        articles=articles)
     json.dump({'authors': names,
                'articles': filtered_articles},
              open(JSON_FILENAME, 'w'))
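
For reference, here is a minimal sketch (not part of the changeset) of how the extracted filter_articles helper behaves. The sample data is hypothetical, and the import assumes publish_edition.py and its dependencies (feedparser, the whoisi client code) are importable. Because both comparisons are strict, an article is kept only when its pub_date is strictly newer than three days ago and strictly older than today, i.e. from yesterday or the day before.

from datetime import date, timedelta

from publish_edition import filter_articles, to_date_tuple

today = date.today()
articles = {
    'Alice': [
        # Yesterday falls strictly inside the (today - 3 days, today) window.
        {'url': 'http://example.com/kept',
         'title': 'Kept',
         'content': [{'type': 'text/plain', 'value': 'recent'}],
         'pub_date': today - timedelta(days=1)},
        # Exactly three days old: excluded by the strict '>' comparison.
        {'url': 'http://example.com/dropped',
         'title': 'Dropped',
         'content': [{'type': 'text/plain', 'value': 'stale'}],
         'pub_date': today - timedelta(days=3)},
    ],
}

filtered = filter_articles(names=['Alice', 'Bob'], articles=articles)
assert [a['title'] for a in filtered['Alice']] == ['Kept']
# Authors with no fetched articles are skipped entirely.
assert 'Bob' not in filtered
# pub_date is converted to a (year, month, day) tuple under the 'pubDate' key.
assert filtered['Alice'][0]['pubDate'] == to_date_tuple(today - timedelta(days=1))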