daily-edition: changeset 14:4a2499602804
refactorings to publish_edition.py
| author | Atul Varma <varmaa@toolness.com> |
| --- | --- |
| date | Sun, 03 Jan 2010 00:27:38 -0800 |
| parents | 69fd13a4aef4 |
| children | f0dd39b7cbe1 |
| files | publish_edition.py |
| diffstat | 1 files changed, 96 insertions(+), 95 deletions(-) |
```diff
--- a/publish_edition.py	Sun Jan 03 00:19:49 2010 -0800
+++ b/publish_edition.py	Sun Jan 03 00:27:38 2010 -0800
@@ -14,12 +14,6 @@
 ARTICLES_FILENAME = 'articles.pickle'
 JSON_FILENAME = 'daily-edition.json'
 
-UPDATE_WHOISI = False
-UPDATE_URLS = False
-UPDATE_ARTICLES = False
-
-logging.basicConfig(level=logging.DEBUG)
-
 def load(filename, default):
     if os.path.exists(filename):
         return pickle.load(open(filename, 'r'))
@@ -30,100 +24,107 @@
     pickle.dump(obj, f)
     f.close()
 
-names = [line.strip()
-         for line in open(AUTHORS_FILENAME, 'r').readlines()
-         if line and not line.startswith('#')]
-
-urls = load(URLS_FILENAME, {})
-ucache = UrlCache(urls)
-
-people = load(WHOISI_FILENAME, [])
-wiserver = WhoisiServer()
-wicache = WhoisiCache(wiserver, people)
-
-if UPDATE_WHOISI:
-    wicache.update()
-    save(people, WHOISI_FILENAME)
-
-#wicache.refresh_people([people.index(person)])
-
-following = [person for person in people
-             if person['name'] in names]
-
-feeds = {}
-
-for person in following:
-    person_feeds = []
-    for site in person['sites'].values():
-        if site['type'] == 'feed':
-            person_feeds.append(site['feed'])
-    feeds[person['name']] = person_feeds
-
-if UPDATE_URLS:
-    for feed_urls in feeds.values():
-        for url in feed_urls:
-            try:
-                ucache.refresh(url)
-            except Exception, e:
-                traceback.print_exc(e)
-    save(urls, URLS_FILENAME)
-
-articles = load(ARTICLES_FILENAME, {})
-
-if UPDATE_ARTICLES:
-    for author, feed_urls in feeds.items():
-        articles[author] = []
-        for url in feed_urls:
-            feed = feedparser.parse(urls[url]['data'])
-            for entry in feed['entries']:
-                updated = entry.get('updated_parsed')
-                updated = date(year=updated.tm_year,
-                               month=updated.tm_mon,
-                               day=updated.tm_mday)
-                content = entry.get('content', '')
-                summary = entry.get('summary', '')
-                summary_detail = entry.get('summary_detail', {})
-                if not content:
-                    if not (summary_detail and
-                            summary_detail.get('value')):
-                        if not summary:
-                            pass
-                        else:
-                            content = [{'type': 'text/plain',
-                                        'value': summary}]
-                    else:
-                        content = [summary_detail]
-                if content:
-                    article = {'url': entry.get('link'),
-                               'title': entry.get('title'),
-                               'pub_date': updated,
-                               'content': content}
-                    articles[author].append(article)
-    save(articles, ARTICLES_FILENAME)
-
 def to_date_tuple(dt):
     return (dt.year, dt.month, dt.day)
 
-max_date = date.today()
-min_date = max_date - timedelta(days=3)
+def publish_edition(update_whoisi=False,
+                    update_urls=False,
+                    update_articles=False):
+    names = [line.strip()
+             for line in open(AUTHORS_FILENAME, 'r').readlines()
+             if line and not line.startswith('#')]
+
+    urls = load(URLS_FILENAME, {})
+    ucache = UrlCache(urls)
+
+    people = load(WHOISI_FILENAME, [])
+    wiserver = WhoisiServer()
+    wicache = WhoisiCache(wiserver, people)
+
+    if update_whoisi:
+        wicache.update()
+        save(people, WHOISI_FILENAME)
+
+    #wicache.refresh_people([people.index(person)])
 
-published_authors = [author for author in names
-                     if author in articles]
+    following = [person for person in people
+                 if person['name'] in names]
+
+    feeds = {}
 
-filtered_articles = {}
+    for person in following:
+        person_feeds = []
+        for site in person['sites'].values():
+            if site['type'] == 'feed':
+                person_feeds.append(site['feed'])
+        feeds[person['name']] = person_feeds
+
+    if update_urls:
+        for feed_urls in feeds.values():
+            for url in feed_urls:
+                try:
+                    ucache.refresh(url)
+                except Exception, e:
+                    traceback.print_exc(e)
+        save(urls, URLS_FILENAME)
+
+    articles = load(ARTICLES_FILENAME, {})
 
-for author in published_authors:
-    filtered_articles[author] = [
-        {'url': article['url'],
-         'title': article['title'],
-         'content': article['content'],
-         'pubDate': to_date_tuple(article['pub_date'])}
-        for article in articles[author]
-        if (article['pub_date'] > min_date and
-            article['pub_date'] < max_date)
-        ]
+    if update_articles:
+        for author, feed_urls in feeds.items():
+            articles[author] = []
+            for url in feed_urls:
+                feed = feedparser.parse(urls[url]['data'])
+                for entry in feed['entries']:
+                    updated = entry.get('updated_parsed')
+                    updated = date(year=updated.tm_year,
+                                   month=updated.tm_mon,
+                                   day=updated.tm_mday)
+                    content = entry.get('content', '')
+                    summary = entry.get('summary', '')
+                    summary_detail = entry.get('summary_detail', {})
+                    if not content:
+                        if not (summary_detail and
+                                summary_detail.get('value')):
+                            if not summary:
+                                pass
+                            else:
+                                content = [{'type': 'text/plain',
+                                            'value': summary}]
+                        else:
+                            content = [summary_detail]
+                    if content:
+                        article = {'url': entry.get('link'),
+                                   'title': entry.get('title'),
+                                   'pub_date': updated,
+                                   'content': content}
+                        articles[author].append(article)
+        save(articles, ARTICLES_FILENAME)
 
-json.dump({'authors': names, 'articles': filtered_articles},
-          open(JSON_FILENAME, 'w'))
+    max_date = date.today()
+    min_date = max_date - timedelta(days=3)
+
+    published_authors = [author for author in names
+                         if author in articles]
+
+    filtered_articles = {}
 
-logging.info('wrote %s.' % JSON_FILENAME)
+    for author in published_authors:
+        filtered_articles[author] = [
+            {'url': article['url'],
+             'title': article['title'],
+             'content': article['content'],
+             'pubDate': to_date_tuple(article['pub_date'])}
+            for article in articles[author]
+            if (article['pub_date'] > min_date and
+                article['pub_date'] < max_date)
+            ]
+
+    json.dump({'authors': names, 'articles': filtered_articles},
+              open(JSON_FILENAME, 'w'))
+
+    logging.info('wrote %s.' % JSON_FILENAME)
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG)
+    publish_edition()
```
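With the script body moved into `publish_edition()` and `logging.basicConfig()` relegated to the `__main__` block, the module can now be imported without kicking off a publish run, and the old module-level `UPDATE_*` flags become keyword arguments. A minimal sketch of how another script might drive the new entry point, assuming this revision of `publish_edition.py` is importable; the wrapper file name and the particular flag values are illustrative, not part of the changeset:

```python
# refresh_and_publish.py -- hypothetical wrapper, not part of this changeset.
import logging

# Importing no longer runs the pipeline; it only defines the function.
from publish_edition import publish_edition

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # Re-fetch cached feed URLs and re-parse articles, but skip the
    # whoisi cache update; all three flags default to False.
    publish_edition(update_urls=True, update_articles=True)
```

Turning the flags into keyword arguments also means a caller can choose per run which caches to refresh instead of editing constants at the top of the script.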