Mercurial > daily-edition
changeset 29:6fbd38dd976a
added more logging stats.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Sun, 03 Jan 2010 10:21:53 -0800 |
parents | 6e2038000082 |
children | 74f23dc049e8 |
files | publish_edition.py |
diffstat | 1 files changed, 12 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/publish_edition.py Sun Jan 03 09:20:29 2010 -0800
+++ b/publish_edition.py Sun Jan 03 10:21:53 2010 -0800
@@ -87,7 +87,7 @@
 def filter_articles(names, articles, issues,
                     max_articles_per_author=1,
                     max_word_count=2500,
-                    max_article_age=timedelta(days=30)):
+                    max_article_age=timedelta(days=15)):
     min_date = date.today() - max_article_age
 
     published_authors = [author for author in names
@@ -96,6 +96,10 @@
     filtered_articles = {}
     words_left = max_word_count
 
+    total_potentials = 0
+    total_articles = 0
+    total_word_count = 0
+
     for author in published_authors:
         articles_left = max_articles_per_author
         potential_articles = [
@@ -108,6 +112,8 @@
                 and article['url'] not in issues['urls'])
             ]
 
+        total_potentials += len(potential_articles)
+
         for article in potential_articles:
             html = [ctype['value']
                     for ctype in article['content']
@@ -122,6 +128,8 @@
                 if word_count < words_left:
                     if author not in filtered_articles:
                         filtered_articles[author] = []
+                    total_word_count += word_count
+                    total_articles += 1
                     filtered_articles[author].append(article)
                 elif word_count > max_word_count:
                     logging.warn(
@@ -135,6 +143,9 @@
             if not articles_left:
                 break
 
+    logging.debug('found %d articles (out of a potential %d), totalling '
+                  '%d words.' %
+                  (total_articles, total_potentials, total_word_count))
     return normalize(filtered_articles)
 
 def publish_edition(update_whoisi=False,
```