# HG changeset patch # User Atul Varma # Date 1262542913 28800 # Node ID 6fbd38dd976a3e6f4fbecbbd08e0e55f6c6c0a05 # Parent 6e2038000082e2d48af272c625bf08212d7d6314 added more logging stats. diff -r 6e2038000082 -r 6fbd38dd976a publish_edition.py --- a/publish_edition.py Sun Jan 03 09:20:29 2010 -0800 +++ b/publish_edition.py Sun Jan 03 10:21:53 2010 -0800 @@ -87,7 +87,7 @@ def filter_articles(names, articles, issues, max_articles_per_author=1, max_word_count=2500, - max_article_age=timedelta(days=30)): + max_article_age=timedelta(days=15)): min_date = date.today() - max_article_age published_authors = [author for author in names @@ -96,6 +96,10 @@ filtered_articles = {} words_left = max_word_count + total_potentials = 0 + total_articles = 0 + total_word_count = 0 + for author in published_authors: articles_left = max_articles_per_author potential_articles = [ @@ -108,6 +112,8 @@ and article['url'] not in issues['urls']) ] + total_potentials += len(potential_articles) + for article in potential_articles: html = [ctype['value'] for ctype in article['content'] @@ -122,6 +128,8 @@ if word_count < words_left: if author not in filtered_articles: filtered_articles[author] = [] + total_word_count += word_count + total_articles += 1 filtered_articles[author].append(article) elif word_count > max_word_count: logging.warn( @@ -135,6 +143,9 @@ if not articles_left: break + logging.debug('found %d articles (out of a potential %d), totalling ' + '%d words.' % + (total_articles, total_potentials, total_word_count)) return normalize(filtered_articles) def publish_edition(update_whoisi=False,