# HG changeset patch
# User Atul Varma <varmaa@toolness.com>
# Date 1262542913 28800
# Node ID 6fbd38dd976a3e6f4fbecbbd08e0e55f6c6c0a05
# Parent  6e2038000082e2d48af272c625bf08212d7d6314
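Added more logging stats to filter_articles (articles found, potential articles, total word count) and lowered the default max_article_age from 30 to 15 days.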

diff -r 6e2038000082 -r 6fbd38dd976a publish_edition.py
--- a/publish_edition.py	Sun Jan 03 09:20:29 2010 -0800
+++ b/publish_edition.py	Sun Jan 03 10:21:53 2010 -0800
@@ -87,7 +87,7 @@
 def filter_articles(names, articles, issues,
                     max_articles_per_author=1,
                     max_word_count=2500,
-                    max_article_age=timedelta(days=30)):
+                    max_article_age=timedelta(days=15)):
     min_date = date.today() - max_article_age
 
     published_authors = [author for author in names
@@ -96,6 +96,10 @@
     filtered_articles = {}
     words_left = max_word_count
 
+    total_potentials = 0
+    total_articles = 0
+    total_word_count = 0
+
     for author in published_authors:
         articles_left = max_articles_per_author
         potential_articles = [
@@ -108,6 +112,8 @@
                 and article['url'] not in issues['urls'])
             ]
 
+        total_potentials += len(potential_articles)
+
         for article in potential_articles:
             html = [ctype['value']
                     for ctype in article['content']
@@ -122,6 +128,8 @@
                 if word_count < words_left:
                     if author not in filtered_articles:
                         filtered_articles[author] = []
+                    total_word_count += word_count
+                    total_articles += 1
                     filtered_articles[author].append(article)
                 elif word_count > max_word_count:
                     logging.warn(
@@ -135,6 +143,9 @@
                 if not articles_left:
                     break
 
+    logging.debug('found %d articles (out of a potential %d), totalling '
+                  '%d words.' % 
+                  (total_articles, total_potentials, total_word_count))
     return normalize(filtered_articles)
 
 def publish_edition(update_whoisi=False,