Mercurial > daily-edition
annotate publish_edition.py @ 28:6e2038000082
added dry-run cmdline option
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Sun, 03 Jan 2010 09:20:29 -0800 |
parents | 83091b7b3e59 |
children | 6fbd38dd976a |
rev | line source |
---|---|
13
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
1 import os |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
2 import logging |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
3 import traceback |
15 | 4 import optparse |
13
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
5 import cPickle as pickle |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
6 from datetime import date, datetime, timedelta |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
7 |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
8 import feedparser |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
9 from whoisi_cache import WhoisiServer, WhoisiCache, json |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
10 from url_cache import UrlCache |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
11 |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
12 AUTHORS_FILENAME = 'authors.txt' |
20
5e7ecaf68b9a
renamed .pickle files to .dat
Atul Varma <varmaa@toolness.com>
parents:
19
diff
changeset
|
13 URLS_FILENAME = 'url_cache.dat' |
5e7ecaf68b9a
renamed .pickle files to .dat
Atul Varma <varmaa@toolness.com>
parents:
19
diff
changeset
|
14 WHOISI_FILENAME = 'whoisi_cache.dat' |
5e7ecaf68b9a
renamed .pickle files to .dat
Atul Varma <varmaa@toolness.com>
parents:
19
diff
changeset
|
15 ARTICLES_FILENAME = 'articles.dat' |
22
f2fafca30ff3
added issue tracking information, though it's not yet being used in any way.
Atul Varma <varmaa@toolness.com>
parents:
21
diff
changeset
|
16 ISSUES_FILENAME = 'issues.dat' |
13
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
17 JSON_FILENAME = 'daily-edition.json' |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
18 |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
def load(filename, default):
    """Return the object pickled in *filename*, or *default* if it is absent.

    filename: path of a pickle file previously written by save().
    default: value handed back unchanged when no such file exists.

    The file is opened in binary mode, matching save(), and is closed
    before returning even if unpickling fails.
    """
    if not os.path.exists(filename):
        return default
    # NOTE(review): unpickling can execute arbitrary code, so this must only
    # ever be pointed at data files this script wrote itself.
    with open(filename, 'rb') as f:
        return pickle.load(f)


def save(obj, filename):
    """Pickle *obj* into *filename*, replacing any existing file.

    The file is opened in binary mode, matching load(), and is closed even
    if pickling raises, so no handle is leaked.
    """
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
28 |
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
def to_date_tuple(dt):
    """Break a date-like object into a ``(year, month, day)`` tuple."""
    parts = [getattr(dt, field) for field in ('year', 'month', 'day')]
    return tuple(parts)
69fd13a4aef4
Added an html viewer and a publish_edition.py script that generates the JSON which the html viewer reads.
Atul Varma <varmaa@toolness.com>
parents:
diff
changeset
|
31 |
def refresh_urls(feeds, urls):
    """Refresh the cached copy of every feed URL listed in *feeds*.

    feeds: mapping of author name -> list of feed URLs.
    urls: the cache store wrapped by UrlCache (presumably updated in place
        by UrlCache.refresh; confirm against url_cache).

    A failure on one URL is reported on stderr and does not stop the
    remaining URLs from being refreshed.
    """
    ucache = UrlCache(urls)
    for feed_urls in feeds.values():
        for url in feed_urls:
            try:
                ucache.refresh(url)
            except Exception:
                # Deliberately best-effort: one bad feed must not abort the
                # whole run. print_exc() takes no exception argument; its
                # first parameter is the traceback depth limit.
                traceback.print_exc()
40 | |
def _entry_to_article(entry):
    """Convert one parsed feed entry into an article dict, or None to skip it."""
    content = entry.get('content', '')
    if not content:
        # Fall back to the summary: prefer the detailed form, and wrap a bare
        # summary string so 'content' always is a list of dicts.
        summary_detail = entry.get('summary_detail', {})
        summary = entry.get('summary', '')
        if summary_detail and summary_detail.get('value'):
            content = [summary_detail]
        elif summary:
            content = [{'type': 'text/plain', 'value': summary}]
    if not content:
        return None

    updated = entry.get('updated_parsed')
    if updated is None:
        # Without a date the article can never pass the age filter, so skip
        # it instead of crashing on the attribute access below.
        logging.warning('skipping entry without a date: %s.',
                        entry.get('link'))
        return None

    return {'url': entry.get('link'),
            'title': entry.get('title'),
            'pub_date': date(year=updated.tm_year,
                             month=updated.tm_mon,
                             day=updated.tm_mday),
            'content': content}


def refresh_articles(articles, feeds, urls):
    """Rebuild the per-author article lists from the cached feed data.

    articles: mapping of author name -> list of article dicts; the entry of
        every author in *feeds* is replaced, other authors are left alone.
    feeds: mapping of author name -> list of feed URLs.
    urls: mapping of feed URL -> cache record whose 'data' item is handed
        to feedparser.parse().

    Feeds with no cached data, entries with no content or summary, and
    entries with no parsed date are skipped.
    """
    for author, feed_urls in feeds.items():
        articles[author] = []
        for url in feed_urls:
            try:
                data = urls[url]['data']
            except KeyError:
                # A feed whose refresh failed may never have been cached.
                logging.warning('no cached data for feed at %s; skipping.',
                                url)
                continue
            logging.debug('parsing feed at %s.' % url)
            feed = feedparser.parse(data)
            for entry in feed['entries']:
                article = _entry_to_article(entry)
                if article is not None:
                    articles[author].append(article)
71 | |
24
2b4cf6903012
implemented a basic algorithm for filtering articles.
Atul Varma <varmaa@toolness.com>
parents:
23
diff
changeset
|
def normalize(obj):
    """Recursively decode every byte string found inside *obj* as UTF-8.

    Lists and dicts are walked and their items replaced in place (dict keys
    are left untouched); the same container is returned. A byte string that
    is not valid UTF-8 is logged and decoded with the bad bytes dropped.
    Any other kind of value is returned unchanged.
    """
    if isinstance(obj, list):
        for index, item in enumerate(obj):
            obj[index] = normalize(item)
        return obj

    if isinstance(obj, dict):
        for key in list(obj):
            obj[key] = normalize(obj[key])
        return obj

    if not isinstance(obj, str):
        return obj

    try:
        return obj.decode('utf-8')
    except UnicodeDecodeError as e:
        logging.warn('error decoding "%s" (%s).' % (repr(obj), e))
        return obj.decode('utf-8', 'ignore')
2b4cf6903012
implemented a basic algorithm for filtering articles.
Atul Varma <varmaa@toolness.com>
parents:
23
diff
changeset
|
86 |
2b4cf6903012
implemented a basic algorithm for filtering articles.
Atul Varma <varmaa@toolness.com>
parents:
23
diff
changeset
|
def filter_articles(names, articles, issues,
                    max_articles_per_author=1,
                    max_word_count=2500,
                    max_article_age=timedelta(days=30)):
    """Pick the articles that go into the next issue.

    names: author names, in the order authors should be considered.
    articles: mapping of author name -> list of article dicts with 'url',
        'title', 'content' and 'pub_date' (a date) items.
    issues: dict whose 'urls' item holds the URLs already published; those
        articles are never picked again.
    max_articles_per_author: how many articles one author may contribute.
    max_word_count: word budget shared by the whole issue.
    max_article_age: articles published at or before today minus this
        timedelta are ignored.

    Returns a dict of author name -> list of selected article dicts ('url',
    'title', 'content', 'pubDate' as a (year, month, day) tuple), passed
    through normalize(). Authors with no selected article are omitted.
    """
    min_date = date.today() - max_article_age
    published_urls = issues['urls']

    filtered_articles = {}
    words_left = max_word_count

    for author in names:
        if author not in articles:
            continue
        articles_left = max_articles_per_author

        for article in articles[author]:
            if not article['pub_date'] > min_date:
                continue
            if article['url'] in published_urls:
                continue

            html = [ctype['value']
                    for ctype in article['content']
                    if ctype.get('type') == 'text/html'
                    and ctype.get('value')]
            if not html:
                logging.warning('no html content for %s.', article['url'])
                continue
            if len(html) > 1:
                logging.warning('multiple html found for %s.',
                                article['url'])
                continue

            # Whitespace-separated tokens of the raw HTML; markup counts too.
            word_count = len(html[0].split())
            if word_count < words_left:
                filtered_articles.setdefault(author, []).append(
                    {'url': article['url'],
                     'title': article['title'],
                     'content': article['content'],
                     'pubDate': to_date_tuple(article['pub_date'])})
                # Only an article that was actually selected consumes the
                # issue's word budget and the author's quota.
                words_left -= word_count
                articles_left -= 1
                if not articles_left:
                    break
            elif word_count > max_word_count:
                logging.warning(
                    'article will never be included in an '
                    'issue due to word count: %s (%d words)',
                    article['url'],
                    word_count)

    return normalize(filtered_articles)
22
f2fafca30ff3
added issue tracking information, though it's not yet being used in any way.
Atul Varma <varmaa@toolness.com>
parents:
21
diff
changeset
|
139 |
14
4a2499602804
refactorings to publish_edition.py
Atul Varma <varmaa@toolness.com>
parents:
13
diff
changeset
|
def _read_author_names(filename):
    """Return the author names listed one per line in *filename*."""
    with open(filename, 'r') as f:
        stripped = [line.strip() for line in f]
    # Strip before testing so blank lines and indented '#' comments are
    # dropped instead of turning into bogus author names.
    return [name for name in stripped
            if name and not name.startswith('#')]


def _feeds_by_author(following):
    """Map each followed person's name to the feed URLs of their sites."""
    feeds = {}
    for person in following:
        feeds[person['name']] = [site['feed']
                                 for site in person['sites'].values()
                                 if site['type'] == 'feed']
    return feeds


def publish_edition(update_whoisi=False,
                    update_urls=False,
                    update_articles=False,
                    dry_run=False):
    """Build the next issue and write it out as JSON.

    update_whoisi: update the whoisi cache first; implies update_urls.
    update_urls: refresh the cached feed data; implies update_articles.
    update_articles: re-parse the cached feeds into articles.
    dry_run: perform the requested refreshes and the selection, but write
        none of the data files nor the JSON output.

    Reads the author list from AUTHORS_FILENAME, selects articles with
    filter_articles(), records them under a new issue id and dumps the
    authors and selected articles to JSON_FILENAME.
    """
    if update_whoisi:
        update_urls = True
    if update_urls:
        update_articles = True

    names = _read_author_names(AUTHORS_FILENAME)

    people = load(WHOISI_FILENAME, [])
    wiserver = WhoisiServer()
    wicache = WhoisiCache(wiserver, people)

    if update_whoisi:
        wicache.update()

    whoisi_names = [person['name'] for person in people]
    unknown_names = [name for name in names
                     if name not in whoisi_names]
    if unknown_names:
        logging.warning('could not find information on: %s.',
                        ', '.join(unknown_names))

    following = [person for person in people
                 if person['name'] in names]

    if update_whoisi:
        people_indexes = [people.index(person)
                          for person in following]
        wicache.refresh_people(people_indexes)
        if not dry_run:
            save(people, WHOISI_FILENAME)

    feeds = _feeds_by_author(following)

    urls = load(URLS_FILENAME, {})

    if update_urls:
        refresh_urls(feeds=feeds, urls=urls)
        if not dry_run:
            save(urls, URLS_FILENAME)

    articles = load(ARTICLES_FILENAME, {})

    if update_articles:
        refresh_articles(articles=articles,
                         feeds=feeds,
                         urls=urls)
        if not dry_run:
            save(articles, ARTICLES_FILENAME)

    issues = load(ISSUES_FILENAME, {'urls': {},
                                    'pub_dates': []})

    filtered_articles = filter_articles(names=names,
                                        articles=articles,
                                        issues=issues)

    # The new issue's id is its position in the list of publication dates.
    issue_id = len(issues['pub_dates'])
    issues['pub_dates'].append(datetime.now())
    for author in filtered_articles:
        for article in filtered_articles[author]:
            issues['urls'][article['url']] = issue_id
    if not dry_run:
        save(issues, ISSUES_FILENAME)

    if not dry_run:
        # Close the output explicitly so the JSON is flushed to disk before
        # it is reported as written.
        with open(JSON_FILENAME, 'w') as f:
            json.dump({'authors': names, 'articles': filtered_articles}, f)

        logging.info('wrote %s (issue #%d).' % (JSON_FILENAME, issue_id))
14
4a2499602804
refactorings to publish_edition.py
Atul Varma <varmaa@toolness.com>
parents:
13
diff
changeset
|
222 |
# Command-line switches: each key is the tuple of option strings and each
# value the keyword arguments given to OptionParser.add_option(). Every
# switch is a boolean flag that is off by default, and its 'dest' matches a
# keyword argument of publish_edition().
parser_options = {
    ('-w', '--refresh-whoisi',): {
        'dest': 'update_whoisi',
        'help': 're-sync with whoisi.com',
        'action': 'store_true',
        'default': False,
    },

    ('-f', '--refresh-feeds',): {
        'dest': 'update_urls',
        'help': 'refresh feeds',
        'action': 'store_true',
        'default': False,
    },

    ('-p', '--reparse-feeds',): {
        'dest': 'update_articles',
        'help': 're-parse feeds',
        'action': 'store_true',
        'default': False,
    },

    ('-d', '--dry-run',): {
        'dest': 'dry_run',
        'help': 'do not write anything to disk',
        'action': 'store_true',
        'default': False,
    },
}
248 | |
14
4a2499602804
refactorings to publish_edition.py
Atul Varma <varmaa@toolness.com>
parents:
13
diff
changeset
|
if __name__ == '__main__':
    # Script entry point: log at debug level, register every switch from
    # parser_options, then forward the parsed flags to publish_edition()
    # as keyword arguments.
    logging.basicConfig(level=logging.DEBUG)

    parser = optparse.OptionParser()
    for option_strings, settings in parser_options.items():
        parser.add_option(*option_strings, **settings)
    options, _positional = parser.parse_args()

    publish_edition(**vars(options))
257 publish_edition(**options.__dict__) |