Mercurial > public > sg101
comparison forums/latest.py @ 581:ee87ea74d46b
For Django 1.4, rearranged project structure for new manage.py.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 May 2012 17:10:48 -0500 |
parents | gpp/forums/latest.py@7388cdf61b25 |
children | 2469d5864249 |
comparison
equal
deleted
inserted
replaced
580:c525f3e0b5d0 | 581:ee87ea74d46b |
---|---|
1 """ | |
2 This module maintains the latest posts datastore. The latest posts are often | |
3 needed by RSS feeds, "latest posts" template tags, etc. This module listens for | |
4 the post_content_update signal, then bundles the post up and stores it by forum | |
5 ID in Redis. We also maintain a combined forums list. This allows quick | |
6 retrieval of the latest posts and avoids some slow SQL queries. | |
7 | |
8 We also do things like send topic notification emails, auto-favorite, and | |
9 auto-subscribe functions here rather than bog the user down in the request / | |
10 response cycle. | |
11 | |
12 """ | |
13 import datetime | |
14 import logging | |
15 import time | |
16 | |
17 from django.dispatch import receiver | |
18 from django.utils import simplejson | |
19 import redis | |
20 | |
21 from forums.signals import post_content_update, topic_content_update | |
22 from forums.models import Forum, Topic, Post | |
23 from forums.views.subscriptions import notify_topic_subscribers | |
24 from forums.tools import auto_favorite, auto_subscribe | |
25 from core.services import get_redis_connection | |
26 | |
# This constant controls how many latest posts per forum we store
MAX_POSTS = 50

# This controls how many updated topics we track
MAX_UPDATED_TOPICS = 50

# Redis key names:
POST_COUNT_KEY = "forums:public_post_count"    # int: count of posts in public forums
TOPIC_COUNT_KEY = "forums:public_topic_count"  # int: count of topics in public forums
UPDATED_TOPICS_SET_KEY = "forums:updated_topics:set"  # sorted set of topic ids, scored by last post time
UPDATED_TOPIC_KEY = "forums:updated_topics:%s"        # JSON blob describing the topic with id %s

# Module-level logger, named after this module
logger = logging.getLogger(__name__)
40 | |
41 | |
@receiver(post_content_update, dispatch_uid='forums.latest_posts')
def on_post_update(sender, **kwargs):
    """
    Signal handler invoked when a post's content has been updated.

    Only newly created posts are of interest here; edits to existing
    posts are ignored. New posts are handed off to a Celery task so the
    work happens outside of the request/response cycle.

    """
    if kwargs['created']:
        # Defer all new-post processing to a Celery task
        forums.tasks.new_post_task.delay(sender.id)
58 | |
59 | |
def process_new_post(post_id):
    """
    Perform all processing for a newly created post.

    This function runs on a Celery task. For posts in public forums it
    refreshes the Redis-backed feeds and statistics; it then sends topic
    notification emails and applies auto-favorite / auto-subscribe
    actions for the new post.

    """
    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_new_post: post %d does not exist", post_id)
        return

    public_forums = Forum.objects.public_forum_ids()

    # Only posts in public forums are reflected in the Redis caches
    if post.topic.forum.id in public_forums:
        redis_conn = get_redis_connection()
        _update_post_feeds(redis_conn, post)
        _update_post_count(redis_conn, public_forums)
        _update_latest_topics(redis_conn, post)

    # Send out any email notifications; we are already off the
    # request/response cycle, so don't defer again
    notify_topic_subscribers(post, defer=False)

    # Perform any auto-favorite and auto-subscribe actions for the new post
    auto_favorite(post)
    auto_subscribe(post)
86 | |
87 | |
def _update_post_feeds(conn, post):
    """
    Update the per-forum and combined post feeds we keep in Redis so
    that our RSS feeds are quick.

    """
    # Serialize the post attributes we need as JSON
    serialized = simplejson.dumps({
        'id': post.id,
        'title': post.topic.name,
        'content': post.html,
        'author': post.user.username,
        'pubdate': int(time.mktime(post.creation_date.timetuple())),
        'forum_name': post.topic.forum.name,
        'url': post.get_absolute_url()
    })

    # Push onto the forum-specific feed, then the combined feed, trimming
    # each list to the newest MAX_POSTS entries. The combined feed stores
    # the post a second time — this wastes a little memory but makes
    # retrieval much easier.
    pipeline = conn.pipeline()
    for key in ('forums:latest:%d' % post.topic.forum.id, 'forums:latest:*'):
        pipeline.lpush(key, serialized)
        pipeline.ltrim(key, 0, MAX_POSTS - 1)
    pipeline.execute()
124 | |
125 | |
def _update_post_count(conn, public_forums):
    """
    Update the public post count we cache in Redis, since a COUNT(*) on
    the post table can be expensive in MySQL InnoDB.

    """
    if conn.incr(POST_COUNT_KEY) == 1:
        # The key did not previously exist, so Redis was likely trashed;
        # recompute the true value from the database and store it
        total = Post.objects.filter(topic__forum__in=public_forums).count()
        conn.set(POST_COUNT_KEY, total)
138 | |
139 | |
def _update_latest_topics(conn, post):
    """
    Updates the "latest topics with new posts" list we cache in Redis for speed.
    There is a template tag and forum view that uses this information.

    Each topic's display data is stored as JSON under UPDATED_TOPIC_KEY % id,
    and its id is tracked in the UPDATED_TOPICS_SET_KEY sorted set, scored by
    the creation time of its newest post. The set is trimmed to at most
    MAX_UPDATED_TOPICS entries, oldest first.

    """
    # serialize topic attributes
    topic_id = post.topic.id
    # score = post creation time as a Unix timestamp, so the sorted set
    # orders topics by most recent activity
    topic_score = int(time.mktime(post.creation_date.timetuple()))

    topic_content = {
        'title': post.topic.name,
        'author': post.user.username,
        'date': topic_score,
        'url': post.topic.get_latest_post_url()
    }
    json = simplejson.dumps(topic_content)
    key = UPDATED_TOPIC_KEY % topic_id

    pipeline = conn.pipeline()
    pipeline.set(key, json)
    # NOTE(review): positional (score, member) order matches the older
    # redis-py 2.x zadd signature — confirm against the installed client
    pipeline.zadd(UPDATED_TOPICS_SET_KEY, topic_score, topic_id)
    pipeline.zcard(UPDATED_TOPICS_SET_KEY)
    results = pipeline.execute()

    # delete topics beyond our maximum count; the zcard reply (last
    # pipeline result) is the set size after this update
    num_topics = results[-1]
    num_to_del = num_topics - MAX_UPDATED_TOPICS
    if num_to_del > 0:
        # get the IDs of the topics we need to delete first (lowest
        # scores = oldest activity)
        start = 0
        stop = num_to_del - 1 # Redis indices are inclusive
        old_ids = conn.zrange(UPDATED_TOPICS_SET_KEY, start, stop)

        # remove the per-topic JSON blobs for the expiring topics
        keys = [UPDATED_TOPIC_KEY % n for n in old_ids]
        conn.delete(*keys)

        # now delete the oldest num_to_del topics
        conn.zremrangebyrank(UPDATED_TOPICS_SET_KEY, start, stop)
179 | |
180 | |
def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
    """
    Return a list of dictionaries describing the latest posts for a forum.

    Retrieves up to num_posts posts for the forum with the given forum_id.
    If forum_id is None, posts come from the combined all-forums datastore.
    num_posts is clamped to the range [0, MAX_POSTS]; an empty list is
    returned when it clamps to 0.

    """
    # Compare against None explicitly; the old truthiness test would have
    # silently routed a falsy forum_id (e.g. 0) to the combined feed
    if forum_id is not None:
        key = 'forums:latest:%d' % forum_id
    else:
        key = 'forums:latest:*'

    num_posts = max(0, min(MAX_POSTS, num_posts))

    if num_posts == 0:
        return []

    conn = get_redis_connection()
    raw_posts = conn.lrange(key, 0, num_posts - 1)

    posts = []
    for raw_post in raw_posts:
        post = simplejson.loads(raw_post)

        # fix up the pubdate; turn it back into a datetime object
        post['pubdate'] = datetime.datetime.fromtimestamp(post['pubdate'])

        posts.append(post)

    return posts
209 | |
210 | |
@receiver(topic_content_update, dispatch_uid='forums.latest_posts')
def on_topic_update(sender, **kwargs):
    """
    Signal handler invoked when a topic's content has been updated.

    Only newly created topics are of interest here; edits to existing
    topics are ignored. New topics are handed off to a Celery task so the
    work happens outside of the request/response cycle.

    """
    if kwargs['created']:
        # Defer all new-topic processing to a Celery task
        forums.tasks.new_topic_task.delay(sender.id)
227 | |
228 | |
def process_new_topic(topic_id):
    """
    Perform all processing for a newly created topic.

    This function runs on a Celery task. Currently the only work done
    here is maintaining the cached public topic count statistic.

    """
    try:
        topic = Topic.objects.select_related().get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("process_new_topic: topic %d does not exist", topic_id)
        return

    public_forums = Forum.objects.public_forum_ids()

    # topics in non-public forums are not counted
    if topic.forum.id not in public_forums:
        return

    # update the topic count statistic
    conn = get_redis_connection()
    if conn.incr(TOPIC_COUNT_KEY) == 1:
        # The key did not previously exist, so Redis was likely trashed;
        # recompute the true value from the database and store it
        count = Topic.objects.filter(forum__in=public_forums).count()
        conn.set(TOPIC_COUNT_KEY, count)
256 | |
257 | |
def get_stats():
    """
    Return the topic and post count statistics as a tuple, in that order.

    If a statistic is not available (Redis error, or the counter key is
    missing/empty), its position in the tuple will be None.

    """
    try:
        conn = get_redis_connection()
        result = conn.mget(TOPIC_COUNT_KEY, POST_COUNT_KEY)
    # "except X as e" replaces the Python-2-only "except X, e" form,
    # which was removed in Python 3 (valid since Python 2.6)
    except redis.RedisError as e:
        logger.error(e)
        return (None, None)

    topic_count = int(result[0]) if result[0] else None
    post_count = int(result[1]) if result[1] else None

    return (topic_count, post_count)
276 | |
277 | |
def get_latest_topic_ids(num):
    """
    Return a list of topic ids from the latest topics that have posts.

    The ids are sorted from newest to oldest. An empty list is returned
    on Redis errors or when num is not positive.

    """
    # Guard against num <= 0: zrevrange(key, 0, num - 1) with num == 0
    # would ask for index -1 and return the ENTIRE set instead of nothing
    # (get_latest_posts clamps the same way)
    if num <= 0:
        return []

    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
    # Python-3-compatible except syntax (old "except X, e" form removed)
    except redis.RedisError as e:
        logger.error(e)
        return []

    return [int(n) for n in result]
292 | |
293 | |
def get_latest_topics(num):
    """
    Return a list of dictionaries with information about the latest topics
    that have updated posts.

    The topics are sorted from newest to oldest. An empty list is returned
    on Redis errors or when num is not positive.

    """
    # Guard against num <= 0: zrevrange(key, 0, num - 1) with num == 0
    # would ask for index -1 and return the ENTIRE set instead of nothing
    if num <= 0:
        return []

    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)

        topic_keys = [UPDATED_TOPIC_KEY % n for n in result]
        json_list = conn.mget(topic_keys) if topic_keys else []

    # Python-3-compatible except syntax (old "except X, e" form removed)
    except redis.RedisError as e:
        logger.error(e)
        return []

    topics = []
    for s in json_list:
        # A topic can be deleted (see notify_topic_delete) between our
        # zrevrange and mget calls; mget returns None for missing keys,
        # so skip those entries rather than crash in the JSON decoder
        if s is None:
            continue
        item = simplejson.loads(s)
        item['date'] = datetime.datetime.fromtimestamp(item['date'])
        topics.append(item)

    return topics
318 | |
319 | |
def notify_topic_delete(topic):
    """
    Clean up cached data when a topic is deleted.

    This function should be called when a topic is deleted. It removes
    the topic from the updated-topics sorted set, if present, and deletes
    any cached info we have about the topic. Redis errors are logged and
    swallowed.

    Note we don't do anything like this for posts. Since they just populate
    RSS feeds we'll let them 404. The updated topic list is seen in a
    prominent template tag however, so it is a bit more important to get
    that cleaned up.

    """
    try:
        conn = get_redis_connection()
        pipeline = conn.pipeline()
        pipeline.zrem(UPDATED_TOPICS_SET_KEY, topic.id)
        pipeline.delete(UPDATED_TOPIC_KEY % topic.id)
        pipeline.execute()
    # Python-3-compatible except syntax (old "except X, e" form removed)
    except redis.RedisError as e:
        logger.error(e)
339 | |
340 | |
# Imported down here, after our definitions, to avoid a circular import
# with forums.tasks; the signal handlers above reference
# forums.tasks.new_post_task / new_topic_task at call time only.
import forums.tasks