comparison forums/latest.py @ 581:ee87ea74d46b

For Django 1.4, rearranged project structure for new manage.py.
author Brian Neal <bgneal@gmail.com>
date Sat, 05 May 2012 17:10:48 -0500
parents gpp/forums/latest.py@7388cdf61b25
children 2469d5864249
comparison
equal deleted inserted replaced
580:c525f3e0b5d0 581:ee87ea74d46b
1 """
2 This module maintains the latest posts datastore. The latest posts are often
3 needed by RSS feeds, "latest posts" template tags, etc. This module listens for
4 the post_content_update signal, then bundles the post up and stores it by forum
5 ID in Redis. We also maintain a combined forums list. This allows quick
6 retrieval of the latest posts and avoids some slow SQL queries.
7
8 We also do things like send topic notification emails, auto-favorite, and
9 auto-subscribe functions here rather than bog the user down in the request /
10 response cycle.
11
12 """
13 import datetime
14 import logging
15 import time
16
17 from django.dispatch import receiver
18 from django.utils import simplejson
19 import redis
20
21 from forums.signals import post_content_update, topic_content_update
22 from forums.models import Forum, Topic, Post
23 from forums.views.subscriptions import notify_topic_subscribers
24 from forums.tools import auto_favorite, auto_subscribe
25 from core.services import get_redis_connection
26
27 # This constant controls how many latest posts per forum we store
28 MAX_POSTS = 50
29
30 # This controls how many updated topics we track
31 MAX_UPDATED_TOPICS = 50
32
33 # Redis key names:
34 POST_COUNT_KEY = "forums:public_post_count"
35 TOPIC_COUNT_KEY = "forums:public_topic_count"
36 UPDATED_TOPICS_SET_KEY = "forums:updated_topics:set"
37 UPDATED_TOPIC_KEY = "forums:updated_topics:%s"
38
39 logger = logging.getLogger(__name__)
40
41
@receiver(post_content_update, dispatch_uid='forums.latest_posts')
def on_post_update(sender, **kwargs):
    """
    Signal handler invoked whenever a post's content is updated.

    Only newly created posts matter here; edits are ignored. The actual
    work is handed off to a Celery task so we don't bog down the
    request/response cycle.

    """
    if kwargs['created']:
        # defer all new-post processing to a Celery task
        forums.tasks.new_post_task.delay(sender.id)
58
59
def process_new_post(post_id):
    """
    Perform all new-post processing; this runs inside a Celery task.

    """
    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_new_post: post %d does not exist", post_id)
        return

    # only posts made in public forums update the Redis datastores
    public_forums = Forum.objects.public_forum_ids()
    if post.topic.forum.id in public_forums:
        conn = get_redis_connection()
        _update_post_feeds(conn, post)
        _update_post_count(conn, public_forums)
        _update_latest_topics(conn, post)

    # email any subscribers of this topic
    notify_topic_subscribers(post, defer=False)

    # apply the auto-favorite and auto-subscribe rules for this post
    auto_favorite(post)
    auto_subscribe(post)
86
87
def _update_post_feeds(conn, post):
    """
    Store the new post in the per-forum and combined Redis lists that back
    our RSS feeds, so feed requests avoid slow SQL queries.

    """
    topic = post.topic

    # serialize the post attributes the feeds need
    payload = simplejson.dumps({
        'id': post.id,
        'title': topic.name,
        'content': post.html,
        'author': post.user.username,
        'pubdate': int(time.mktime(post.creation_date.timetuple())),
        'forum_name': topic.forum.name,
        'url': post.get_absolute_url()
    })

    # Push onto the forum-specific list, then the combined '*' list,
    # trimming each to the newest MAX_POSTS entries. The combined list
    # duplicates some data, but it makes retrieval much easier.
    pipeline = conn.pipeline()
    for key in ('forums:latest:%d' % topic.forum.id, 'forums:latest:*'):
        pipeline.lpush(key, payload)
        pipeline.ltrim(key, 0, MAX_POSTS - 1)
    pipeline.execute()
124
125
def _update_post_count(conn, public_forums):
    """
    Increment the public post count cached in Redis; a COUNT(*) on the post
    table can be expensive in MySQL InnoDB, so we maintain it here instead.

    """
    # An INCR result of 1 means the key didn't exist before, which likely
    # means Redis got trashed; recompute the true count from the database.
    if conn.incr(POST_COUNT_KEY) == 1:
        actual = Post.objects.filter(topic__forum__in=public_forums).count()
        conn.set(POST_COUNT_KEY, actual)
138
139
def _update_latest_topics(conn, post):
    """
    Updates the "latest topics with new posts" list we cache in Redis for speed.
    There is a template tag and forum view that uses this information.

    """
    # serialize topic attributes; the topic's sorted-set score is the
    # creation time of its newest post as a Unix timestamp
    topic_id = post.topic.id
    topic_score = int(time.mktime(post.creation_date.timetuple()))

    topic_content = {
        'title': post.topic.name,
        'author': post.user.username,
        'date': topic_score,
        'url': post.topic.get_latest_post_url()
    }
    json = simplejson.dumps(topic_content)
    key = UPDATED_TOPIC_KEY % topic_id

    # pipeline: store the topic info blob, add/refresh the topic in the
    # time-ordered set, then read back the set's cardinality
    pipeline = conn.pipeline()
    pipeline.set(key, json)
    pipeline.zadd(UPDATED_TOPICS_SET_KEY, topic_score, topic_id)
    pipeline.zcard(UPDATED_TOPICS_SET_KEY)
    results = pipeline.execute()

    # delete topics beyond our maximum count
    num_topics = results[-1]    # the zcard result is last in the pipeline
    num_to_del = num_topics - MAX_UPDATED_TOPICS
    if num_to_del > 0:
        # get the IDs of the topics we need to delete first
        # (zrange returns lowest scores first, i.e. the oldest topics)
        start = 0
        stop = num_to_del - 1 # Redis indices are inclusive
        old_ids = conn.zrange(UPDATED_TOPICS_SET_KEY, start, stop)

        # remove each stale topic's serialized info key
        keys = [UPDATED_TOPIC_KEY % n for n in old_ids]
        conn.delete(*keys)

        # now delete the oldest num_to_del topics
        conn.zremrangebyrank(UPDATED_TOPICS_SET_KEY, start, stop)
179
180
def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
    """
    Return a list of dictionaries describing the num_posts latest posts for
    the forum with the given forum_id. When forum_id is None, posts come
    from the combined forums datastore. Each dictionary holds the cached
    attributes of one post.

    """
    key = 'forums:latest:%d' % forum_id if forum_id else 'forums:latest:*'

    # clamp the request to the range [0, MAX_POSTS]
    num_posts = max(0, min(MAX_POSTS, num_posts))
    if not num_posts:
        return []

    conn = get_redis_connection()

    posts = []
    for raw_post in conn.lrange(key, 0, num_posts - 1):
        post = simplejson.loads(raw_post)

        # pubdate was cached as a Unix timestamp; restore the datetime
        post['pubdate'] = datetime.datetime.fromtimestamp(post['pubdate'])
        posts.append(post)

    return posts
209
210
@receiver(topic_content_update, dispatch_uid='forums.latest_posts')
def on_topic_update(sender, **kwargs):
    """
    Signal handler invoked whenever a topic's content is updated.

    Only newly created topics matter here; edits are ignored. The actual
    work is handed off to a Celery task so we don't bog down the
    request/response cycle.

    """
    if kwargs['created']:
        # defer all new-topic processing to a Celery task
        forums.tasks.new_topic_task.delay(sender.id)
227
228
def process_new_topic(topic_id):
    """
    New-topic processing, run from a Celery task. Currently this only
    maintains the cached public topic count statistic.

    """
    try:
        topic = Topic.objects.select_related().get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("process_new_topic: topic %d does not exist", topic_id)
        return

    public_forums = Forum.objects.public_forum_ids()

    # topics in non-public forums don't affect the public statistics
    if topic.forum.id not in public_forums:
        return

    conn = get_redis_connection()

    # An INCR result of 1 means the key didn't exist before, which likely
    # means Redis got trashed; recompute the true count from the database.
    if conn.incr(TOPIC_COUNT_KEY) == 1:
        count = Topic.objects.filter(forum__in=public_forums).count()
        conn.set(TOPIC_COUNT_KEY, count)
256
257
258 def get_stats():
259 """
260 This function returns the topic and post count statistics as a tuple, in
261 that order. If a statistic is not available, its position in the tuple will
262 be None.
263
264 """
265 try:
266 conn = get_redis_connection()
267 result = conn.mget(TOPIC_COUNT_KEY, POST_COUNT_KEY)
268 except redis.RedisError, e:
269 logger.error(e)
270 return (None, None)
271
272 topic_count = int(result[0]) if result[0] else None
273 post_count = int(result[1]) if result[1] else None
274
275 return (topic_count, post_count)
276
277
278 def get_latest_topic_ids(num):
279 """
280 Return a list of topic ids from the latest topics that have posts. The ids
281 will be sorted from newest to oldest.
282
283 """
284 try:
285 conn = get_redis_connection()
286 result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
287 except redis.RedisError, e:
288 logger.error(e)
289 return []
290
291 return [int(n) for n in result]
292
293
294 def get_latest_topics(num):
295 """
296 Return a list of dictionaries with information about the latest topics that
297 have updated posts. The topics are sorted from newest to oldest.
298
299 """
300 try:
301 conn = get_redis_connection()
302 result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
303
304 topic_keys = [UPDATED_TOPIC_KEY % n for n in result]
305 json_list = conn.mget(topic_keys) if topic_keys else []
306
307 except redis.RedisError, e:
308 logger.error(e)
309 return []
310
311 topics = []
312 for s in json_list:
313 item = simplejson.loads(s)
314 item['date'] = datetime.datetime.fromtimestamp(item['date'])
315 topics.append(item)
316
317 return topics
318
319
320 def notify_topic_delete(topic):
321 """
322 This function should be called when a topic is deleted. It will remove the
323 topic from the updated topics set, if present, and delete any info we have
324 about the topic.
325
326 Note we don't do anything like this for posts. Since they just populate RSS
327 feeds we'll let them 404. The updated topic list is seen in a prominent
328 template tag however, so it is a bit more important to get that cleaned up.
329
330 """
331 try:
332 conn = get_redis_connection()
333 pipeline = conn.pipeline()
334 pipeline.zrem(UPDATED_TOPICS_SET_KEY, topic.id)
335 pipeline.delete(UPDATED_TOPIC_KEY % topic.id)
336 pipeline.execute()
337 except redis.RedisError, e:
338 logger.error(e)
339
340
341 # Down here to avoid a circular import
342 import forums.tasks