annotate forums/latest.py @ 629:f4c043cf55ac

Wiki integration. Requests don't always have sessions. In particular this occurs when a request is made without a trailing slash. The Common middleware redirects when this happens, and the middleware process_request() processing stops before a session can get added. So just set an attribute on the request object for each operation. This seemed weird to me at first, but there are plenty of examples of this in the Django code base already.
author Brian Neal <bgneal@gmail.com>
date Tue, 13 Nov 2012 13:50:06 -0600
parents f3fded5df64b
children 89b240fe9297
rev   line source
bgneal@509 1 """
bgneal@509 2 This module maintains the latest posts datastore. The latest posts are often
bgneal@509 3 needed by RSS feeds, "latest posts" template tags, etc. This module listens for
bgneal@509 4 the post_content_update signal, then bundles the post up and stores it by forum
bgneal@509 5 ID in Redis. We also maintain a combined forums list. This allows quick
bgneal@509 6 retrieval of the latest posts and avoids some slow SQL queries.
bgneal@509 7
bgneal@522 8 We also do things like send topic notification emails, auto-favorite, and
bgneal@522 9 auto-subscribe functions here rather than bog the user down in the request /
bgneal@522 10 response cycle.
bgneal@522 11
bgneal@509 12 """
bgneal@595 13 # Maintenance notes:
bgneal@595 14 # How we use Redis in this module:
bgneal@595 15 #
bgneal@595 16 # Forum post processing:
bgneal@595 17 #
bgneal@595 18 # * Forum posts are turned into Python dictionaries, then converted to JSON and
bgneal@595 19 # stored under keys: forums:post:id
bgneal@595 20 # * Each forum has a list in Redis stored under the key: forums:rss:id. This
bgneal@595 21 # is a list of post IDs.
bgneal@595 22 # * There is also a key called forums:rss:* which is the combined latest
bgneal@595 23 # feed. It is also a list of post IDs.
bgneal@595 24 # * A sorted set is maintained that keeps track of the reference count for each
bgneal@595 25 # post. When a new post is created, this reference count is 2 because it is
bgneal@595 26 # stored in both the combined list and the parent forum list.
bgneal@595 27 # This sorted set is stored under the key: forums:post_ref_cnt.
bgneal@595 28 # * When a post falls off a list due to aging, the reference count in the
bgneal@595 29 # ordered set is decremented. If it falls to zero, the post's key is deleted
bgneal@595 30 # from Redis.
bgneal@595 31 # * When a post is edited, and it is in Redis, we simply update the JSON
bgneal@595 32 # content.
bgneal@595 33 # * When a post is deleted, and it is in Redis, it is removed from the 2 lists,
bgneal@595 34 # the ordered set, and deleted from Redis.
bgneal@595 35 # * When the RSS feed wants to update, it simply pulls down the entire list of
bgneal@595 36 # post IDs for the feed of interest, then does a get on all the posts.
bgneal@595 37 #
bgneal@595 38 # Topics with recent posts processing:
bgneal@595 39 #
bgneal@595 40 # * A key is created for each topic that is updated.
bgneal@595 41 # * An ordered set of topics is maintained with the current time as the score.
bgneal@595 42 # * An updated topic gets its score bumped.
bgneal@595 43 # * We only allow MAX_UPDATED_TOPICS number of topics in the set. We sort the
bgneal@595 44 # set by score, and the expired topics are removed from the set and their keys
bgneal@595 45 # are deleted from Redis.
bgneal@595 46 # * The template tag (or anyone) who wants the list of topics with new posts
bgneal@595 47 # gets the list of IDs sorted by score from newest to oldest. An mget is then
bgneal@595 48 # performed to get all the topic data and it is deserialized from JSON.
bgneal@595 49 #
bgneal@595 50 # We also maintain topic and post counts in Redis since SELECT COUNT(*) can
bgneal@595 51 # take a while with MySQL InnoDB.
bgneal@595 52 #
bgneal@509 53 import datetime
bgneal@522 54 import logging
bgneal@509 55 import time
bgneal@509 56
bgneal@509 57 from django.dispatch import receiver
bgneal@509 58 from django.utils import simplejson
bgneal@594 59 from django.template.loader import render_to_string
bgneal@523 60 import redis
bgneal@509 61
bgneal@522 62 from forums.signals import post_content_update, topic_content_update
bgneal@594 63 from forums.models import Forum, Topic, Post, Attachment
bgneal@522 64 from forums.views.subscriptions import notify_topic_subscribers
bgneal@522 65 from forums.tools import auto_favorite, auto_subscribe
bgneal@509 66 from core.services import get_redis_connection
bgneal@509 67
bgneal@509 68 # Maximum number of post IDs kept in each Redis feed list (per-forum and combined)
bgneal@509 69 MAX_POSTS = 50
bgneal@509 70
bgneal@522 71 # Maximum number of topics kept in the "updated topics" sorted set
bgneal@522 72 MAX_UPDATED_TOPICS = 50
bgneal@522 73
bgneal@522 74 # Redis key names; a %s placeholder is filled in with a topic, post or forum ID:
bgneal@522 75 POST_COUNT_KEY = "forums:public_post_count"
bgneal@522 76 TOPIC_COUNT_KEY = "forums:public_topic_count"
bgneal@522 77 UPDATED_TOPICS_SET_KEY = "forums:updated_topics:set"
bgneal@522 78 UPDATED_TOPIC_KEY = "forums:updated_topics:%s"
bgneal@595 79 POST_KEY = "forums:post:%s"
bgneal@595 80 FORUM_RSS_KEY = "forums:rss:%s"
bgneal@595 81 ALL_FORUMS_RSS_KEY = "forums:rss:*"
bgneal@595 82 POST_SET_KEY = "forums:post_ref_cnt"
bgneal@522 83
bgneal@522 84 logger = logging.getLogger(__name__)
bgneal@522 85
bgneal@509 86
@receiver(post_content_update, dispatch_uid='forums.latest_posts')
def on_post_update(sender, **kwargs):
    """
    Signal handler for post_content_update; sender is the post that was
    created or updated.

    The actual work is handed to a Celery task, chosen by kwargs['created'],
    so that it happens outside of the request/response cycle.

    """
    if kwargs['created']:
        task = forums.tasks.new_post_task
    else:
        task = forums.tasks.updated_post_task
    task.delay(sender.id)
bgneal@522 101
bgneal@522 102
def process_new_post(post_id):
    """
    Perform all new-post processing for the post with the given ID. This
    function is run on a Celery task.

    If the post no longer exists a warning is logged and nothing else is done.
    Posts in public forums are added to the Redis feeds, post count and
    updated-topics data. For every post, public or not, topic subscribers are
    notified and the auto-favorite / auto-subscribe actions are run.

    """
    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_new_post: post %d does not exist", post_id)
        return

    # Only posts that live in a public forum are mirrored into Redis
    forum_ids = Forum.objects.public_forum_ids()
    if post.topic.forum.id in forum_ids:
        redis_conn = get_redis_connection()
        _update_post_feeds(redis_conn, post)
        _update_post_count(redis_conn, forum_ids)
        _update_latest_topics(redis_conn, post)

    # Email notifications go out now rather than being deferred
    notify_topic_subscribers(post, defer=False)

    # Auto-favorite and auto-subscribe actions for the new post
    auto_favorite(post)
    auto_subscribe(post)
bgneal@522 129
bgneal@522 130
def process_updated_post(post_id):
    """
    Perform all updated-post processing for the post with the given ID. This
    function is run on a Celery task.

    If the post is currently stored in Redis, its serialized content is
    replaced with a fresh copy built from the database. Otherwise nothing is
    done.

    """
    redis_conn = get_redis_connection()
    key = POST_KEY % post_id

    # A post that is not in Redis is not in any feed; nothing to refresh
    if redis_conn.get(key) is None:
        return

    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_updated_post: post %d does not exist", post_id)
        return

    redis_conn.set(key, _serialize_post(post))
bgneal@595 150
bgneal@595 151
def _update_post_feeds(conn, post):
    """
    Updates the forum feeds we keep in Redis so that our RSS feeds are quick.

    conn is the Redis connection and post is the newly created post. The
    serialized post is stored under its own key, its ID is pushed onto both
    the parent forum's list and the combined list, and its reference count is
    set to 2 (one per list).

    If either list then holds more than MAX_POSTS entries, its oldest entry is
    popped and that post's reference count is decremented. Any post whose
    count is no longer positive is removed from the reference count set and
    its serialized data is deleted.

    """
    post_key = POST_KEY % post.id
    post_value = _serialize_post(post)
    forum_key = FORUM_RSS_KEY % post.topic.forum.id

    pipeline = conn.pipeline()

    # Store serialized post content under its own key
    pipeline.set(post_key, post_value)

    # Store in the RSS feed for the post's forum
    pipeline.lpush(forum_key, post.id)

    # Store in the RSS feed for combined forums
    pipeline.lpush(ALL_FORUMS_RSS_KEY, post.id)

    # Store reference count for the post
    pipeline.zadd(POST_SET_KEY, 2, post.id)

    results = pipeline.execute()

    # Results 1 and 2 are the replies to the two lpush commands, which are
    # compared against MAX_POSTS to decide which lists need trimming.
    overfull_keys = []
    if results[1] > MAX_POSTS:
        overfull_keys.append(forum_key)
    if results[2] > MAX_POSTS:
        overfull_keys.append(ALL_FORUMS_RSS_KEY)

    if not overfull_keys:
        return

    # Truncate lists of posts by dropping the oldest entry of each
    pipeline = conn.pipeline()
    for key in overfull_keys:
        pipeline.rpop(key)
    post_ids = pipeline.execute()

    # Decrement reference count(s)
    pipeline = conn.pipeline()
    for post_id in post_ids:
        pipeline.zincrby(POST_SET_KEY, post_id, -1)
    scores = pipeline.execute()

    # Clean up every post whose reference count is no longer positive. The
    # same "<= 0" test both triggers and selects the cleanup, so a count that
    # has gone negative is removed as well instead of being left behind.
    expired_ids = [post_id for post_id, score in zip(post_ids, scores)
                   if score <= 0.0]
    if expired_ids:
        pipeline = conn.pipeline()

        # remove from post set
        pipeline.zrem(POST_SET_KEY, *expired_ids)

        # remove serialized post data
        pipeline.delete(*[POST_KEY % n for n in expired_ids])

        pipeline.execute()
bgneal@509 208
bgneal@509 209
def _update_post_count(conn, public_forums):
    """
    Bump the public post count cached in Redis. The count is cached because a
    COUNT(*) on the post table can be expensive in MySQL InnoDB.

    conn is the Redis connection; public_forums is the collection of public
    forum IDs used to recompute the count from the database when needed.

    """
    if conn.incr(POST_COUNT_KEY) != 1:
        return

    # A result of 1 most likely means Redis lost the counter, so rebuild the
    # correct value from the database.
    actual = Post.objects.filter(topic__forum__in=public_forums).count()
    conn.set(POST_COUNT_KEY, actual)
bgneal@522 222
bgneal@522 223
def _update_latest_topics(conn, post):
    """
    Refresh the "latest topics with new posts" data cached in Redis for the
    topic that the given post belongs to. A template tag and a forum view use
    this information.

    The topic's details are stored as JSON under its own key and the topic is
    scored in the updated-topics set by the post's creation time. If the set
    grows beyond MAX_UPDATED_TOPICS, the lowest-scored topics are dropped
    along with their keys.

    """
    topic = post.topic
    score = int(time.mktime(post.creation_date.timetuple()))

    payload = simplejson.dumps({
        'title': topic.name,
        'author': post.user.username,
        'date': score,
        'url': topic.get_latest_post_url()
    })
    topic_key = UPDATED_TOPIC_KEY % topic.id

    pipeline = conn.pipeline()
    pipeline.set(topic_key, payload)
    pipeline.zadd(UPDATED_TOPICS_SET_KEY, score, topic.id)
    pipeline.zcard(UPDATED_TOPICS_SET_KEY)
    num_topics = pipeline.execute()[-1]    # zcard reply is the last result

    excess = num_topics - MAX_UPDATED_TOPICS
    if excess <= 0:
        return

    # Redis indices are inclusive, so this range covers `excess` members
    first, last = 0, excess - 1

    # The IDs must be fetched first so the per-topic keys can be deleted
    stale_ids = conn.zrange(UPDATED_TOPICS_SET_KEY, first, last)
    conn.delete(*[UPDATED_TOPIC_KEY % n for n in stale_ids])

    # Then drop those same members from the set itself
    conn.zremrangebyrank(UPDATED_TOPICS_SET_KEY, first, last)
bgneal@522 263
bgneal@522 264
def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
    """
    Retrieve up to num_posts of the latest posts for the forum with the given
    forum_id. When no forum_id is given, the combined forums feed is used.

    num_posts is clamped to the range 0..MAX_POSTS. A list of dictionaries is
    returned, one per post, with 'pubdate' converted back into a datetime
    object. Posts whose data is no longer in Redis are skipped.

    """
    if forum_id:
        feed_key = FORUM_RSS_KEY % forum_id
    else:
        feed_key = ALL_FORUMS_RSS_KEY

    limit = max(0, min(MAX_POSTS, num_posts))
    if limit == 0:
        return []

    conn = get_redis_connection()
    post_ids = conn.lrange(feed_key, 0, limit - 1)
    if not post_ids:
        return []

    raw_posts = conn.mget([POST_KEY % n for n in post_ids])

    posts = []
    for raw in raw_posts:
        if raw is None:
            continue

        post = simplejson.loads(raw)

        # pubdate is stored as a timestamp; hand callers a datetime instead
        post['pubdate'] = datetime.datetime.fromtimestamp(post['pubdate'])
        posts.append(post)

    return posts
bgneal@522 299
bgneal@522 300
@receiver(topic_content_update, dispatch_uid='forums.latest_posts')
def on_topic_update(sender, **kwargs):
    """
    Signal handler for topic_content_update; sender is the topic that was
    created or updated.

    The actual work is handed to a Celery task, chosen by kwargs['created'],
    so that it happens outside of the request/response cycle.

    """
    if kwargs['created']:
        task = forums.tasks.new_topic_task
    else:
        task = forums.tasks.updated_topic_task
    task.delay(sender.id)
bgneal@522 315
bgneal@522 316
def process_new_topic(topic_id):
    """
    Perform new-topic processing for the topic with the given ID. Currently
    this only updates the public topic count statistic kept in Redis.

    If the topic no longer exists a warning is logged. Topics outside the
    public forums are ignored.

    """
    try:
        topic = Topic.objects.select_related().get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("process_new_topic: topic %d does not exist", topic_id)
        return

    # Only topics in public forums count towards the statistic
    forum_ids = Forum.objects.public_forum_ids()
    if topic.forum.id in forum_ids:
        redis_conn = get_redis_connection()

        if redis_conn.incr(TOPIC_COUNT_KEY) == 1:
            # A result of 1 most likely means Redis lost the counter, so
            # rebuild the correct value from the database.
            total = Topic.objects.filter(forum__in=forum_ids).count()
            redis_conn.set(TOPIC_COUNT_KEY, total)
bgneal@522 344
bgneal@522 345
def process_updated_topic(topic_id):
    """
    Perform updated-topic processing for the topic with the given ID. Only
    the title is refreshed.

    Nothing is done unless the topic has an entry in Redis. If it does and
    its stored title differs from the topic's current name, the entry is
    rewritten with the new title.

    """
    redis_conn = get_redis_connection()
    topic_key = UPDATED_TOPIC_KEY % topic_id

    cached = redis_conn.get(topic_key)
    if cached is None:
        return

    try:
        topic = Topic.objects.get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("topic %d does not exist", topic_id)
        return

    info = simplejson.loads(cached)

    # Avoid a write when the title has not actually changed
    if topic.name != info['title']:
        info['title'] = topic.name
        redis_conn.set(topic_key, simplejson.dumps(info))
bgneal@595 367
bgneal@595 368
def get_stats():
    """
    This function returns the topic and post count statistics as a tuple, in
    that order. If a statistic is not available, its position in the tuple will
    be None.

    If Redis raises an error, it is logged and (None, None) is returned.

    """
    try:
        conn = get_redis_connection()
        result = conn.mget(TOPIC_COUNT_KEY, POST_COUNT_KEY)
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the older
    # comma form.
    except redis.RedisError as e:
        logger.error(e)
        return (None, None)

    # mget returns one entry per key; a missing key yields a falsy value
    topic_count = int(result[0]) if result[0] else None
    post_count = int(result[1]) if result[1] else None

    return (topic_count, post_count)
bgneal@522 387
bgneal@522 388
def get_latest_topic_ids(num):
    """
    Return a list of topic ids from the latest topics that have posts. The ids
    will be sorted from newest to oldest.

    At most num ids are returned; a num of zero or less yields an empty list.
    If Redis raises an error, it is logged and an empty list is returned.

    """
    # With num <= 0 the stop index below would be -1 or lower, which Redis
    # interprets as counting from the end of the set. Return nothing instead.
    if num <= 0:
        return []

    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the older
    # comma form.
    except redis.RedisError as e:
        logger.error(e)
        return []

    return [int(n) for n in result]
bgneal@522 403
bgneal@522 404
def get_latest_topics(num):
    """
    Return a list of dictionaries with information about the latest topics that
    have updated posts. The topics are sorted from newest to oldest.

    At most num topics are returned; a num of zero or less yields an empty
    list. Each dictionary is the deserialized topic data with its 'date'
    converted back into a datetime object. Topics whose data is no longer in
    Redis are skipped. If Redis raises an error, it is logged and an empty
    list is returned.

    """
    # With num <= 0 the stop index below would be -1 or lower, which Redis
    # interprets as counting from the end of the set. Return nothing instead.
    if num <= 0:
        return []

    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)

        topic_keys = [UPDATED_TOPIC_KEY % n for n in result]
        json_list = conn.mget(topic_keys) if topic_keys else []

    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the older
    # comma form.
    except redis.RedisError as e:
        logger.error(e)
        return []

    topics = []
    for s in json_list:
        # The topic key may have been deleted between the two Redis calls, in
        # which case there is no data to deserialize for it.
        if s is None:
            continue

        item = simplejson.loads(s)
        item['date'] = datetime.datetime.fromtimestamp(item['date'])
        topics.append(item)

    return topics
bgneal@522 429
bgneal@522 430
def notify_topic_delete(topic):
    """
    This function should be called when a topic is deleted. It will remove the
    topic from the updated topics set, if present, and delete any info we have
    about the topic.

    Note we don't do anything like this for posts. Since they just populate RSS
    feeds we'll let them 404. The updated topic list is seen in a prominent
    template tag however, so it is a bit more important to get that cleaned up.

    If Redis raises an error, it is logged and otherwise ignored.

    """
    try:
        conn = get_redis_connection()
        pipeline = conn.pipeline()
        pipeline.zrem(UPDATED_TOPICS_SET_KEY, topic.id)
        pipeline.delete(UPDATED_TOPIC_KEY % topic.id)
        pipeline.execute()
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the older
    # comma form.
    except redis.RedisError as e:
        logger.error(e)
bgneal@522 450
bgneal@522 451
def _serialize_post(post):
    """Build the JSON representation of a post and return it.

    The content is the post's HTML, unless the post has attachments, in which
    case the 'forums/post_rss.html' template is rendered with the post and
    its embeds. The publication date is stored as an integer timestamp.

    """
    # Attachments for the post, in their display order
    attachment_qs = Attachment.objects.filter(post=post)
    attachment_qs = attachment_qs.select_related('embed').order_by('order')
    embeds = [attachment.embed for attachment in attachment_qs]

    if embeds:
        content = render_to_string('forums/post_rss.html', {
            'post': post,
            'embeds': embeds,
        })
    else:
        content = post.html

    pubdate = int(time.mktime(post.creation_date.timetuple()))

    return simplejson.dumps({
        'id': post.id,
        'title': post.topic.name,
        'content': content,
        'author': post.user.username,
        'pubdate': pubdate,
        'forum_name': post.topic.forum.name,
        'url': post.get_absolute_url()
    })
bgneal@595 481
bgneal@595 482
bgneal@522 483 # Down here to avoid a circular import
bgneal@522 484 import forums.tasks