annotate forums/latest.py @ 1163:44e55e4317f8

Merge with mainline.
author Brian Neal <bgneal@gmail.com>
date Tue, 07 Mar 2017 19:47:18 -0600
parents 7429c98c8ece
children 90e8cc6eff77
rev   line source
bgneal@509 1 """
bgneal@509 2 This module maintains the latest posts datastore. The latest posts are often
bgneal@509 3 needed by RSS feeds, "latest posts" template tags, etc. This module listens for
bgneal@509 4 the post_content_update signal, then bundles the post up and stores it by forum
bgneal@509 5 ID in Redis. We also maintain a combined forums list. This allows quick
bgneal@509 6 retrieval of the latest posts and avoids some slow SQL queries.
bgneal@509 7
bgneal@522 8 We also do things like send topic notification emails, auto-favorite, and
bgneal@522 9 auto-subscribe functions here rather than bog the user down in the request /
bgneal@522 10 response cycle.
bgneal@522 11
bgneal@509 12 """
bgneal@595 13 # Maintenance notes:
bgneal@595 14 # How we use Redis in this module:
bgneal@595 15 #
bgneal@595 16 # Forum post processing:
bgneal@595 17 #
bgneal@595 18 # * Forum posts are turned into Python dictionaries, then converted to JSON and
bgneal@595 19 # stored under keys: forums:post:id
bgneal@595 20 # * Each forum has a list in Redis stored under the key: forums:rss:id. This
bgneal@595 21 # is a list of post IDs.
bgneal@595 22 # * There is also a key called forums:rss:* which is the combined latest
bgneal@595 23 # feed. It is also a list of post IDs.
bgneal@595 24 # * A sorted set is maintained that keeps track of the reference count for each
bgneal@595 25 # post. When a new post is created, this reference count is 2 because it is
bgneal@595 26 # stored in both the combined list and the parent forum list.
bgneal@595 27 # This sorted set is stored under the key: forums:post_ref_cnt.
bgneal@595 28 # * When a post falls off a list due to aging, the reference count in the
bgneal@595 29 # ordered set is decremented. If it falls to zero, the post's key is deleted
bgneal@595 30 # from Redis.
bgneal@595 31 # * When a post is edited, and it is in Redis, we simply update the JSON
bgneal@595 32 # content.
bgneal@595 33 # * When a post is deleted, and it is in Redis, it is removed from the 2 lists,
bgneal@595 34 # the ordered set, and deleted from Redis.
bgneal@595 35 # * When the RSS feed wants to update, it simply pulls down the entire list of
bgneal@595 36 # post IDs for the feed of interest, then does a get on all the posts.
bgneal@595 37 #
bgneal@595 38 # Topics with recent posts processing:
bgneal@595 39 #
bgneal@595 40 # * A key is created for each topic that is updated.
bgneal@595 41 # * An ordered set of topics is maintained with the current time as the score.
bgneal@595 42 # * An updated topic gets its score bumped.
bgneal@595 43 # * We only allow MAX_UPDATED_TOPICS number of topics in the set. We sort the
bgneal@595 44 # set by score, and the expired topics are removed from the set and their keys
bgneal@595 45 # are deleted from Redis.
bgneal@595 46 # * The template tag (or anyone) who wants the list of topics with new posts
bgneal@595 47 # gets the list of IDs sorted by score from newest to oldest. An mget is then
bgneal@595 48 # performed to get all the topic data and it is deserialized from JSON.
bgneal@595 49 #
bgneal@595 50 # We also maintain topic and post counts in Redis since SELECT COUNT(*) can
bgneal@595 51 # take a while with MySQL InnoDB.
bgneal@595 52 #
bgneal@509 53 import datetime
bgneal@679 54 import json
bgneal@522 55 import logging
bgneal@509 56 import time
bgneal@509 57
bgneal@509 58 from django.dispatch import receiver
bgneal@594 59 from django.template.loader import render_to_string
bgneal@523 60 import redis
bgneal@509 61
bgneal@522 62 from forums.signals import post_content_update, topic_content_update
bgneal@594 63 from forums.models import Forum, Topic, Post, Attachment
bgneal@522 64 from forums.views.subscriptions import notify_topic_subscribers
bgneal@522 65 from forums.tools import auto_favorite, auto_subscribe
bgneal@509 66 from core.services import get_redis_connection
bgneal@792 67 from core.markup import site_markup
bgneal@509 68
# Upper bound on the number of latest posts we keep in each forum feed
MAX_POSTS = 50

# Upper bound on the number of recently updated topics we keep track of
MAX_UPDATED_TOPICS = 50

# Redis key names; the "%s" placeholders are filled in with an object ID:
POST_COUNT_KEY = "forums:public_post_count"
TOPIC_COUNT_KEY = "forums:public_topic_count"
UPDATED_TOPICS_SET_KEY = "forums:updated_topics:set"
UPDATED_TOPIC_KEY = "forums:updated_topics:%s"
POST_KEY = "forums:post:%s"
FORUM_RSS_KEY = "forums:rss:%s"
ALL_FORUMS_RSS_KEY = "forums:rss:*"
POST_SET_KEY = "forums:post_ref_cnt"

logger = logging.getLogger(__name__)
bgneal@522 86
bgneal@509 87
@receiver(post_content_update, dispatch_uid='forums.latest_posts')
def on_post_update(sender, **kwargs):
    """
    Signal handler for post_content_update, invoked when a post has been
    created or updated.

    No work is done here directly; the sender's id is handed to a Celery task
    so the processing happens outside of the request/response cycle.

    """
    # kwargs['created'] selects between the new-post and updated-post tasks
    if kwargs['created']:
        task = forums.tasks.new_post_task
    else:
        task = forums.tasks.updated_post_task
    task.delay(sender.id)
bgneal@522 102
bgneal@522 103
def process_new_post(post_id):
    """
    Perform all new-post processing for the post with the given ID. This
    function is run on a Celery task.

    If the post cannot be found a warning is logged and nothing else happens.

    """
    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_new_post: post %d does not exist", post_id)
        return

    # Only posts that live in public forums are recorded in Redis.
    # NOTE(review): block nesting was reconstructed; it is assumed that only
    # the three Redis updates are gated by this check -- confirm.
    public_forum_ids = Forum.objects.public_forum_ids()
    is_public = post.topic.forum.id in public_forum_ids

    if is_public:
        redis_conn = get_redis_connection()
        _update_post_feeds(redis_conn, post)
        _update_post_count(redis_conn, public_forum_ids)
        _update_latest_topics(redis_conn, post)

    # Email the topic subscribers now rather than deferring
    notify_topic_subscribers(post, defer=False)

    # Apply any auto-favorite / auto-subscribe actions for the new post
    auto_favorite(post)
    auto_subscribe(post)
bgneal@522 130
bgneal@522 131
def process_updated_post(post_id):
    """
    Perform all updated-post processing for the post with the given ID. This
    function is run on a Celery task.

    If the post is currently stored in Redis, its serialized content is
    replaced with a freshly serialized copy; otherwise nothing is done.

    """
    redis_conn = get_redis_connection()
    key = POST_KEY % post_id

    # A missing key means the post is not in any feed; nothing to refresh
    if redis_conn.get(key) is None:
        return

    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_updated_post: post %d does not exist", post_id)
        return

    redis_conn.set(key, _serialize_post(post))
bgneal@595 151
bgneal@595 152
def _update_post_feeds(conn, post):
    """
    Record a new post in the Redis-backed feeds so that our RSS feeds are
    quick to build.

    The serialized post is stored under its own key, its ID is pushed onto
    both the parent forum's list and the combined list, and its reference
    count is set to 2. Any list that grows past MAX_POSTS has its oldest ID
    popped off; a popped post whose reference count drops to zero is removed
    from Redis.

    """
    serialized = _serialize_post(post)
    forum_feed_key = FORUM_RSS_KEY % post.topic.forum.id

    pipe = conn.pipeline()
    pipe.set(POST_KEY % post.id, serialized)      # the post's own content
    pipe.lpush(forum_feed_key, post.id)           # per-forum feed
    pipe.lpush(ALL_FORUMS_RSS_KEY, post.id)       # combined feed
    pipe.zadd(POST_SET_KEY, 2, post.id)           # one reference per feed
    replies = pipe.execute()

    # The LPUSH replies are the new list lengths
    forum_overflow = replies[1] > MAX_POSTS
    combined_overflow = replies[2] > MAX_POSTS
    if not (forum_overflow or combined_overflow):
        return

    # Drop the oldest entry from each list that has grown too long
    pipe = conn.pipeline()
    if forum_overflow:
        pipe.rpop(forum_feed_key)
    if combined_overflow:
        pipe.rpop(ALL_FORUMS_RSS_KEY)
    evicted_ids = pipe.execute()

    # Each evicted ID loses one reference
    pipe = conn.pipeline()
    for evicted_id in evicted_ids:
        pipe.zincrby(POST_SET_KEY, evicted_id, -1)
    ref_counts = pipe.execute()

    # Nothing more to do unless some reference count reached zero
    if all(ref_counts):
        return

    unreferenced = [
        evicted_id
        for evicted_id, count in zip(evicted_ids, ref_counts)
        if count <= 0.0
    ]

    pipe = conn.pipeline()
    # forget the reference counts ...
    pipe.zrem(POST_SET_KEY, *unreferenced)
    # ... and the serialized post data
    pipe.delete(*[POST_KEY % n for n in unreferenced])
    pipe.execute()
bgneal@509 209
bgneal@509 210
def _update_post_count(conn, public_forums):
    """
    Bump the public post count cached in Redis. We cache it because a
    COUNT(*) on the post table can be expensive in MySQL InnoDB.

    """
    if conn.incr(POST_COUNT_KEY) != 1:
        return

    # A value of 1 means the counter did not exist before; it is likely Redis
    # got trashed, so recompute the correct value from the database.
    public_posts = Post.objects.filter(topic__forum__in=public_forums)
    conn.set(POST_COUNT_KEY, public_posts.count())
bgneal@522 223
bgneal@522 224
def _update_latest_topics(conn, post):
    """
    Refresh the "latest topics with new posts" data cached in Redis for speed.
    A template tag and a forum view consume this information.

    The post's topic is stored as JSON under its own key and scored in the
    updated-topics sorted set by the post's creation time. If the set then
    holds more than MAX_UPDATED_TOPICS members, the lowest-scored ones are
    removed along with their keys.

    """
    topic = post.topic
    score = int(time.mktime(post.creation_date.timetuple()))

    topic_json = json.dumps({
        'title': topic.name,
        'author': post.user.username,
        'date': score,
        'url': topic.get_latest_post_url()
    })

    pipe = conn.pipeline()
    pipe.set(UPDATED_TOPIC_KEY % topic.id, topic_json)
    pipe.zadd(UPDATED_TOPICS_SET_KEY, score, topic.id)
    pipe.zcard(UPDATED_TOPICS_SET_KEY)
    topic_total = pipe.execute()[-1]    # ZCARD reply: members now in the set

    surplus = topic_total - MAX_UPDATED_TOPICS
    if surplus <= 0:
        return

    # Ranks 0 .. surplus - 1 are the lowest scores; Redis ranges are inclusive
    first, last = 0, surplus - 1

    # Delete the per-topic keys first ...
    stale_ids = conn.zrange(UPDATED_TOPICS_SET_KEY, first, last)
    conn.delete(*[UPDATED_TOPIC_KEY % n for n in stale_ids])

    # ... then drop those members from the sorted set
    conn.zremrangebyrank(UPDATED_TOPICS_SET_KEY, first, last)
bgneal@522 264
bgneal@522 265
def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
    """
    Return up to num_posts of the latest posts for the forum with the given
    forum_id, or from the combined forums feed when forum_id is None (or
    otherwise falsy). num_posts is clamped to the range 0 to MAX_POSTS.

    The result is a list of dictionaries, one per post, with the 'pubdate'
    entry converted back into a datetime object.

    """
    if forum_id:
        feed_key = FORUM_RSS_KEY % forum_id
    else:
        feed_key = ALL_FORUMS_RSS_KEY

    limit = max(0, min(MAX_POSTS, num_posts))
    if limit == 0:
        return []

    redis_conn = get_redis_connection()
    ids = redis_conn.lrange(feed_key, 0, limit - 1)
    if not ids:
        return []

    latest = []
    for raw in redis_conn.mget([POST_KEY % n for n in ids]):
        # IDs whose post data is no longer in Redis are skipped
        if raw is None:
            continue

        entry = json.loads(raw)
        # pubdate is stored as a timestamp; hand back a datetime instead
        entry['pubdate'] = datetime.datetime.fromtimestamp(entry['pubdate'])
        latest.append(entry)

    return latest
bgneal@522 300
bgneal@522 301
@receiver(topic_content_update, dispatch_uid='forums.latest_posts')
def on_topic_update(sender, **kwargs):
    """
    Signal handler for topic_content_update, invoked when a topic has been
    created or updated.

    No work is done here directly; the sender's id is handed to a Celery task
    so the processing happens outside of the request/response cycle.

    """
    # kwargs['created'] selects between the new-topic and updated-topic tasks
    if kwargs['created']:
        task = forums.tasks.new_topic_task
    else:
        task = forums.tasks.updated_topic_task
    task.delay(sender.id)
bgneal@522 316
bgneal@522 317
def process_new_topic(topic_id):
    """
    Perform new-topic processing for the topic with the given ID. At present
    this only updates the public topic count statistic.

    Topics that cannot be found (a warning is logged) or that are not in a
    public forum are ignored.

    """
    try:
        topic = Topic.objects.select_related().get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("process_new_topic: topic %d does not exist", topic_id)
        return

    # Only topics in public forums are counted
    public_forum_ids = Forum.objects.public_forum_ids()
    if topic.forum.id in public_forum_ids:
        redis_conn = get_redis_connection()

        if redis_conn.incr(TOPIC_COUNT_KEY) == 1:
            # A value of 1 means the counter did not exist before; it is
            # likely Redis got trashed, so recompute the correct value.
            public_topics = Topic.objects.filter(forum__in=public_forum_ids)
            redis_conn.set(TOPIC_COUNT_KEY, public_topics.count())
bgneal@522 345
bgneal@522 346
def process_updated_topic(topic_id):
    """
    Perform updated-topic processing for the topic with the given ID. Only
    the title is refreshed, and only if the topic is currently stored in
    Redis.

    """
    redis_conn = get_redis_connection()
    topic_key = UPDATED_TOPIC_KEY % topic_id

    cached_json = redis_conn.get(topic_key)
    if cached_json is None:
        # topic is not being tracked; nothing to update
        return

    try:
        topic = Topic.objects.get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("topic %d does not exist", topic_id)
        return

    cached = json.loads(cached_json)

    # Rewrite the stored JSON only when the title actually changed
    if topic.name != cached['title']:
        cached['title'] = topic.name
        redis_conn.set(topic_key, json.dumps(cached))
bgneal@595 368
bgneal@595 369
def get_stats():
    """
    Return the topic and post count statistics as a tuple, in that order.

    If a statistic is not available, its position in the tuple will be None.
    If Redis raises an error, the error is logged and (None, None) is
    returned.

    """
    try:
        conn = get_redis_connection()
        result = conn.mget(TOPIC_COUNT_KEY, POST_COUNT_KEY)
    # "except X as e" is valid on Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except redis.RedisError as e:
        logger.error(e)
        return (None, None)

    # A missing key comes back as None (falsy), so it maps to None here
    topic_count = int(result[0]) if result[0] else None
    post_count = int(result[1]) if result[1] else None

    return (topic_count, post_count)
bgneal@522 388
bgneal@522 389
def get_latest_topic_ids(num):
    """
    Return a list of up to num topic ids from the latest topics that have
    posts. The ids will be sorted from newest to oldest.

    An empty list is returned when num is not positive, or when Redis raises
    an error (the error is logged).

    """
    # Without this guard a non-positive num yields a stop index of -1 or
    # lower, which Redis reads as counting from the end of the set, so
    # num=0 would return every member instead of none.
    if num <= 0:
        return []

    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
    # "except X as e" is valid on Python 2.6+ and Python 3
    except redis.RedisError as e:
        logger.error(e)
        return []

    return [int(n) for n in result]
bgneal@522 404
bgneal@522 405
def get_latest_topics(num):
    """
    Return a list of up to num dictionaries with information about the latest
    topics that have updated posts. The topics are sorted from newest to
    oldest, and each 'date' entry is converted back into a datetime object.

    An empty list is returned when num is not positive, or when Redis raises
    an error (the error is logged). Topic ids whose data is missing from
    Redis are skipped.

    """
    # Without this guard a non-positive num yields a stop index of -1 or
    # lower, which Redis reads as counting from the end of the set, so
    # num=0 would return every topic instead of none.
    if num <= 0:
        return []

    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)

        topic_keys = [UPDATED_TOPIC_KEY % n for n in result]
        json_list = conn.mget(topic_keys) if topic_keys else []

    # "except X as e" is valid on Python 2.6+ and Python 3
    except redis.RedisError as e:
        logger.error(e)
        return []

    topics = []
    for s in json_list:
        # mget yields None for a key that no longer exists; skip it rather
        # than let json.loads() blow up.
        if s is None:
            continue
        item = json.loads(s)
        item['date'] = datetime.datetime.fromtimestamp(item['date'])
        topics.append(item)

    return topics
bgneal@522 430
bgneal@522 431
def notify_topic_delete(topic):
    """
    This function should be called when a topic is deleted. It will remove the
    topic from the updated topics set, if present, and delete any info we have
    about the topic.

    Note we don't do anything like this for posts. Since they just populate RSS
    feeds we'll let them 404. The updated topic list is seen in a prominent
    template tag however, so it is a bit more important to get that cleaned up.

    Redis errors are logged and otherwise ignored.

    """
    try:
        conn = get_redis_connection()
        # Queue both removals on one pipeline and send them together
        pipeline = conn.pipeline()
        pipeline.zrem(UPDATED_TOPICS_SET_KEY, topic.id)
        pipeline.delete(UPDATED_TOPIC_KEY % topic.id)
        pipeline.execute()
    # "except X as e" is valid on Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except redis.RedisError as e:
        logger.error(e)
bgneal@522 451
bgneal@522 452
def _serialize_post(post):
    """Build a dictionary describing the post and return it as a JSON string.

    """
    # RSS needs absolute URLs for smileys, so the post Markdown is converted
    # to HTML again here with relative URLs turned off.
    html = site_markup(post.body, relative_urls=False)

    # Collect the embeds of any attachments on the post, in attachment order
    attachments = Attachment.objects.filter(post=post)
    attachments = attachments.select_related('embed').order_by('order')
    embeds = [attachment.embed for attachment in attachments]

    if embeds:
        # Posts with embeds are rendered through the RSS post template
        html = render_to_string('forums/post_rss.html', {
            'content': html,
            'embeds': embeds,
        })

    topic = post.topic
    pubdate = int(time.mktime(post.creation_date.timetuple()))

    return json.dumps({
        'id': post.id,
        'title': topic.name,
        'content': html,
        'author': post.user.username,
        'pubdate': pubdate,
        'forum_name': topic.forum.name,
        'url': post.get_absolute_url()
    })
bgneal@595 483
bgneal@595 484
bgneal@522 485 # Down here to avoid a circular import
bgneal@522 486 import forums.tasks