Mercurial > public > sg101
diff forums/latest.py @ 581:ee87ea74d46b
For Django 1.4, rearranged project structure for new manage.py.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 May 2012 17:10:48 -0500 |
parents | gpp/forums/latest.py@7388cdf61b25 |
children | 2469d5864249 |
line wrap: on
line diff
"""
This module maintains the latest posts datastore. The latest posts are often
needed by RSS feeds, "latest posts" template tags, etc. This module listens for
the post_content_update signal, then bundles the post up and stores it by forum
ID in Redis. We also maintain a combined forums list. This allows quick
retrieval of the latest posts and avoids some slow SQL queries.

We also do things like send topic notification emails, auto-favorite, and
auto-subscribe functions here rather than bog the user down in the request /
response cycle.

"""
import datetime
import logging
import time

from django.dispatch import receiver
from django.utils import simplejson
import redis

from forums.signals import post_content_update, topic_content_update
from forums.models import Forum, Topic, Post
from forums.views.subscriptions import notify_topic_subscribers
from forums.tools import auto_favorite, auto_subscribe
from core.services import get_redis_connection

# This constant controls how many latest posts per forum we store
MAX_POSTS = 50

# This controls how many updated topics we track
MAX_UPDATED_TOPICS = 50

# Redis key names:
POST_COUNT_KEY = "forums:public_post_count"
TOPIC_COUNT_KEY = "forums:public_topic_count"
UPDATED_TOPICS_SET_KEY = "forums:updated_topics:set"
UPDATED_TOPIC_KEY = "forums:updated_topics:%s"

logger = logging.getLogger(__name__)


@receiver(post_content_update, dispatch_uid='forums.latest_posts')
def on_post_update(sender, **kwargs):
    """
    This function is our signal handler, called when a post has been updated.
    We only care about newly created posts, and ignore updates.

    We kick off a Celery task to perform work outside of the request/response
    cycle.

    """
    # NOTE(review): sender appears to be the saved Post instance (only
    # sender.id is used below) — confirm against the signal's send() call.

    # ignore non-new posts
    if not kwargs['created']:
        return

    # Kick off a Celery task to process this new post
    forums.tasks.new_post_task.delay(sender.id)


def process_new_post(post_id):
    """
    This function is run on a Celery task. It performs all new-post processing.

    post_id is the primary key of the Post to process; if it no longer
    exists we log a warning and give up.

    """
    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_new_post: post %d does not exist", post_id)
        return

    # selectively process posts from non-public forums
    public_forums = Forum.objects.public_forum_ids()

    if post.topic.forum.id in public_forums:
        conn = get_redis_connection()
        _update_post_feeds(conn, post)
        _update_post_count(conn, public_forums)
        _update_latest_topics(conn, post)

    # send out any email notifications
    notify_topic_subscribers(post, defer=False)

    # perform any auto-favorite and auto-subscribe actions for the new post
    auto_favorite(post)
    auto_subscribe(post)


def _update_post_feeds(conn, post):
    """
    Updates the forum feeds we keep in Redis so that our RSS feeds are quick.

    conn is an open Redis connection; post is the newly created Post.

    """
    # serialize post attributes
    post_content = {
        'id': post.id,
        'title': post.topic.name,
        'content': post.html,
        'author': post.user.username,
        # pubdate is stored as a Unix timestamp so it survives JSON round-trips
        'pubdate': int(time.mktime(post.creation_date.timetuple())),
        'forum_name': post.topic.forum.name,
        'url': post.get_absolute_url()
    }

    s = simplejson.dumps(post_content)

    # store in Redis

    pipeline = conn.pipeline()

    key = 'forums:latest:%d' % post.topic.forum.id

    # newest post goes to the head of the list; trim keeps only MAX_POSTS
    pipeline.lpush(key, s)
    pipeline.ltrim(key, 0, MAX_POSTS - 1)

    # store in the combined feed; yes this wastes some memory storing it twice,
    # but it makes things much easier

    key = 'forums:latest:*'

    pipeline.lpush(key, s)
    pipeline.ltrim(key, 0, MAX_POSTS - 1)

    pipeline.execute()


def _update_post_count(conn, public_forums):
    """
    Updates the post count we cache in Redis. Doing a COUNT(*) on the post
    table can be expensive in MySQL InnoDB.

    """
    result = conn.incr(POST_COUNT_KEY)
    if result == 1:
        # it is likely redis got trashed, so re-compute the correct value
        # NOTE(review): this set() can race with concurrent incr()s and lose a
        # few counts; presumably acceptable for a statistic — confirm.

        count = Post.objects.filter(topic__forum__in=public_forums).count()
        conn.set(POST_COUNT_KEY, count)


def _update_latest_topics(conn, post):
    """
    Updates the "latest topics with new posts" list we cache in Redis for speed.
    There is a template tag and forum view that uses this information.

    """
    # serialize topic attributes; the post's creation time doubles as the
    # sorted-set score so the set orders topics oldest-to-newest
    topic_id = post.topic.id
    topic_score = int(time.mktime(post.creation_date.timetuple()))

    topic_content = {
        'title': post.topic.name,
        'author': post.user.username,
        'date': topic_score,
        'url': post.topic.get_latest_post_url()
    }
    json = simplejson.dumps(topic_content)
    key = UPDATED_TOPIC_KEY % topic_id

    pipeline = conn.pipeline()
    pipeline.set(key, json)
    # NOTE(review): this is the old redis-py zadd(key, score, member)
    # argument order; newer redis-py versions require a mapping — verify
    # against the installed client version.
    pipeline.zadd(UPDATED_TOPICS_SET_KEY, topic_score, topic_id)
    pipeline.zcard(UPDATED_TOPICS_SET_KEY)
    results = pipeline.execute()

    # delete topics beyond our maximum count
    num_topics = results[-1]
    num_to_del = num_topics - MAX_UPDATED_TOPICS
    if num_to_del > 0:
        # get the IDs of the topics we need to delete first
        start = 0
        stop = num_to_del - 1       # Redis indices are inclusive
        old_ids = conn.zrange(UPDATED_TOPICS_SET_KEY, start, stop)

        # remove the per-topic JSON blobs for the topics being dropped
        keys = [UPDATED_TOPIC_KEY % n for n in old_ids]
        conn.delete(*keys)

        # now delete the oldest num_to_del topics
        conn.zremrangebyrank(UPDATED_TOPICS_SET_KEY, start, stop)


def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
    """
    This function retrieves num_posts latest posts for the forum with the given
    forum_id. If forum_id is None, the posts are retrieved from the combined
    forums datastore. A list of dictionaries is returned. Each dictionary
    contains information about a post.

    num_posts is clamped to the range [0, MAX_POSTS].

    """
    # NOTE(review): a falsy forum_id (e.g. 0) falls through to the combined
    # feed key; benign if forum PKs start at 1 — confirm.
    key = 'forums:latest:%d' % forum_id if forum_id else 'forums:latest:*'

    num_posts = max(0, min(MAX_POSTS, num_posts))

    if num_posts == 0:
        return []

    conn = get_redis_connection()
    raw_posts = conn.lrange(key, 0, num_posts - 1)

    posts = []
    for raw_post in raw_posts:
        post = simplejson.loads(raw_post)

        # fix up the pubdate; turn it back into a datetime object
        post['pubdate'] = datetime.datetime.fromtimestamp(post['pubdate'])

        posts.append(post)

    return posts


@receiver(topic_content_update, dispatch_uid='forums.latest_posts')
def on_topic_update(sender, **kwargs):
    """
    This function is our signal handler, called when a topic has been updated.
    We only care about newly created topics, and ignore updates.

    We kick off a Celery task to perform work outside of the request/response
    cycle.

    """
    # NOTE(review): this receiver reuses the dispatch_uid of on_post_update;
    # the two connect to different signals so they should not collide, but a
    # distinct uid would be clearer — confirm no disconnect() relies on it.

    # ignore non-new topics
    if not kwargs['created']:
        return

    # Kick off a Celery task to process this new topic
    forums.tasks.new_topic_task.delay(sender.id)


def process_new_topic(topic_id):
    """
    This function contains new topic processing. Currently we only update the
    topic count statistic.

    topic_id is the primary key of the Topic to process; if it no longer
    exists we log a warning and give up.

    """
    try:
        topic = Topic.objects.select_related().get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("process_new_topic: topic %d does not exist", topic_id)
        return

    # selectively process topics from non-public forums
    public_forums = Forum.objects.public_forum_ids()

    if topic.forum.id not in public_forums:
        return

    # update the topic count statistic
    conn = get_redis_connection()

    result = conn.incr(TOPIC_COUNT_KEY)
    if result == 1:
        # it is likely redis got trashed, so re-compute the correct value

        count = Topic.objects.filter(forum__in=public_forums).count()
        conn.set(TOPIC_COUNT_KEY, count)


def get_stats():
    """
    This function returns the topic and post count statistics as a tuple, in
    that order. If a statistic is not available, its position in the tuple will
    be None.

    Any Redis error is logged and reported as (None, None).

    """
    try:
        conn = get_redis_connection()
        result = conn.mget(TOPIC_COUNT_KEY, POST_COUNT_KEY)
    except redis.RedisError, e:
        logger.error(e)
        return (None, None)

    # mget() returns None for missing keys, which maps to None here
    topic_count = int(result[0]) if result[0] else None
    post_count = int(result[1]) if result[1] else None

    return (topic_count, post_count)


def get_latest_topic_ids(num):
    """
    Return a list of topic ids from the latest topics that have posts. The ids
    will be sorted from newest to oldest.

    On a Redis error, the error is logged and an empty list is returned.

    """
    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
    except redis.RedisError, e:
        logger.error(e)
        return []

    return [int(n) for n in result]


def get_latest_topics(num):
    """
    Return a list of dictionaries with information about the latest topics that
    have updated posts. The topics are sorted from newest to oldest.

    On a Redis error, the error is logged and an empty list is returned.

    """
    try:
        conn = get_redis_connection()
        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)

        # NOTE(review): if a topic's key is deleted between zrevrange and
        # mget (e.g. by notify_topic_delete or trimming), json_list can
        # contain None and simplejson.loads below would raise — verify.
        topic_keys = [UPDATED_TOPIC_KEY % n for n in result]
        json_list = conn.mget(topic_keys) if topic_keys else []

    except redis.RedisError, e:
        logger.error(e)
        return []

    topics = []
    for s in json_list:
        item = simplejson.loads(s)
        # turn the stored timestamp back into a datetime object
        item['date'] = datetime.datetime.fromtimestamp(item['date'])
        topics.append(item)

    return topics


def notify_topic_delete(topic):
    """
    This function should be called when a topic is deleted. It will remove the
    topic from the updated topics set, if present, and delete any info we have
    about the topic.

    Note we don't do anything like this for posts. Since they just populate RSS
    feeds we'll let them 404. The updated topic list is seen in a prominent
    template tag however, so it is a bit more important to get that cleaned up.

    """
    try:
        conn = get_redis_connection()
        pipeline = conn.pipeline()
        pipeline.zrem(UPDATED_TOPICS_SET_KEY, topic.id)
        pipeline.delete(UPDATED_TOPIC_KEY % topic.id)
        pipeline.execute()
    except redis.RedisError, e:
        logger.error(e)


# Down here to avoid a circular import
import forums.tasks