diff forums/latest.py @ 581:ee87ea74d46b

For Django 1.4, rearranged project structure for new manage.py.
author Brian Neal <bgneal@gmail.com>
date Sat, 05 May 2012 17:10:48 -0500
parents gpp/forums/latest.py@7388cdf61b25
children 2469d5864249
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/forums/latest.py	Sat May 05 17:10:48 2012 -0500
@@ -0,0 +1,342 @@
+"""
+This module maintains the latest posts datastore. The latest posts are often
+needed by RSS feeds, "latest posts" template tags, etc. This module listens for
+the post_content_update signal, then bundles the post up and stores it by forum
+ID in Redis. We also maintain a combined forums list. This allows quick
+retrieval of the latest posts and avoids some slow SQL queries.
+
+We also do things like send topic notification emails, auto-favorite, and
+auto-subscribe functions here rather than bog the user down in the request /
+response cycle.
+
+"""
+import datetime
+import logging
+import time
+
+from django.dispatch import receiver
+from django.utils import simplejson
+import redis
+
+from forums.signals import post_content_update, topic_content_update
+from forums.models import Forum, Topic, Post
+from forums.views.subscriptions import notify_topic_subscribers
+from forums.tools import auto_favorite, auto_subscribe
+from core.services import get_redis_connection
+
+# This constant controls how many latest posts per forum we store
+MAX_POSTS = 50
+
+# This controls how many updated topics we track
+MAX_UPDATED_TOPICS = 50
+
+# Redis key names:
+POST_COUNT_KEY = "forums:public_post_count"
+TOPIC_COUNT_KEY = "forums:public_topic_count"
+UPDATED_TOPICS_SET_KEY = "forums:updated_topics:set"
+UPDATED_TOPIC_KEY = "forums:updated_topics:%s"
+
+logger = logging.getLogger(__name__)
+
+
@receiver(post_content_update, dispatch_uid='forums.latest_posts')
def on_post_update(sender, **kwargs):
    """
    Signal handler invoked whenever a post's content is updated.

    Only newly created posts matter here; edits to existing posts are
    ignored. The real work is handed off to a Celery task so it happens
    outside the request/response cycle.

    """
    # Only process creations; updates to existing posts are ignored.
    if kwargs['created']:
        # Hand the heavy lifting off to Celery.
        forums.tasks.new_post_task.delay(sender.id)
+
+
def process_new_post(post_id):
    """
    Celery task body: perform all processing required for a new post.

    Updates the Redis-backed feeds and statistics (public forums only),
    sends subscription notification email, and applies any auto-favorite
    and auto-subscribe actions.

    """
    try:
        post = Post.objects.select_related().get(pk=post_id)
    except Post.DoesNotExist:
        logger.warning("process_new_post: post %d does not exist", post_id)
        return

    # Feed and statistic updates apply only to posts in public forums.
    public_forums = Forum.objects.public_forum_ids()
    if post.topic.forum.id in public_forums:
        conn = get_redis_connection()
        _update_post_feeds(conn, post)
        _update_post_count(conn, public_forums)
        _update_latest_topics(conn, post)

    # Email notifications (sent inline, not deferred), then the
    # auto-favorite / auto-subscribe bookkeeping for the poster.
    notify_topic_subscribers(post, defer=False)
    auto_favorite(post)
    auto_subscribe(post)
+
+
def _update_post_feeds(conn, post):
    """
    Store the post in the per-forum and combined Redis feeds that back our
    RSS feeds.

    """
    # Flatten the post into a JSON string; pubdate is stored as a Unix
    # timestamp so it survives serialization.
    serialized = simplejson.dumps({
        'id': post.id,
        'title': post.topic.name,
        'content': post.html,
        'author': post.user.username,
        'pubdate': int(time.mktime(post.creation_date.timetuple())),
        'forum_name': post.topic.forum.name,
        'url': post.get_absolute_url()
    })

    pipeline = conn.pipeline()

    # Push onto the forum-specific list and the combined '*' list, trimming
    # each to the newest MAX_POSTS entries. Storing the post twice wastes a
    # little memory but keeps retrieval simple.
    for key in ('forums:latest:%d' % post.topic.forum.id, 'forums:latest:*'):
        pipeline.lpush(key, serialized)
        pipeline.ltrim(key, 0, MAX_POSTS - 1)

    pipeline.execute()
+
+
def _update_post_count(conn, public_forums):
    """
    Maintain the cached public post count in Redis; a COUNT(*) on the post
    table can be expensive in MySQL InnoDB.

    """
    # INCR on a missing key creates it as 1, so a result of 1 means Redis
    # lost the key (e.g. it got flushed); rebuild from the database.
    if conn.incr(POST_COUNT_KEY) == 1:
        actual = Post.objects.filter(topic__forum__in=public_forums).count()
        conn.set(POST_COUNT_KEY, actual)
+
+
def _update_latest_topics(conn, post):
    """
    Updates the "latest topics with new posts" list we cache in Redis for speed.
    There is a template tag and forum view that uses this information.

    """
    # serialize topic attributes
    topic_id = post.topic.id
    # Score by post creation time (Unix timestamp) so the sorted set orders
    # topics by the time of their most recent post.
    topic_score = int(time.mktime(post.creation_date.timetuple()))

    topic_content = {
        'title': post.topic.name,
        'author': post.user.username,
        'date': topic_score,
        'url': post.topic.get_latest_post_url()
    }
    json = simplejson.dumps(topic_content)
    key = UPDATED_TOPIC_KEY % topic_id

    # NOTE(review): zadd(key, score, member) matches the pre-3.0 redis-py
    # calling convention -- confirm against the installed redis-py version.
    pipeline = conn.pipeline()
    pipeline.set(key, json)
    pipeline.zadd(UPDATED_TOPICS_SET_KEY, topic_score, topic_id)
    pipeline.zcard(UPDATED_TOPICS_SET_KEY)
    results = pipeline.execute()

    # delete topics beyond our maximum count
    num_topics = results[-1]            # zcard result: current cardinality
    num_to_del = num_topics - MAX_UPDATED_TOPICS
    if num_to_del > 0:
        # get the IDs of the topics we need to delete first
        start = 0
        stop = num_to_del - 1   # Redis indices are inclusive
        old_ids = conn.zrange(UPDATED_TOPICS_SET_KEY, start, stop)

        # remove the per-topic JSON blobs for the evicted topics
        keys = [UPDATED_TOPIC_KEY % n for n in old_ids]
        conn.delete(*keys)

        # now delete the oldest num_to_del topics
        conn.zremrangebyrank(UPDATED_TOPICS_SET_KEY, start, stop)
+
+
def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
    """
    Retrieve up to num_posts of the newest posts for the forum with the
    given forum_id, or from the combined forums datastore when forum_id is
    None. Returns a list of dictionaries, one per post.

    """
    if forum_id:
        key = 'forums:latest:%d' % forum_id
    else:
        key = 'forums:latest:*'

    # Clamp the request to [0, MAX_POSTS]; we never store more than that.
    num_posts = max(0, min(MAX_POSTS, num_posts))
    if not num_posts:
        return []

    conn = get_redis_connection()

    posts = []
    for raw in conn.lrange(key, 0, num_posts - 1):
        record = simplejson.loads(raw)
        # pubdate was stored as a Unix timestamp; restore the datetime
        record['pubdate'] = datetime.datetime.fromtimestamp(record['pubdate'])
        posts.append(record)

    return posts
+
+
# BUG FIX: dispatch_uid was 'forums.latest_posts', copy-pasted from
# on_post_update above. Django keys receivers per signal so there was no
# runtime collision, but each handler should carry its own unique uid.
@receiver(topic_content_update, dispatch_uid='forums.latest_topics')
def on_topic_update(sender, **kwargs):
    """
    This function is our signal handler, called when a topic has been updated.
    We only care about newly created topics, and ignore updates.

    We kick off a Celery task to perform work outside of the request/response
    cycle.

    """
    # ignore non-new topics
    if not kwargs['created']:
        return

    # Kick off a Celery task to process this new topic
    forums.tasks.new_topic_task.delay(sender.id)
+
+
def process_new_topic(topic_id):
    """
    Celery task body: perform all new-topic processing. Currently this only
    maintains the cached public topic count statistic.

    """
    try:
        topic = Topic.objects.select_related().get(pk=topic_id)
    except Topic.DoesNotExist:
        logger.warning("process_new_topic: topic %d does not exist", topic_id)
        return

    # Topics in non-public forums don't count towards the statistic.
    public_forums = Forum.objects.public_forum_ids()
    if topic.forum.id not in public_forums:
        return

    conn = get_redis_connection()

    # INCR returning 1 means the key was missing (Redis likely got trashed);
    # recompute the true count from the database.
    if conn.incr(TOPIC_COUNT_KEY) == 1:
        actual = Topic.objects.filter(forum__in=public_forums).count()
        conn.set(TOPIC_COUNT_KEY, actual)
+
+
+def get_stats():
+    """
+    This function returns the topic and post count statistics as a tuple, in
+    that order. If a statistic is not available, its position in the tuple will
+    be None.
+
+    """
+    try:
+        conn = get_redis_connection()
+        result = conn.mget(TOPIC_COUNT_KEY, POST_COUNT_KEY)
+    except redis.RedisError, e:
+        logger.error(e)
+        return (None, None)
+
+    topic_count = int(result[0]) if result[0] else None
+    post_count = int(result[1]) if result[1] else None
+
+    return (topic_count, post_count)
+
+
+def get_latest_topic_ids(num):
+    """
+    Return a list of topic ids from the latest topics that have posts. The ids
+    will be sorted from newest to oldest.
+
+    """
+    try:
+        conn = get_redis_connection()
+        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
+    except redis.RedisError, e:
+        logger.error(e)
+        return []
+
+    return [int(n) for n in result]
+
+
+def get_latest_topics(num):
+    """
+    Return a list of dictionaries with information about the latest topics that
+    have updated posts. The topics are sorted from newest to oldest.
+
+    """
+    try:
+        conn = get_redis_connection()
+        result = conn.zrevrange(UPDATED_TOPICS_SET_KEY, 0, num - 1)
+
+        topic_keys = [UPDATED_TOPIC_KEY % n for n in result]
+        json_list = conn.mget(topic_keys) if topic_keys else []
+
+    except redis.RedisError, e:
+        logger.error(e)
+        return []
+
+    topics = []
+    for s in json_list:
+        item = simplejson.loads(s)
+        item['date'] = datetime.datetime.fromtimestamp(item['date'])
+        topics.append(item)
+
+    return topics
+
+
+def notify_topic_delete(topic):
+    """
+    This function should be called when a topic is deleted. It will remove the
+    topic from the updated topics set, if present, and delete any info we have
+    about the topic.
+
+    Note we don't do anything like this for posts. Since they just populate RSS
+    feeds we'll let them 404. The updated topic list is seen in a prominent
+    template tag however, so it is a bit more important to get that cleaned up.
+
+    """
+    try:
+        conn = get_redis_connection()
+        pipeline = conn.pipeline()
+        pipeline.zrem(UPDATED_TOPICS_SET_KEY, topic.id)
+        pipeline.delete(UPDATED_TOPIC_KEY % topic.id)
+        pipeline.execute()
+    except redis.RedisError, e:
+        logger.error(e)
+
+
+# Down here to avoid a circular import
+import forums.tasks