changeset 509:248dd8dd67f8

For #237, use Redis as the source of posts for the RSS feeds to hopefully eliminate some slow queries.
author Brian Neal <bgneal@gmail.com>
date Wed, 07 Dec 2011 01:08:54 +0000 (2011-12-07)
parents 6f5fff924877
children e6298cde9cc9
files gpp/forums/__init__.py gpp/forums/feeds.py gpp/forums/latest.py gpp/urls.py
diffstat 4 files changed, 127 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/gpp/forums/__init__.py	Sun Dec 04 19:53:27 2011 +0000
+++ b/gpp/forums/__init__.py	Wed Dec 07 01:08:54 2011 +0000
@@ -1,1 +1,2 @@
 import signals
+import latest
--- a/gpp/forums/feeds.py	Sun Dec 04 19:53:27 2011 +0000
+++ b/gpp/forums/feeds.py	Wed Dec 07 01:08:54 2011 +0000
@@ -1,11 +1,14 @@
-"""This file contains the feed class for the forums application."""
+"""
+This file contains the feed class for the forums application.
 
+"""
 from django.contrib.syndication.views import Feed
 from django.core.exceptions import ObjectDoesNotExist
 from django.shortcuts import get_object_or_404
 
 from forums.models import Forum, Topic, Post
 from core.functions import copyright_str
+from forums.latest import get_latest_posts
 
 
 class ForumsFeed(Feed):
@@ -16,9 +19,10 @@
     author_email = 'admin@surfguitar101.com'
 
     def get_object(self, request, forum_slug):
-        # only return public forums
+
         if forum_slug:
-            forum = get_object_or_404(Forum, slug=forum_slug)
+            forum = Forum.objects.get(slug=forum_slug)
+            # only return public forums
             if forum.id not in Forum.objects.public_forum_ids():
                 raise ObjectDoesNotExist
             return forum
@@ -52,38 +56,23 @@
         return copyright_str()
 
     def items(self, obj):
-        if obj is None:
-            # return a combined feed of public forum threads
-
-            # This was tricky to do without suffering a large performance
-            # impact. Because the number of forums is small, MySQL did not
-            # try to use an index and ended up searching all the topics for
-            # candidate posts. We work around this by first getting a small list
-            # of candidate topics, and then searching them. This is more
-            # queries but a *lot* more time efficient.
-
-            forum_ids = Forum.objects.public_forum_ids()
-            topic_ids = list(Topic.objects.filter(forum__in=forum_ids).order_by(
-                    '-update_date').values_list('id', flat=True)[:30])
-            items = Post.objects.filter(topic__in=topic_ids)
-
-        else:
-            items = Post.objects.filter(topic__forum=obj)
-
-        return items.order_by('-creation_date').select_related('topic', 'user',
-                'topic__forum')[:30]
+        forum_id = obj.id if obj else None
+        return get_latest_posts(forum_id=forum_id)
 
     def item_title(self, item):
-        return item.topic.name
+        return item['title']
 
     def item_description(self, item):
-        return item.html
+        return item['content']
 
     def item_author_name(self, item):
-        return item.user.username
+        return item['author']
 
     def item_pubdate(self, item):
-       return item.creation_date
+        return item['pubdate']
 
     def item_categories(self, item):
-       return (item.topic.forum.name, )
+        return [item['forum_name']]
+
+    def item_link(self, item):
+        return item['url']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gpp/forums/latest.py	Wed Dec 07 01:08:54 2011 +0000
@@ -0,0 +1,107 @@
+"""
+This module maintains the latest posts datastore. The latest posts are often
+needed by RSS feeds, "latest posts" template tags, etc. This module listens for
+the post_content_update signal, then bundles the post up and stores it by forum
+ID in Redis. We also maintain a combined forums list. This allows quick
+retrieval of the latest posts and avoids some slow SQL queries.
+
+"""
+import datetime
+import time
+
+from django.dispatch import receiver
+from django.utils import simplejson
+
+from forums.signals import post_content_update
+from forums.models import Forum
+from core.services import get_redis_connection
+
+
+# Maximum number of latest posts kept in each Redis list (per-forum & combined)
+MAX_POSTS = 50
+
+
+@receiver(post_content_update, dispatch_uid='forums.latest_posts')
+def on_post_update(sender, **kwargs):
+    """
+    Signal handler for post_content_update; sender is the post involved.
+
+    Edits to existing posts are ignored, as are posts that do not belong to
+    a public forum. A newly created public post is bundled into a dictionary
+    (id, title, HTML content, author name, publication time, forum name and
+    URL), serialized to JSON, and pushed onto the head of two Redis lists:
+        1. The list for the post's parent forum
+        2. The combined list shared by all forums
+
+    Each list is then trimmed to at most MAX_POSTS entries.
+
+    """
+    # edits to existing posts never change the latest posts lists
+    if not kwargs['created']:
+        return
+
+    # posts outside the public forums must not be stored
+    public_ids = Forum.objects.public_forum_ids()
+    topic = sender.topic
+    forum = topic.forum
+    if forum.id not in public_ids:
+        return
+
+    # bundle up the post; the creation date becomes integer seconds since
+    # the epoch so that the bundle can be serialized to JSON
+    payload = simplejson.dumps({
+        'id': sender.id,
+        'title': topic.name,
+        'content': sender.html,
+        'author': sender.user.username,
+        'pubdate': int(time.mktime(sender.creation_date.timetuple())),
+        'forum_name': forum.name,
+        'url': sender.get_absolute_url(),
+    })
+
+    # queue all the Redis commands on one pipeline
+    connection = get_redis_connection()
+    pipeline = connection.pipeline()
+
+    # The bundle is deliberately stored twice (per-forum list and combined
+    # list); that wastes some memory but makes retrieval much easier.
+    forum_key = 'forums:latest:%d' % forum.id
+    combined_key = 'forums:latest:*'
+
+    # push onto the head of each list, then drop anything past MAX_POSTS
+    for key in (forum_key, combined_key):
+        pipeline.lpush(key, payload)
+        pipeline.ltrim(key, 0, MAX_POSTS - 1)
+
+    # send the queued commands to Redis
+    pipeline.execute()
+
+
+def get_latest_posts(num_posts=MAX_POSTS, forum_id=None):
+    """
+    Return up to num_posts of the most recently stored posts as a list of
+    dictionaries, in the order Redis holds them (most recently pushed first).
+
+    A truthy forum_id selects that forum's list; otherwise the combined list
+    for all forums is read. num_posts is clamped to the range 0..MAX_POSTS.
+    Each dictionary's 'pubdate' is converted back into a datetime object.
+
+    """
+    if forum_id:
+        key = 'forums:latest:%d' % forum_id
+    else:
+        key = 'forums:latest:*'
+
+    # never ask for more than we store; nothing to do for a count <= 0
+    count = min(MAX_POSTS, num_posts)
+    if count <= 0:
+        return []
+
+    latest = []
+    for blob in get_redis_connection().lrange(key, 0, count - 1):
+        entry = simplejson.loads(blob)
+        # the timestamp was stored as integer seconds since the epoch
+        entry['pubdate'] = datetime.datetime.fromtimestamp(entry['pubdate'])
+        latest.append(entry)
+
+    return latest
--- a/gpp/urls.py	Sun Dec 04 19:53:27 2011 +0000
+++ b/gpp/urls.py	Wed Dec 07 01:08:54 2011 +0000
@@ -28,11 +28,11 @@
        cache_page(LatestNewsFeed(), 6 * 60 * 60),
        name='feeds-news'),
    url(r'^feeds/forums/$',
-       cache_page(ForumsFeed(), 15 * 60),
+       cache_page(ForumsFeed(), 5 * 60),
        {'forum_slug': None},
        'feeds-forum_combined'),
    url(r'^feeds/forums/(?P<forum_slug>[\w\d-]+)/$',
-       cache_page(ForumsFeed(), 15 * 60),
+       cache_page(ForumsFeed(), 5 * 60),
        name='feeds-forum'),
    (r'^forums/', include('forums.urls')),
    (r'^irc/', include('irc.urls')),