Mercurial > public > sg101
changeset 509:248dd8dd67f8
For #237, use Redis as the source of posts for the RSS feeds to hopefully eliminate some slow queries.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Wed, 07 Dec 2011 01:08:54 +0000 |
parents | 6f5fff924877 |
children | e6298cde9cc9 |
files | gpp/forums/__init__.py gpp/forums/feeds.py gpp/forums/latest.py gpp/urls.py |
diffstat | 4 files changed, 127 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/gpp/forums/__init__.py Sun Dec 04 19:53:27 2011 +0000 +++ b/gpp/forums/__init__.py Wed Dec 07 01:08:54 2011 +0000 @@ -1,1 +1,2 @@ import signals +import latest
--- a/gpp/forums/feeds.py Sun Dec 04 19:53:27 2011 +0000 +++ b/gpp/forums/feeds.py Wed Dec 07 01:08:54 2011 +0000 @@ -1,11 +1,14 @@ -"""This file contains the feed class for the forums application.""" +""" +This file contains the feed class for the forums application. +""" from django.contrib.syndication.views import Feed from django.core.exceptions import ObjectDoesNotExist from django.shortcuts import get_object_or_404 from forums.models import Forum, Topic, Post from core.functions import copyright_str +from forums.latest import get_latest_posts class ForumsFeed(Feed): @@ -16,9 +19,10 @@ author_email = 'admin@surfguitar101.com' def get_object(self, request, forum_slug): - # only return public forums + if forum_slug: - forum = get_object_or_404(Forum, slug=forum_slug) + forum = Forum.objects.get(slug=forum_slug) + # only return public forums if forum.id not in Forum.objects.public_forum_ids(): raise ObjectDoesNotExist return forum @@ -52,38 +56,23 @@ return copyright_str() def items(self, obj): - if obj is None: - # return a combined feed of public forum threads - - # This was tricky to do without suffering a large performance - # impact. Because the number of forums is small, MySQL did not - # try to use an index and ended up searching all the topics for - # candidate posts. We work around this by first getting a small list - # of candidate topics, and then searching them. This is more - # queries but a *lot* more time efficient. - - forum_ids = Forum.objects.public_forum_ids() - topic_ids = list(Topic.objects.filter(forum__in=forum_ids).order_by( - '-update_date').values_list('id', flat=True)[:30]) - items = Post.objects.filter(topic__in=topic_ids) - - else: - items = Post.objects.filter(topic__forum=obj) - - return items.order_by('-creation_date').select_related('topic', 'user', - 'topic__forum')[:30] + forum_id = obj.id if obj else None + return get_latest_posts(forum_id=forum_id) def item_title(self, item): - return item.topic.name + return item['title'] def item_description(self, item): - return item.html + return item['content'] def item_author_name(self, item): - return item.user.username + return item['author'] def item_pubdate(self, item): - return item.creation_date + return item['pubdate'] def item_categories(self, item): - return (item.topic.forum.name, ) + return [item['forum_name']] + + def item_link(self, item): + return item['url']
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gpp/forums/latest.py Wed Dec 07 01:08:54 2011 +0000 @@ -0,0 +1,107 @@ +""" +This module maintains the latest posts datastore. The latest posts are often +needed by RSS feeds, "latest posts" template tags, etc. This module listens for +the post_content_update signal, then bundles the post up and stores it by forum +ID in Redis. We also maintain a combined forums list. This allows quick +retrieval of the latest posts and avoids some slow SQL queries. + +""" +import datetime +import time + +from django.dispatch import receiver +from django.utils import simplejson + +from forums.signals import post_content_update +from forums.models import Forum +from core.services import get_redis_connection + + +# This constant controls how many latest posts per forum we store +MAX_POSTS = 50 + + +@receiver(post_content_update, dispatch_uid='forums.latest_posts') +def on_post_update(sender, **kwargs): + """ + This function is our signal handler, called when a post has been updated. + We only care about newly created posts, and ignore updates. + + We serialize a post to JSON then store in two lists in Redis: + 1. The list for the post's parent forum + 2. The combined forum list + + Note that we only store posts from public forums. + + """ + # ignore non-new posts + if not kwargs['created']: + return + + # ignore posts from non-public forums + public_forums = Forum.objects.public_forum_ids() + if sender.topic.forum.id not in public_forums: + return + + # serialize post attributes + post_content = { + 'id': sender.id, + 'title': sender.topic.name, + 'content': sender.html, + 'author': sender.user.username, + 'pubdate': int(time.mktime(sender.creation_date.timetuple())), + 'forum_name': sender.topic.forum.name, + 'url': sender.get_absolute_url() + } + + s = simplejson.dumps(post_content) + + # store in Redis + + redis = get_redis_connection() + pipeline = redis.pipeline() + + key = 'forums:latest:%d' % sender.topic.forum.id + + pipeline.lpush(key, s) + pipeline.ltrim(key, 0, MAX_POSTS - 1) + + # store in the combined feed; yes this wastes some memory storing it twice, + # but it makes things much easier + + key = 'forums:latest:*' + + pipeline.lpush(key, s) + pipeline.ltrim(key, 0, MAX_POSTS - 1) + + pipeline.execute() + + +def get_latest_posts(num_posts=MAX_POSTS, forum_id=None): + """ + This function retrieves num_posts latest posts for the forum with the given + forum_id. If forum_id is None, the posts are retrieved from the combined + forums datastore. A list of dictionaries is returned. Each dictionary + contains information about a post. + + """ + key = 'forums:latest:%d' % forum_id if forum_id else 'forums:latest:*' + + num_posts = max(0, min(MAX_POSTS, num_posts)) + + if num_posts == 0: + return [] + + redis = get_redis_connection() + raw_posts = redis.lrange(key, 0, num_posts - 1) + + posts = [] + for raw_post in raw_posts: + post = simplejson.loads(raw_post) + + # fix up the pubdate; turn it back into a datetime object + post['pubdate'] = datetime.datetime.fromtimestamp(post['pubdate']) + + posts.append(post) + + return posts
--- a/gpp/urls.py Sun Dec 04 19:53:27 2011 +0000 +++ b/gpp/urls.py Wed Dec 07 01:08:54 2011 +0000 @@ -28,11 +28,11 @@ cache_page(LatestNewsFeed(), 6 * 60 * 60), name='feeds-news'), url(r'^feeds/forums/$', - cache_page(ForumsFeed(), 15 * 60), + cache_page(ForumsFeed(), 5 * 60), {'forum_slug': None}, 'feeds-forum_combined'), url(r'^feeds/forums/(?P<forum_slug>[\w\d-]+)/$', - cache_page(ForumsFeed(), 15 * 60), + cache_page(ForumsFeed(), 5 * 60), name='feeds-forum'), (r'^forums/', include('forums.urls')), (r'^irc/', include('irc.urls')),