# HG changeset patch # User Brian Neal # Date 1388797278 21600 # Node ID ad53d929281aac5ea2d2b321910d3ae2537a719b # Parent 95f4e7f352fd6e401ad4cafdd878e6ab39416463 For issue #62, upgrade Haystack from 1.2.7 to 2.1.0. diff -r 95f4e7f352fd -r ad53d929281a bio/search_indexes.py --- a/bio/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/bio/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -1,30 +1,18 @@ """Haystack search index for the bio application.""" -from haystack.indexes import * -from haystack import site -from custom_search.indexes import CondQueuedSearchIndex +from haystack import indexes from bio.models import UserProfile -from bio.signals import profile_content_update -class UserProfileIndex(CondQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='user') +class UserProfileIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='user') - def index_queryset(self): + def get_model(self): + return UserProfile + + def index_queryset(self, using=None): return UserProfile.objects.filter(user__is_active=True) def get_updated_field(self): return 'update_date' - - def _setup_save(self, model): - profile_content_update.connect(self.enqueue_save) - - def _teardown_save(self, model): - profile_content_update.disconnect(self.enqueue_save) - - def enqueue_save(self, sender, **kwargs): - return self.enqueue('update', sender) - - -site.register(UserProfile, UserProfileIndex) diff -r 95f4e7f352fd -r ad53d929281a custom_search/forms.py --- a/custom_search/forms.py Wed Jan 01 19:52:07 2014 -0600 +++ b/custom_search/forms.py Fri Jan 03 19:01:18 2014 -0600 @@ -3,7 +3,10 @@ our needs. """ +import logging + from django import forms +from django.conf import settings from haystack.forms import ModelSearchForm @@ -18,20 +21,79 @@ ('ygroup.post', 'Yahoo Group Archives'), ) +logger = logging.getLogger(__name__) + class CustomModelSearchForm(ModelSearchForm): """ This customized ModelSearchForm allows us to explictly label and order the model choices. + We also provide "all words", "exact phrase", and "exclude" text input boxes. + Haystack 2.1.0's auto_query() function did not seem to work right so we just + rolled our own. + """ - q = forms.CharField(required=False, label='', - widget=forms.TextInput(attrs={'type': 'search', - 'class': 'search', - 'size': 48, - })) + q = forms.CharField(required=False, label='All these words', + widget=forms.TextInput(attrs={'type': 'search', 'class': 'search', + 'size': 48})) + exact = forms.CharField(required=False, label='This exact word or phrase', + widget=forms.TextInput(attrs={'type': 'search', 'class': 'search', + 'size': 48})) + exclude = forms.CharField(required=False, label='None of these words', + widget=forms.TextInput(attrs={'type': 'search', 'class': 'search', + 'size': 48})) def __init__(self, *args, **kwargs): super(CustomModelSearchForm, self).__init__(*args, **kwargs) self.fields['models'] = forms.MultipleChoiceField(choices=MODEL_CHOICES, - label='', widget=forms.CheckboxSelectMultiple) + label='Search in', widget=forms.CheckboxSelectMultiple) + + def clean(self): + if not settings.SEARCH_QUEUE_ENABLED: + raise forms.ValidationError("Our search function is offline for " + "maintenance. Please try again later. " + "We apologize for any inconvenience.") + + if not (self.cleaned_data['q'] or self.cleaned_data['exact'] or + self.cleaned_data['exclude']): + raise forms.ValidationError('Please supply some search terms') + + return self.cleaned_data + + def search(self): + if not self.is_valid(): + return self.no_query_found() + + logger.info('Search executed: /%s/%s/%s/ in %s', + self.cleaned_data['q'], + self.cleaned_data['exact'], + self.cleaned_data['exclude'], + self.cleaned_data['models']) + + sqs = self.searchqueryset + + # Note that in Haystack 2.x content is untrusted and is automatically + # auto-escaped for us. + # + # Filter on the q terms; these should be and'ed together: + terms = self.cleaned_data['q'].split() + for term in terms: + sqs = sqs.filter(content=term) + + # Exact words or phrases: + if self.cleaned_data['exact']: + sqs = sqs.filter(content__exact=self.cleaned_data['exact']) + + # Exclude terms: + terms = self.cleaned_data['exclude'].split() + for term in terms: + sqs = sqs.exclude(content=term) + + if self.load_all: + sqs = sqs.load_all() + + # Apply model filtering + sqs = sqs.models(*self.get_models()) + + return sqs diff -r 95f4e7f352fd -r ad53d929281a custom_search/indexes.py --- a/custom_search/indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -""" -This module contains custom search indexes to tailor the Haystack search -application to our needs. - -""" -from queued_search.indexes import QueuedSearchIndex - - -class CondQueuedSearchIndex(QueuedSearchIndex): - """ - This customized version of QueuedSearchIndex conditionally enqueues items - to be indexed by calling the can_index() method. - - """ - def can_index(self, instance): - """ - The default is to index all instances. Override this method to - customize the behavior. This will be called on all update operations. - - """ - return True - - def enqueue(self, action, instance): - """ - This method enqueues the instance only if the can_index() method - returns True. - - """ - if (action == 'update' and self.can_index(instance) or - action == 'delete'): - super(CondQueuedSearchIndex, self).enqueue(action, instance) - - -class PublicQueuedSearchIndex(QueuedSearchIndex): - """QueuedSearchIndex for models with is_public attributes.""" - - def enqueue(self, action, instance): - """Conditionally enqueue actions as follows. - - For update actions: if is_public is True, enqueue the update. If - is_public is False, enqueue a delete action. - - Delete actions are always enqueued. - - """ - if action == 'update' and instance.is_public: - super(PublicQueuedSearchIndex, self).enqueue(action, instance) - elif (action == 'update' and not instance.is_public) or action == 'delete': - super(PublicQueuedSearchIndex, self).enqueue('delete', instance) diff -r 95f4e7f352fd -r ad53d929281a custom_search/signals.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/custom_search/signals.py Fri Jan 03 19:01:18 2014 -0600 @@ -0,0 +1,118 @@ +"""This module contains a custom Haystack signal processing class to update the +search index in realtime. We update our search index by enqueuing edits and +deletes into a queue for batch processing. Our class ensures we only enqueue +content that should be in the search index. + +""" +from django.db.models import signals +import queued_search.signals + +from bio.signals import profile_content_update +from forums.signals import topic_content_update, post_content_update + +import ygroup.models +from weblinks.models import Link +from podcast.models import Item +from news.models import Story +from downloads.models import Download +from forums.models import Forum, Topic, Post +from bio.models import UserProfile + + +UID = 'custom_search.signals' + + +class QueuedSignalProcessor(queued_search.signals.QueuedSignalProcessor): + """ + This customized version of queued_search's QueuedSignalProcessor + conditionally enqueues items to be indexed. + + """ + def __init__(self, *args, **kwargs): + + # We assume that it is okay to attempt to delete a model from the search + # index even if the model object is not in the index. In other words, + # attempting to delete an object from the index will not cause any + # errors if it is not in the index. Thus if we see an object that has an + # 'is_public' attribute, and it is false, we can safely enqueue a delete + # in case the 'is_public' attribute just went from True to False. We + # have no way of knowing that, it could have been False all along, but we + # just try the delete in case to be safe. + + # To make the code easier to read, use a table to drive our signal + # connecting and disconnecting: + self.signal_chain = [ + # Yahoo Group posts are always updated: + (signals.post_save, ygroup.models.Post, self.enqueue_save), + (signals.post_delete, ygroup.models.Post, self.enqueue_delete), + + # Weblink Links are updated if they are public: + (signals.post_save, Link, self.enqueue_public_save), + (signals.post_delete, Link, self.enqueue_delete), + + # Podcast Items are always updated: + (signals.post_save, Item, self.enqueue_save), + (signals.post_delete, Item, self.enqueue_delete), + + # News Stories are always updated: + (signals.post_save, Story, self.enqueue_save), + (signals.post_delete, Story, self.enqueue_delete), + + # Downloads are updated if they are public: + (signals.post_save, Download, self.enqueue_public_save), + (signals.post_delete, Download, self.enqueue_delete), + + # Forum Topics are updated if they belong to a public forum: + (topic_content_update, None, self.enqueue_topic_save), + (signals.post_delete, Topic, self.enqueue_delete), + + # Forum Posts are updated if they belong to a public forum: + (post_content_update, None, self.enqueue_post_save), + (signals.post_delete, Post, self.enqueue_delete), + + # UserProfiles are updated when we receive a special signal: + (profile_content_update, None, self.enqueue_profile), + (signals.post_delete, UserProfile, self.enqueue_delete), + ] + + super(QueuedSignalProcessor, self).__init__(*args, **kwargs) + + def setup(self): + """We override setup() so we can attach signal handlers to only the + models we search on. In some cases we have custom signals to tell us + when to update the search index. + + """ + for signal, sender, receiver in self.signal_chain: + signal.connect(receiver, sender=sender, dispatch_uid=UID) + + def teardown(self): + """Disconnect all signals we previously connected.""" + for signal, sender, receiver in self.signal_chain: + signal.disconnect(receiver, sender=sender, dispatch_uid=UID) + + def enqueue_public_save(self, sender, instance, **kwargs): + """Index only if the instance is_public. + + If not, enqueue a delete just in case the is_public flag got flipped + from True to False. + + """ + if instance.is_public: + self.enqueue_save(sender, instance, **kwargs) + else: + self.enqueue_delete(sender, instance, **kwargs) + + def enqueue_topic_save(self, sender, **kwargs): + """Enqueue only if the topic instance belongs to a public forum.""" + if sender.forum.id in Forum.objects.public_forum_ids(): + self.enqueue_save(Topic, sender, **kwargs) + + def enqueue_post_save(self, sender, **kwargs): + """Enqueue only if the post instance belongs to a public forum.""" + if sender.topic.forum.id in Forum.objects.public_forum_ids(): + self.enqueue_save(Post, sender, **kwargs) + + def enqueue_profile(self, sender, **kwargs): + """Forward the user profile instance on unconditionally.""" + self.enqueue_save(UserProfile, sender, **kwargs) diff -r 95f4e7f352fd -r ad53d929281a downloads/search_indexes.py --- a/downloads/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/downloads/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -1,20 +1,19 @@ """Haystack search index for the downloads application.""" -from haystack.indexes import CharField, DateTimeField -from haystack import site -from custom_search.indexes import PublicQueuedSearchIndex +from haystack import indexes from downloads.models import Download -class DownloadIndex(PublicQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='user') - pub_date = DateTimeField(model_attr='date_added') +class DownloadIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='user') + pub_date = indexes.DateTimeField(model_attr='date_added') - def index_queryset(self): + def get_model(self): + return Download + + def index_queryset(self, using=None): return Download.public_objects.all() def get_updated_field(self): return 'update_date' - -site.register(Download, DownloadIndex) diff -r 95f4e7f352fd -r ad53d929281a forums/search_indexes.py --- a/forums/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/forums/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -1,60 +1,35 @@ """Haystack search index for the weblinks application.""" -from haystack.indexes import * -from haystack import site -from custom_search.indexes import CondQueuedSearchIndex +from haystack import indexes from forums.models import Forum, Topic, Post -from forums.signals import topic_content_update, post_content_update -class TopicIndex(CondQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='user') - pub_date = DateTimeField(model_attr='creation_date') +class TopicIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='user') + pub_date = indexes.DateTimeField(model_attr='creation_date') - def index_queryset(self): + def get_model(self): + return Topic + + def index_queryset(self, using=None): return Topic.objects.filter(forum__in=Forum.objects.public_forum_ids()) def get_updated_field(self): return 'update_date' - def _setup_save(self, model): - topic_content_update.connect(self.enqueue_save) - def _teardown_save(self, model): - topic_content_update.disconnect(self.enqueue_save) +class PostIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='user') + pub_date = indexes.DateTimeField(model_attr='creation_date') - def enqueue_save(self, sender, **kwargs): - return self.enqueue('update', sender) + def get_model(self): + return Post - def can_index(self, instance): - return instance.forum.id in Forum.objects.public_forum_ids() - - -class PostIndex(CondQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='user') - pub_date = DateTimeField(model_attr='creation_date') - - def index_queryset(self): + def index_queryset(self, using=None): return Post.objects.filter( topic__forum__in=Forum.objects.public_forum_ids()) def get_updated_field(self): return 'update_date' - - def _setup_save(self, model): - post_content_update.connect(self.enqueue_save) - - def _teardown_save(self, model): - post_content_update.disconnect(self.enqueue_save) - - def enqueue_save(self, sender, **kwargs): - return self.enqueue('update', sender) - - def can_index(self, instance): - return instance.topic.forum.id in Forum.objects.public_forum_ids() - - -site.register(Topic, TopicIndex) -site.register(Post, PostIndex) diff -r 95f4e7f352fd -r ad53d929281a news/search_indexes.py --- a/news/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/news/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -1,18 +1,16 @@ """Haystack search index for the news application.""" -from haystack.indexes import * -from haystack import site -from custom_search.indexes import CondQueuedSearchIndex +from haystack import indexes from news.models import Story -class StoryIndex(CondQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='submitter') - pub_date = DateTimeField(model_attr='date_submitted') +class StoryIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='submitter') + pub_date = indexes.DateTimeField(model_attr='date_submitted') + + def get_model(self): + return Story def get_updated_field(self): return 'update_date' - - -site.register(Story, StoryIndex) diff -r 95f4e7f352fd -r ad53d929281a podcast/search_indexes.py --- a/podcast/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/podcast/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -1,18 +1,16 @@ """Haystack search index for the news application.""" -from haystack.indexes import * -from haystack import site -from custom_search.indexes import CondQueuedSearchIndex +from haystack import indexes from podcast.models import Item -class ItemIndex(CondQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='author') - pub_date = DateTimeField(model_attr='pubdate') +class ItemIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='author') + pub_date = indexes.DateTimeField(model_attr='pubdate') + + def get_model(self): + return Item def get_updated_field(self): return 'update_date' - - -site.register(Item, ItemIndex) diff -r 95f4e7f352fd -r ad53d929281a requirements_dev.txt --- a/requirements_dev.txt Wed Jan 01 19:52:07 2014 -0600 +++ b/requirements_dev.txt Fri Jan 03 19:01:18 2014 -0600 @@ -3,16 +3,16 @@ MySQL-python==1.2.4 django-debug-toolbar==1.0 -e git+https://github.com/gremmie/django-elsewhere.git@1203bd331aba4c5d4e702cc4e64d807310f2b591#egg=django_elsewhere-dev -django-haystack==1.2.7 +django-haystack==2.1.0 django-tagging==0.3.1 gdata==2.0.15 html5lib==0.90 pytz==2013b -queued-search==1.0.4 -queues==0.6.1 +queued-search==2.1.0 +queues==0.6.3 redis==2.7.2 repoze.timeago==0.5 -xapian-haystack==1.1.5beta +-e git+https://github.com/notanumber/xapian-haystack.git@37add92bc43fe50bf165e91f370269c26272f1eb#egg=xapian_haystack-dev anyjson==0.3.3 celery==3.1.7 django-picklefield==0.3.1 diff -r 95f4e7f352fd -r ad53d929281a sg101/search_sites.py --- a/sg101/search_sites.py Wed Jan 01 19:52:07 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -import haystack -haystack.autodiscover() diff -r 95f4e7f352fd -r ad53d929281a sg101/settings/base.py --- a/sg101/settings/base.py Wed Jan 01 19:52:07 2014 -0600 +++ b/sg101/settings/base.py Fri Jan 03 19:01:18 2014 -0600 @@ -192,10 +192,14 @@ ####################################################################### # Haystack Search Settings ####################################################################### -HAYSTACK_SITECONF = 'sg101.search_sites' -HAYSTACK_SEARCH_ENGINE = 'xapian' -HAYSTACK_XAPIAN_PATH = os.path.join(PROJECT_PATH, 'xapian_index') +HAYSTACK_SIGNAL_PROCESSOR = 'custom_search.signals.QueuedSignalProcessor' +HAYSTACK_CONNECTIONS = { + 'default': { + 'ENGINE': 'xapian_backend.XapianEngine', + 'PATH': os.path.join(PROJECT_PATH, 'xapian_index'), + }, +} ####################################################################### # Redis integration & settings @@ -297,7 +301,7 @@ # If this flag is False, the queued_search queue will not be processed. This is # useful when we are rebuilding the search index. -SEARCH_QUEUE_ENABLED = True +SEARCH_QUEUE_ENABLED = False ####################################################################### # Asynchronous settings (queues, queued_search, redis, celery, etc) diff -r 95f4e7f352fd -r ad53d929281a sg101/templates/search/search.html --- a/sg101/templates/search/search.html Wed Jan 01 19:52:07 2014 -0600 +++ b/sg101/templates/search/search.html Fri Jan 03 19:01:18 2014 -0600 @@ -28,60 +28,69 @@ {% block content %}

Search Search

-{{ form.q }}
-Search in: -{{ form.models }} -

Check all | Check none

+Find content with: + + {{ form.as_table }} + + + + + + + + +
Check all | Check none
- {% if query %} -

Results for "{{ query }}" page {{ page.number }} of {{ page.paginator.num_pages }}

+{% if form.is_valid %} +

Search results; page {{ page.number }} of {{ page.paginator.num_pages }}

- {% if page.paginator.count %} -

- {{ page.paginator.count }} hit{{ page.paginator.count|pluralize }} -

- {% endif %} + {% if page.paginator.count %} +

+ {{ page.paginator.count }} hit{{ page.paginator.count|pluralize }} +

+ {% endif %} - {% if page.object_list %} -
- {% for result in page.object_list %} -
- {{ result.verbose_name }}: {{ result.object.search_title }} ({{ result.score }}) -
-
- {% highlight result.object.search_summary with query css_class "highlight" max_length 200 %} -
- {% endfor %} -
- {% else %} -

No results found for {{ query }}.

- {% endif %} + {% if page.object_list %} +
+ {% for result in page.object_list %} +
+ {{ result.verbose_name }}: {{ result.object.search_title }} ({{ result.score }}) +
+
+ {% highlight result.object.search_summary with query css_class "highlight" max_length 200 %} +
+ {% endfor %} +
+ {% else %} +

No search results found.

+ {% endif %} - {% if page.has_previous or page.has_next %} -
- {% if page.has_previous %}{% endif %}« Previous{% if page.has_previous %}{% endif %} - | - {% if page.has_next %}{% endif %}Next »{% if page.has_next %}{% endif %} -
- {% endif %} - {% else %} -
-

Thank you for using the SG101 search engine! Here are some searching tips that may help you find what you are looking for.

- -
- {% endif %} + {% if page.has_previous or page.has_next %} +
+ {% if page.has_previous %}{% endif %}« Previous{% if page.has_previous %}{% endif %} + | + {% if page.has_next %}{% endif %}Next »{% if page.has_next %}{% endif %} +
+ {% endif %} +{% else %} +
+

Thank you for using the SG101 search engine! Here are some searching tips that may help you find what you are looking for.

+ +
+{% endif %} {% endblock %} diff -r 95f4e7f352fd -r ad53d929281a weblinks/search_indexes.py --- a/weblinks/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/weblinks/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -1,20 +1,20 @@ """Haystack search index for the weblinks application.""" -from haystack.indexes import CharField, DateTimeField -from haystack import site -from custom_search.indexes import PublicQueuedSearchIndex +from haystack import indexes from weblinks.models import Link -class LinkIndex(PublicQueuedSearchIndex): - text = CharField(document=True, use_template=True) - author = CharField(model_attr='user') - pub_date = DateTimeField(model_attr='date_added') +class LinkIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + author = indexes.CharField(model_attr='user') + pub_date = indexes.DateTimeField(model_attr='date_added') - def index_queryset(self): + def get_model(self): + return Link + + def index_queryset(self, using=None): return Link.public_objects.all() def get_updated_field(self): return 'update_date' -site.register(Link, LinkIndex) diff -r 95f4e7f352fd -r ad53d929281a ygroup/search_indexes.py --- a/ygroup/search_indexes.py Wed Jan 01 19:52:07 2014 -0600 +++ b/ygroup/search_indexes.py Fri Jan 03 19:01:18 2014 -0600 @@ -2,19 +2,18 @@ Haystack search index for the Yahoo Group archives application. """ -from haystack.indexes import * -from haystack import site -from custom_search.indexes import CondQueuedSearchIndex +from haystack import indexes from ygroup.models import Post -class PostIndex(CondQueuedSearchIndex): - text = CharField(document=True, use_template=True) - pub_date = DateTimeField(model_attr='creation_date') +class PostIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + pub_date = indexes.DateTimeField(model_attr='creation_date') + + def get_model(self): + return Post def get_updated_field(self): return 'creation_date' - -site.register(Post, PostIndex)