changeset 943:cf9918328c64

Haystack tweaks for Django 1.7.7. I had to upgrade to Haystack 2.3.1 to get it to work with Django 1.7.7, and I also had to update the Xapian backend. But I ran into problems. On my laptop, at least (Ubuntu 14.04), Xapian raises errors during indexing when a term is longer than roughly 245 characters. So I created a custom field that simply omits terms longer than 64 characters, and used it for the `text` document field of every search index that previously used a CharField there. Secondly, the custom search form was now broken: something changed in the Xapian backend and exact searches stopped working. Fortunately, auto_query (which I was using originally, and which broke during an earlier upgrade) started working again, so I cut the search form back over to doing an auto_query. I kept the form the same (3 fields) because I didn't want to change the form, and I think it's better that way.
author Brian Neal <bgneal@gmail.com>
date Wed, 13 May 2015 20:25:07 -0500 (2015-05-14)
parents e8b170fca581
children 7ab180ff6f7b
files bio/search_indexes.py custom_search/fields.py custom_search/forms.py downloads/search_indexes.py forums/search_indexes.py news/search_indexes.py podcast/search_indexes.py weblinks/search_indexes.py ygroup/search_indexes.py
diffstat 9 files changed, 61 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/bio/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/bio/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -2,10 +2,11 @@
 from haystack import indexes
 
 from bio.models import UserProfile
+from custom_search.fields import MaxTermSizeCharField
 
 
 class UserProfileIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     author = indexes.CharField(model_attr='user')
 
     def get_model(self):
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/custom_search/fields.py	Wed May 13 20:25:07 2015 -0500
@@ -0,0 +1,29 @@
+"""Custom Haystack SearchFields."""
+
+import haystack.fields
+
+
+class MaxTermSizeCharField(haystack.fields.CharField):
+    """A CharField that discards large terms when preparing the search index.
+
+    Some backends (e.g. Xapian) throw errors when terms are bigger than some
+    limit. This field omits the terms over a limit when preparing the data for
+    the search index.
+
+    The keyword argument max_term_size sets the maximum size of a whitespace
+    delimited word/term. Terms over this size are not indexed. The default value
+    is 64.
+    """
+    DEFAULT_MAX_TERM_SIZE = 64
+
+    def __init__(self, *args, **kwargs):
+        self.max_term_size = kwargs.pop('max_term_size', self.DEFAULT_MAX_TERM_SIZE)
+        super(MaxTermSizeCharField, self).__init__(*args, **kwargs)
+
+    def prepare(self, obj):
+        text = super(MaxTermSizeCharField, self).prepare(obj)
+        if text is None or self.max_term_size is None:
+            return text
+
+        terms = (term for term in text.split() if len(term) <= self.max_term_size)
+        return u' '.join(terms)
--- a/custom_search/forms.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/custom_search/forms.py	Wed May 13 20:25:07 2015 -0500
@@ -65,6 +65,12 @@
 
         return self.cleaned_data
 
+    def clean_exact(self):
+        exact_field = self.cleaned_data['exact']
+        if "'" in exact_field or '"' in exact_field:
+            raise forms.ValidationError("Quotes are not needed in this field")
+        return exact_field
+
     def search(self):
         if not self.is_valid():
             return self.no_query_found()
@@ -83,24 +89,25 @@
             self.cleaned_data['models'],
             username)
 
-        sqs = self.searchqueryset
-
         # Note that in Haystack 2.x content is untrusted and is automatically
         # auto-escaped for us.
         #
-        # Filter on the q terms; these should be and'ed together:
-        terms = self.cleaned_data['q'].split()
-        for term in terms:
-            sqs = sqs.filter(content=term)
+        # Gather regular search terms
+        terms = ' '.join(self.cleaned_data['q'].split())
 
         # Exact words or phrases:
-        if self.cleaned_data['exact']:
-            sqs = sqs.filter(content__exact=self.cleaned_data['exact'])
+        exact = self.cleaned_data['exact'].strip()
+        if exact:
+            exact = '"{}"'.format(exact)
 
         # Exclude terms:
-        terms = self.cleaned_data['exclude'].split()
-        for term in terms:
-            sqs = sqs.exclude(content=term)
+        exclude = ["-{}".format(term) for term in self.cleaned_data['exclude'].split()]
+        exclude = ' '.join(exclude)
+
+        query = ' '.join([terms, exact, exclude]).strip()
+        logger.debug("auto_query: %s", query)
+
+        sqs = self.searchqueryset.auto_query(query)
 
         if self.load_all:
             sqs = sqs.load_all()
--- a/downloads/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/downloads/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -1,11 +1,12 @@
 """Haystack search index for the downloads application."""
 from haystack import indexes
 
+from custom_search.fields import MaxTermSizeCharField
 from downloads.models import Download
 
 
 class DownloadIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     author = indexes.CharField(model_attr='user')
     pub_date = indexes.DateTimeField(model_attr='date_added')
 
--- a/forums/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/forums/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -1,6 +1,7 @@
 """Haystack search index for the weblinks application."""
 from haystack import indexes
 
+from custom_search.fields import MaxTermSizeCharField
 from forums.models import Forum, Topic, Post
 
 
@@ -20,7 +21,7 @@
 
 
 class PostIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     author = indexes.CharField(model_attr='user')
     pub_date = indexes.DateTimeField(model_attr='creation_date')
 
--- a/news/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/news/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -1,11 +1,12 @@
 """Haystack search index for the news application."""
 from haystack import indexes
 
+from custom_search.fields import MaxTermSizeCharField
 from news.models import Story
 
 
 class StoryIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     author = indexes.CharField(model_attr='submitter')
     pub_date = indexes.DateTimeField(model_attr='date_submitted')
 
--- a/podcast/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/podcast/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -1,11 +1,12 @@
 """Haystack search index for the news application."""
 from haystack import indexes
 
+from custom_search.fields import MaxTermSizeCharField
 from podcast.models import Item
 
 
 class ItemIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     author = indexes.CharField(model_attr='author')
     pub_date = indexes.DateTimeField(model_attr='pubdate')
 
--- a/weblinks/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/weblinks/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -1,11 +1,12 @@
 """Haystack search index for the weblinks application."""
 from haystack import indexes
 
+from custom_search.fields import MaxTermSizeCharField
 from weblinks.models import Link
 
 
 class LinkIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     author = indexes.CharField(model_attr='user')
     pub_date = indexes.DateTimeField(model_attr='date_added')
 
--- a/ygroup/search_indexes.py	Thu Apr 30 20:23:07 2015 -0500
+++ b/ygroup/search_indexes.py	Wed May 13 20:25:07 2015 -0500
@@ -4,11 +4,12 @@
 """
 from haystack import indexes
 
+from custom_search.fields import MaxTermSizeCharField
 from ygroup.models import Post
 
 
 class PostIndex(indexes.SearchIndex, indexes.Indexable):
-    text = indexes.CharField(document=True, use_template=True)
+    text = MaxTermSizeCharField(document=True, use_template=True)
     pub_date = indexes.DateTimeField(model_attr='creation_date')
 
     def get_model(self):