# HG changeset patch # User Brian Neal # Date 1431566707 18000 # Node ID cf9918328c640e07b7b336637a5e1857f0f672a0 # Parent e8b170fca581a9c942383b9d1dc2a99eb16b2d7c Haystack tweaks for Django 1.7.7. I had to upgrade to Haystack 2.3.1 to get it to work with Django 1.7.7. I also had to update the Xapian backend. But I ran into problems. On my laptop anyway (Ubuntu 14.04), Xapian throws errors when search terms are greater than 245 chars (or something) when indexing. So I created a custom field that would simply omit terms greater than 64 chars and used this field everywhere I previously used a CharField. Secondly, the custom search form was now broken. Something changed in the Xapian backend and exact searches stopped working. Fortunately auto_query (which I was using originally, and which broke during an upgrade) started working again. So I cut the search form back over to doing an auto_query. I kept the form the same (3 fields) because I didn't want to change the form and I think it's better that way. diff -r e8b170fca581 -r cf9918328c64 bio/search_indexes.py --- a/bio/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/bio/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -2,10 +2,11 @@ from haystack import indexes from bio.models import UserProfile +from custom_search.fields import MaxTermSizeCharField class UserProfileIndex(indexes.SearchIndex, indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') def get_model(self): diff -r e8b170fca581 -r cf9918328c64 custom_search/fields.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/custom_search/fields.py Wed May 13 20:25:07 2015 -0500 @@ -0,0 +1,29 @@ +"""Custom Haystack SearchFields.""" + +import haystack.fields + + +class MaxTermSizeCharField(haystack.fields.CharField): + """A CharField that discards large terms when preparing the search index. + + Some backends (e.g. 
Xapian) throw errors when terms are bigger than some + limit. This field omits the terms over a limit when preparing the data for + the search index. + + The keyword argument max_term_size sets the maximum size of a whitespace + delimited word/term. Terms over this size are not indexed. The default value + is 64. + """ + DEFAULT_MAX_TERM_SIZE = 64 + + def __init__(self, *args, **kwargs): + self.max_term_size = kwargs.pop('max_term_size', self.DEFAULT_MAX_TERM_SIZE) + super(MaxTermSizeCharField, self).__init__(*args, **kwargs) + + def prepare(self, obj): + text = super(MaxTermSizeCharField, self).prepare(obj) + if text is None or self.max_term_size is None: + return text + + terms = (term for term in text.split() if len(term) <= self.max_term_size) + return u' '.join(terms) diff -r e8b170fca581 -r cf9918328c64 custom_search/forms.py --- a/custom_search/forms.py Thu Apr 30 20:23:07 2015 -0500 +++ b/custom_search/forms.py Wed May 13 20:25:07 2015 -0500 @@ -65,6 +65,12 @@ return self.cleaned_data + def clean_exact(self): + exact_field = self.cleaned_data['exact'] + if "'" in exact_field or '"' in exact_field: + raise forms.ValidationError("Quotes are not needed in this field") + return exact_field + def search(self): if not self.is_valid(): return self.no_query_found() @@ -83,24 +89,25 @@ self.cleaned_data['models'], username) - sqs = self.searchqueryset - # Note that in Haystack 2.x content is untrusted and is automatically # auto-escaped for us. 
# - # Filter on the q terms; these should be and'ed together: - terms = self.cleaned_data['q'].split() - for term in terms: - sqs = sqs.filter(content=term) + # Gather regular search terms + terms = ' '.join(self.cleaned_data['q'].split()) # Exact words or phrases: - if self.cleaned_data['exact']: - sqs = sqs.filter(content__exact=self.cleaned_data['exact']) + exact = self.cleaned_data['exact'].strip() + if exact: + exact = '"{}"'.format(exact) # Exclude terms: - terms = self.cleaned_data['exclude'].split() - for term in terms: - sqs = sqs.exclude(content=term) + exclude = ["-{}".format(term) for term in self.cleaned_data['exclude'].split()] + exclude = ' '.join(exclude) + + query = ' '.join([terms, exact, exclude]).strip() + logger.debug("auto_query: %s", query) + + sqs = self.searchqueryset.auto_query(query) if self.load_all: sqs = sqs.load_all() diff -r e8b170fca581 -r cf9918328c64 downloads/search_indexes.py --- a/downloads/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/downloads/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -1,11 +1,12 @@ """Haystack search index for the downloads application.""" from haystack import indexes +from custom_search.fields import MaxTermSizeCharField from downloads.models import Download class DownloadIndex(indexes.SearchIndex, indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='date_added') diff -r e8b170fca581 -r cf9918328c64 forums/search_indexes.py --- a/forums/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/forums/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -1,6 +1,7 @@ """Haystack search index for the weblinks application.""" from haystack import indexes +from custom_search.fields import MaxTermSizeCharField from forums.models import Forum, Topic, Post @@ -20,7 +21,7 @@ class PostIndex(indexes.SearchIndex, 
indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='creation_date') diff -r e8b170fca581 -r cf9918328c64 news/search_indexes.py --- a/news/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/news/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -1,11 +1,12 @@ """Haystack search index for the news application.""" from haystack import indexes +from custom_search.fields import MaxTermSizeCharField from news.models import Story class StoryIndex(indexes.SearchIndex, indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) author = indexes.CharField(model_attr='submitter') pub_date = indexes.DateTimeField(model_attr='date_submitted') diff -r e8b170fca581 -r cf9918328c64 podcast/search_indexes.py --- a/podcast/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/podcast/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -1,11 +1,12 @@ """Haystack search index for the news application.""" from haystack import indexes +from custom_search.fields import MaxTermSizeCharField from podcast.models import Item class ItemIndex(indexes.SearchIndex, indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) author = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pubdate') diff -r e8b170fca581 -r cf9918328c64 weblinks/search_indexes.py --- a/weblinks/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/weblinks/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -1,11 +1,12 @@ """Haystack search index for the weblinks application.""" from haystack import indexes +from custom_search.fields import MaxTermSizeCharField from weblinks.models import Link class LinkIndex(indexes.SearchIndex, 
indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='date_added') diff -r e8b170fca581 -r cf9918328c64 ygroup/search_indexes.py --- a/ygroup/search_indexes.py Thu Apr 30 20:23:07 2015 -0500 +++ b/ygroup/search_indexes.py Wed May 13 20:25:07 2015 -0500 @@ -4,11 +4,12 @@ """ from haystack import indexes +from custom_search.fields import MaxTermSizeCharField from ygroup.models import Post class PostIndex(indexes.SearchIndex, indexes.Indexable): - text = indexes.CharField(document=True, use_template=True) + text = MaxTermSizeCharField(document=True, use_template=True) pub_date = indexes.DateTimeField(model_attr='creation_date') def get_model(self):