From: Marcin Koziej Date: Wed, 21 Nov 2012 10:03:17 +0000 (+0100) Subject: Filter some search characters which make Solr too unhappy. X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/b34b62e9545e4c8a887c4ea54203082fb6ecec42?ds=sidebyside;hp=-c Filter some search characters which make Solr too unhappy. --- b34b62e9545e4c8a887c4ea54203082fb6ecec42 diff --git a/apps/search/views.py b/apps/search/views.py index 2109a7398..9a0b469a1 100644 --- a/apps/search/views.py +++ b/apps/search/views.py @@ -3,7 +3,6 @@ from django.conf import settings from django.shortcuts import render_to_response, get_object_or_404 from django.template import RequestContext -from django.contrib.auth.decorators import login_required from django.views.decorators import cache from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect from django.utils.translation import ugettext as _ @@ -13,9 +12,7 @@ from catalogue.models import Book, Tag, Fragment from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook from catalogue.views import JSONResponse from search import Search, SearchResult -from lucene import StringReader from suggest.forms import PublishingSuggestForm -from time import sleep import re #import enchant import json @@ -28,6 +25,13 @@ def match_word_re(word): return "[[:<:]]%s[[:>:]]" % word +query_syntax_chars = re.compile(r"[\\/*:(){}]") + + +def remove_query_syntax_chars(query, replace=' '): + return query_syntax_chars.sub(' ', query) + + def did_you_mean(query, tokens): return query # change = {} @@ -59,6 +63,8 @@ def hint(request): if len(prefix) < 2: return JSONResponse([]) + prefix = remove_query_syntax_chars(prefix) + search = Search() # tagi beda ograniczac tutaj # ale tagi moga byc na ksiazce i na fragmentach @@ -114,6 +120,9 @@ def main(request): return render_to_response('catalogue/search_too_short.html', {'prefix': query}, context_instance=RequestContext(request)) + + query = remove_query_syntax_chars(query) + search = Search() theme_terms = search.index.analyze(text=query, field="themes_pl") \ @@ -191,7 +200,8 @@ def main(request): results = author_results + translator_results + title_results + text_phrase + everywhere # ensure books do exists & sort them - results.sort(reverse=True) + for res in (author_results, translator_results, title_results, text_phrase, everywhere): + res.sort(reverse=True) # We don't want to redirect to book text, but rather display result page even with one result. # if len(results) == 1: