1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
10 from django.utils.translation import ugettext as _
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
def match_word_re(word):
    r"""Build a whole-word regex for ``word`` in the default database's dialect.

    The dialect is chosen by looking for a backend name inside
    ``settings.DATABASES['default']['ENGINE']``:

    * ``sqlite`` -- the word is wrapped in ``\b`` boundaries,
    * ``mysql``  -- the word is wrapped in ``[[:<:]]`` / ``[[:>:]]`` markers.

    ``word`` is interpolated verbatim; it is not escaped.

    Returns the pattern string, or ``None`` when the engine name contains
    neither ``sqlite`` nor ``mysql``.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    # No pattern is defined for any other backend.
    return None
# Characters treated as query syntax: backslash, slash, asterisk, colon,
# parentheses and curly braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character replaced.

    Each character matched by ``query_syntax_chars`` is substituted with
    ``replace`` (a single space by default).  Previously the ``replace``
    argument was accepted but ignored and a space was always used.

    :param query: the raw query string.
    :param replace: literal text inserted in place of each syntax character.
    :returns: the cleaned-up query string.
    """
    # A callable replacement keeps ``replace`` literal; a plain string would
    # have its backslash escapes and group references interpreted by re.sub.
    return query_syntax_chars.sub(lambda _match: replace, query)
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
64 prefix = request.GET.get('term', '')
66 return JsonResponse([], safe=False)
68 prefix = remove_query_syntax_chars(prefix)
71 # tags will act as restrictions here
72 # but tags can be attached to a book as well as to fragments
73 # if the tags concern only the book, it is important that the new ones are in the same book
74 # if instead they concern themes, it is important that they are in the same fragment.
77 if isinstance(tag, PDCounterAuthor):
78 if filter(lambda t: t.slug == tag.slug and t != tag, tags):
80 elif isinstance(tag, PDCounterBook):
81 if filter(lambda b: b.slug == tag.slug, tags):
86 if c.startswith('pd_'):
91 limit = int(request.GET.get('max', ''))
100 tags = search.hint_tags(prefix, pdcounter=True)
101 tags = filter(lambda t: not is_dupe(t), tags)
108 'category': category_name(t.category),
110 'url': t.get_absolute_url()
113 books = search.hint_books(prefix)
120 'category': _('book'),
122 'url': b.get_absolute_url()
125 callback = request.GET.get('callback', None)
127 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
128 content_type="application/json; charset=utf-8")
130 return JsonResponse(data, safe=False)
140 query = request.GET.get('q', '')
143 return render_to_response('catalogue/search_too_short.html',
145 context_instance=RequestContext(request))
147 query = remove_query_syntax_chars(query)
151 theme_terms = search.index.analyze(text=query, field="themes_pl") \
152 + search.index.analyze(text=query, field="themes")
155 tags = search.hint_tags(query, pdcounter=True, prefix=False)
156 tags = split_tags(tags)
158 author_results = search.search_phrase(query, 'authors', book=True)
159 translator_results = search.search_phrase(query, 'translators', book=True)
161 title_results = search.search_phrase(query, 'title', book=True)
163 # Boost main author/title results with mixed search, and save some of its results for end of list.
164 # boost author, title results
165 author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
166 author_title_rest = []
168 for b in author_title_mixed:
169 also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
170 for b2 in also_in_mixed:
172 if also_in_mixed is []:
173 author_title_rest.append(b)
175 # Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
176 # because the query is using only one field.
177 text_phrase = SearchResult.aggregate(
178 search.search_phrase(query, 'text', snippets=True, book=False),
179 search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
181 everywhere = search.search_everywhere(query, query_terms=theme_terms)
183 def already_found(results):
186 if e.book_id == r.book_id:
192 f = already_found(author_results + translator_results + title_results + text_phrase)
193 everywhere = filter(lambda x: not f(x), everywhere)
195 author_results = SearchResult.aggregate(author_results)
196 translator_results = SearchResult.aggregate(translator_results)
197 title_results = SearchResult.aggregate(title_results)
199 everywhere = SearchResult.aggregate(everywhere, author_title_rest)
201 for field, res in [('authors', author_results),
202 ('translators', translator_results),
203 ('title', title_results),
204 ('text', text_phrase),
205 ('text', everywhere)]:
206 res.sort(reverse=True)
208 search.get_snippets(r, query, field, 3)
212 def ensure_exists(r):
215 except Book.DoesNotExist:
218 author_results = filter(ensure_exists, author_results)
219 translator_results = filter(ensure_exists, translator_results)
220 title_results = filter(ensure_exists, title_results)
221 text_phrase = filter(ensure_exists, text_phrase)
222 everywhere = filter(ensure_exists, everywhere)
224 results = author_results + translator_results + title_results + text_phrase + everywhere
225 # ensure books do exist & sort them
226 for res in (author_results, translator_results, title_results, text_phrase, everywhere):
227 res.sort(reverse=True)
229 # We don't want to redirect to book text, but rather display result page even with one result.
230 # if len(results) == 1:
231 # fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
232 # if len(fragment_hits) == 1:
233 # #anchor = fragment_hits[0]['fragment']
234 # #frag = Fragment.objects.get(anchor=anchor)
235 # return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
236 # return HttpResponseRedirect(results[0].book.get_absolute_url())
237 if len(results) == 0:
238 form = PublishingSuggestForm(initial={"books": query + ", "})
239 return render_to_response('catalogue/search_no_hits.html',
243 'did_you_mean': suggestion},
244 context_instance=RequestContext(request))
246 return render_to_response('catalogue/search_multiple_hits.html',
249 'results': {'author': author_results,
250 'translator': translator_results,
251 'title': title_results,
252 'content': text_phrase,
253 'other': everywhere},
254 'did_you_mean': suggestion},
255 context_instance=RequestContext(request))