1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
10 from django.utils.translation import ugettext as _
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
def match_word_re(word):
    """Build a whole-word regular expression for ``word``.

    The pattern syntax is picked from the ``ENGINE`` string of the default
    database in ``settings.DATABASES``:

    * an engine name containing ``sqlite`` gets ``\\b`` word boundaries,
    * an engine name containing ``mysql`` gets ``[[:<:]]`` / ``[[:>:]]``.

    ``word`` is interpolated as-is (it is not regex-escaped).

    Returns ``None`` when the engine name matches neither of the above.
    """
    engine = settings.DATABASES['default']['ENGINE']
    # Checked in order: the first marker found in the engine name wins.
    templates = (
        ('sqlite', r"\b%s\b"),
        ('mysql', "[[:<:]]%s[[:>:]]"),
    )
    for marker, template in templates:
        if marker in engine:
            return template % word
    return None
# Characters stripped from user queries before searching: backslash, slash,
# asterisk, colon, parentheses and braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Each character matched by ``query_syntax_chars`` is replaced with
    ``replace``, which defaults to a single space. Previously the
    ``replace`` argument was accepted but ignored and a space was always
    used; it is now honoured.

    :param query: the raw query string.
    :param replace: the text inserted in place of each matched character.
    :returns: the cleaned-up query string.
    """
    # A callable replacement inserts ``replace`` literally; passed as a plain
    # string, re.sub would interpret backslash escapes inside it.
    return query_syntax_chars.sub(lambda match: replace, query)
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
64 prefix = request.GET.get('term', '')
66 return JsonResponse([], safe=False)
68 prefix = remove_query_syntax_chars(prefix)
# Tags will narrow the results here,
# but tags can be attached to a book as well as to fragments.
# If the tags concern only the book, the new ones must be in the same book;
# if they concern themes, they must be in the same fragment.
76 tags = search.hint_tags(prefix, pdcounter=True)
77 books = search.hint_books(prefix)
80 if isinstance(tag, PDCounterAuthor):
81 if filter(lambda t: t.slug == tag.slug and t != tag, tags):
83 elif isinstance(tag, PDCounterBook):
84 if filter(lambda b: b.slug == tag.slug, tags):
88 tags = filter(lambda t: not is_dupe(t), tags)
91 if c.startswith('pd_'):
95 callback = request.GET.get('callback', None)
96 data = [{'label': t.name,
97 'category': category_name(t.category),
99 'url': t.get_absolute_url()}
102 'category': _('book'),
104 'url': b.get_absolute_url()}
107 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
108 content_type="application/json; charset=utf-8")
110 return JsonResponse(data, safe=False)
120 query = request.GET.get('q', '')
123 return render_to_response('catalogue/search_too_short.html',
125 context_instance=RequestContext(request))
127 query = remove_query_syntax_chars(query)
131 theme_terms = search.index.analyze(text=query, field="themes_pl") \
132 + search.index.analyze(text=query, field="themes")
135 tags = search.hint_tags(query, pdcounter=True, prefix=False)
136 tags = split_tags(tags)
138 author_results = search.search_phrase(query, 'authors', book=True)
139 translator_results = search.search_phrase(query, 'translators', book=True)
141 title_results = search.search_phrase(query, 'title', book=True)
143 # Boost main author/title results with mixed search, and save some of its results for end of list.
144 # boost author, title results
145 author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
146 author_title_rest = []
148 for b in author_title_mixed:
149 also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
150 for b2 in also_in_mixed:
152 if also_in_mixed is []:
153 author_title_rest.append(b)
# Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
# because the query is using only one field.
157 text_phrase = SearchResult.aggregate(
158 search.search_phrase(query, 'text', snippets=True, book=False),
159 search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
161 everywhere = search.search_everywhere(query, query_terms=theme_terms)
163 def already_found(results):
166 if e.book_id == r.book_id:
172 f = already_found(author_results + translator_results + title_results + text_phrase)
173 everywhere = filter(lambda x: not f(x), everywhere)
175 author_results = SearchResult.aggregate(author_results)
176 translator_results = SearchResult.aggregate(translator_results)
177 title_results = SearchResult.aggregate(title_results)
179 everywhere = SearchResult.aggregate(everywhere, author_title_rest)
181 for field, res in [('authors', author_results),
182 ('translators', translator_results),
183 ('title', title_results),
184 ('text', text_phrase),
185 ('text', everywhere)]:
186 res.sort(reverse=True)
188 search.get_snippets(r, query, field, 3)
192 def ensure_exists(r):
195 except Book.DoesNotExist:
198 author_results = filter(ensure_exists, author_results)
199 translator_results = filter(ensure_exists, translator_results)
200 title_results = filter(ensure_exists, title_results)
201 text_phrase = filter(ensure_exists, text_phrase)
202 everywhere = filter(ensure_exists, everywhere)
204 results = author_results + translator_results + title_results + text_phrase + everywhere
# ensure books exist & sort them
206 for res in (author_results, translator_results, title_results, text_phrase, everywhere):
207 res.sort(reverse=True)
209 # We don't want to redirect to book text, but rather display result page even with one result.
210 # if len(results) == 1:
211 # fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
212 # if len(fragment_hits) == 1:
213 # #anchor = fragment_hits[0]['fragment']
214 # #frag = Fragment.objects.get(anchor=anchor)
215 # return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
216 # return HttpResponseRedirect(results[0].book.get_absolute_url())
217 if len(results) == 0:
218 form = PublishingSuggestForm(initial={"books": query + ", "})
219 return render_to_response('catalogue/search_no_hits.html',
223 'did_you_mean': suggestion},
224 context_instance=RequestContext(request))
226 return render_to_response('catalogue/search_multiple_hits.html',
229 'results': {'author': author_results,
230 'translator': translator_results,
231 'title': title_results,
232 'content': text_phrase,
233 'other': everywhere},
234 'did_you_mean': suggestion},
235 context_instance=RequestContext(request))