1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
10 from django.utils.translation import ugettext as _
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
def match_word_re(word):
    r"""Wrap ``word`` in the word-boundary syntax of the default database.

    The dialect is chosen by looking for a backend name inside
    ``settings.DATABASES['default']['ENGINE']``:

    * ``sqlite`` -> ``\b<word>\b``
    * ``mysql``  -> ``[[:<:]]<word>[[:>:]]``

    ``word`` is interpolated as-is (it is not escaped).  When the engine
    name contains neither marker the function returns ``None``.
    """
    engine = settings.DATABASES['default']['ENGINE']
    # Order matters: 'sqlite' is tested before 'mysql', as in the original.
    boundary_templates = (
        ('sqlite', r"\b%s\b"),
        ('mysql', "[[:<:]]%s[[:>:]]"),
    )
    for backend, template in boundary_templates:
        if backend in engine:
            return template % word
    return None
# Characters removed from user-supplied queries by remove_query_syntax_chars():
# backslash, slash, asterisk, colon, parentheses and curly braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every character from ``query_syntax_chars`` replaced.

    :param query: the raw query string.
    :param replace: text substituted for each matched character; defaults
        to a single space.  It is inserted literally, so backslashes in it
        are not interpreted as regex group references.
    :returns: the sanitised query string.

    Previously ``replace`` was accepted but ignored and a space was always
    substituted; the default keeps that behaviour for existing callers.
    """
    # A callable replacement makes re.sub insert the text verbatim.
    return query_syntax_chars.sub(lambda _match: replace, query)
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
63 prefix = request.GET.get('term', '')
65 return JsonResponse([])
67 prefix = remove_query_syntax_chars(prefix)
70 # tags will narrow the results here,
71 # but tags can be attached to a book as well as to fragments:
72 # if the tags concern only the book, the new ones must belong to the same book;
73 # if they concern themes, they must belong to the same fragment.
75 tags = search.hint_tags(prefix, pdcounter=True)
76 books = search.hint_books(prefix)
79 if isinstance(tag, PDCounterAuthor):
80 if filter(lambda t: t.slug == tag.slug and t != tag, tags):
82 elif isinstance(tag, PDCounterBook):
83 if filter(lambda b: b.slug == tag.slug, tags):
87 tags = filter(lambda t: not is_dupe(t), tags)
90 if c.startswith('pd_'):
94 callback = request.GET.get('callback', None)
95 data = [{'label': t.name,
96 'category': category_name(t.category),
98 'url': t.get_absolute_url()}
101 'category': _('book'),
103 'url': b.get_absolute_url()}
106 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
107 content_type="application/json; charset=utf-8")
109 return JsonResponse(data)
118 query = request.GET.get('q', '')
121 return render_to_response('catalogue/search_too_short.html',
123 context_instance=RequestContext(request))
125 query = remove_query_syntax_chars(query)
129 theme_terms = search.index.analyze(text=query, field="themes_pl") \
130 + search.index.analyze(text=query, field="themes")
133 tags = search.hint_tags(query, pdcounter=True, prefix=False)
134 tags = split_tags(tags)
136 author_results = search.search_phrase(query, 'authors', book=True)
137 translator_results = search.search_phrase(query, 'translators', book=True)
139 title_results = search.search_phrase(query, 'title', book=True)
141 # Boost main author/title results with mixed search, and save some of its results for end of list.
142 # boost author, title results
143 author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
144 author_title_rest = []
146 for b in author_title_mixed:
147 also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
148 for b2 in also_in_mixed:
# A mixed-search hit that matched none of the author/translator/title
# results is saved for the end of the list.  Emptiness is tested by
# truthiness: the former `is []` identity check compared against a
# brand-new list object and could never be true, so nothing was saved.
# NOTE(review): assumes filter() returned a list (Python 2) -- confirm.
if not also_in_mixed:
    author_title_rest.append(b)
153 # Do a phrase search and a term search as well - this can give us better snippets than search_everywhere,
154 # because the query is using only one field.
155 text_phrase = SearchResult.aggregate(
156 search.search_phrase(query, 'text', snippets=True, book=False),
157 search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
159 everywhere = search.search_everywhere(query, query_terms=theme_terms)
161 def already_found(results):
164 if e.book_id == r.book_id:
170 f = already_found(author_results + translator_results + title_results + text_phrase)
171 everywhere = filter(lambda x: not f(x), everywhere)
173 author_results = SearchResult.aggregate(author_results)
174 translator_results = SearchResult.aggregate(translator_results)
175 title_results = SearchResult.aggregate(title_results)
177 everywhere = SearchResult.aggregate(everywhere, author_title_rest)
179 for field, res in [('authors', author_results),
180 ('translators', translator_results),
181 ('title', title_results),
182 ('text', text_phrase),
183 ('text', everywhere)]:
184 res.sort(reverse=True)
186 search.get_snippets(r, query, field, 3)
190 def ensure_exists(r):
193 except Book.DoesNotExist:
196 author_results = filter(ensure_exists, author_results)
197 translator_results = filter(ensure_exists, translator_results)
198 title_results = filter(ensure_exists, title_results)
199 text_phrase = filter(ensure_exists, text_phrase)
200 everywhere = filter(ensure_exists, everywhere)
202 results = author_results + translator_results + title_results + text_phrase + everywhere
203 # ensure the books exist & sort them
204 for res in (author_results, translator_results, title_results, text_phrase, everywhere):
205 res.sort(reverse=True)
207 # We don't want to redirect to book text, but rather display result page even with one result.
208 # if len(results) == 1:
209 # fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
210 # if len(fragment_hits) == 1:
211 # #anchor = fragment_hits[0]['fragment']
212 # #frag = Fragment.objects.get(anchor=anchor)
213 # return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
214 # return HttpResponseRedirect(results[0].book.get_absolute_url())
215 if len(results) == 0:
216 form = PublishingSuggestForm(initial={"books": query + ", "})
217 return render_to_response('catalogue/search_no_hits.html',
221 'did_you_mean': suggestion},
222 context_instance=RequestContext(request))
224 return render_to_response('catalogue/search_multiple_hits.html',
227 'results': {'author': author_results,
228 'translator': translator_results,
229 'title': title_results,
230 'content': text_phrase,
231 'other': everywhere},
232 'did_you_mean': suggestion},
233 context_instance=RequestContext(request))