514dc6e1fce261c77e7e73cefffd886d7bb25ce4
[wolnelektury.git] / apps / search / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
10 from django.utils.translation import ugettext as _
11
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
17 import re
18 #import enchant
19 import json
20
21
def match_word_re(word):
    """Build a whole-word regex pattern for ``word`` for the default database.

    The dialect is chosen by looking at the ``ENGINE`` setting of the
    ``default`` database: ``\\b`` boundaries for SQLite, ``[[:<:]]`` /
    ``[[:>:]]`` boundaries for MySQL.  For any other engine ``None`` is
    returned.

    ``word`` is interpolated into the pattern as-is (it is not regex-escaped).
    """
    engine_name = settings.DATABASES['default']['ENGINE']
    # Checked in order; the first engine marker found in the name wins.
    boundary_templates = (
        ('sqlite', r"\b%s\b"),
        ('mysql', "[[:<:]]%s[[:>:]]"),
    )
    for marker, template in boundary_templates:
        if marker in engine_name:
            return template % word
    return None
27
28
# Characters that remove_query_syntax_chars() strips out of user queries:
# backslash, slash, asterisk, colon, parentheses and curly braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    :param query: the raw query string.
    :param replace: text put in place of each matched character; defaults to
        a single space.  It is inserted literally (backslashes in it are not
        treated as regex template escapes).
    :returns: the cleaned query string.
    """
    # A callable replacement keeps ``replace`` literal; previously the
    # argument was ignored and a space was always used.
    return query_syntax_chars.sub(lambda _match: replace, query)
34
35
def did_you_mean(query, tokens):
    """Return a suggested replacement for ``query``.

    Suggestions are currently switched off: the query is handed back
    unchanged and ``tokens`` is never inspected.
    """
    # The implementation that used to live here was disabled.  It walked over
    # ``tokens``, skipped any token matching the name of an author Tag (via
    # match_word_re), and - inside a branch that was itself guarded by
    # ``if False`` - asked a ``dictionary`` object for a spelling suggestion.
    # It returned None when no token needed changing, otherwise the query with
    # the misspelled tokens replaced.
    suggestion = query
    return suggestion
60
61
def hint(request):
    """Return autocomplete hints for the ``term`` GET parameter.

    The response body is a JSON list of objects with the keys ``label``,
    ``category``, ``id`` and ``url``: first the matching tags, then the
    matching books.  Terms shorter than two characters yield an empty list.
    When a ``callback`` GET parameter is present the list is wrapped as
    ``callback(<json>);`` (JSONP).
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        # JsonResponse refuses non-dict payloads unless safe=False is given.
        return JsonResponse([], safe=False)

    prefix = remove_query_syntax_chars(prefix)

    search = Search()
    # Tags will narrow the results here, but tags can be attached both to
    # a book and to fragments.  If the tags concern only the book, the new
    # ones must belong to the same book; if they concern themes, they must
    # belong to the same fragment.

    candidates = search.hint_tags(prefix, pdcounter=True)
    books = search.hint_books(prefix)

    def is_dupe(tag):
        """Tell whether a pdcounter hint shares its slug with another hint."""
        if isinstance(tag, PDCounterAuthor):
            return any(t.slug == tag.slug and t != tag for t in candidates)
        if isinstance(tag, PDCounterBook):
            # NOTE(review): there is no ``!= tag`` guard here, so a
            # PDCounterBook present in ``candidates`` matches itself and is
            # always dropped.  Possibly ``books`` was meant - confirm before
            # changing; the existing behaviour is kept.
            return any(b.slug == tag.slug for b in candidates)
        return False

    tags = [t for t in candidates if not is_dupe(t)]

    def category_name(c):
        """Translate a category name, ignoring a leading ``pd_`` prefix."""
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    callback = request.GET.get('callback', None)
    tag_hints = [{'label': t.name,
                  'category': category_name(t.category),
                  'id': t.id,
                  'url': t.get_absolute_url()}
                 for t in tags]
    book_hints = [{'label': b.title,
                   'category': _('book'),
                   'id': b.id,
                   'url': b.get_absolute_url()}
                  for b in books]
    data = tag_hints + book_hints

    if callback:
        # NOTE(review): ``callback`` comes straight from the query string and
        # is echoed into the response without validation.
        return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                            content_type="application/json; charset=utf-8")
    # ``data`` is a list, so safe=False is required here as well.
    return JsonResponse(data, safe=False)
110
111
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    Queries shorter than two characters render
    ``catalogue/search_too_short.html``.  Otherwise the query is searched in
    authors, translators, titles and text, plus a search across everything;
    the result groups are rendered with ``catalogue/search_multiple_hits.html``,
    or ``catalogue/search_no_hits.html`` (with a publishing-suggestion form)
    when nothing was found.
    """
    query = request.GET.get('q', '')

    if len(query) < 2:
        return render_to_response('catalogue/search_too_short.html',
                                  {'prefix': query},
                                  context_instance=RequestContext(request))

    query = remove_query_syntax_chars(query)

    search = Search()

    theme_terms = search.index.analyze(text=query, field="themes_pl") \
        + search.index.analyze(text=query, field="themes")

    # change hints
    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    author_results = search.search_phrase(query, 'authors', book=True)
    translator_results = search.search_phrase(query, 'translators', book=True)

    title_results = search.search_phrase(query, 'title', book=True)

    # Boost main author/title results with mixed search, and save some of its
    # results for end of list.
    author_title_mixed = search.search_some(
        query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
    author_title_rest = []

    # Built once: the three lists are not rebound until after the loops below.
    primary_results = author_results + translator_results + title_results

    for b in author_title_mixed:
        also_in_mixed = [ba for ba in primary_results if ba.book_id == b.book_id]
        for b2 in also_in_mixed:
            b2.boost *= 1.1
        # The old ``is []`` identity test could never be true, so the mixed
        # hits without a counterpart were silently discarded.
        if not also_in_mixed:
            author_title_rest.append(b)

    # Do a phrase search but a term search as well - this can give us better
    # snippets than search_everywhere, because the query is using only one field.
    text_phrase = SearchResult.aggregate(
        search.search_phrase(query, 'text', snippets=True, book=False),
        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))

    everywhere = search.search_everywhere(query, query_terms=theme_terms)

    def already_found(results):
        """Build a predicate telling whether a hit's book is in ``results``."""
        def f(e):
            for r in results:
                if e.book_id == r.book_id:
                    e.boost = 0.9
                    # NOTE(review): ``results`` is the temporary list built
                    # for this helper, so the appended hit never reaches the
                    # per-field result lists.  Left as is.
                    results.append(e)
                    return True
            return False
        return f
    f = already_found(primary_results + text_phrase)
    everywhere = [x for x in everywhere if not f(x)]

    author_results = SearchResult.aggregate(author_results)
    translator_results = SearchResult.aggregate(translator_results)
    title_results = SearchResult.aggregate(title_results)

    everywhere = SearchResult.aggregate(everywhere, author_title_rest)

    for field, res in [('authors', author_results),
                       ('translators', translator_results),
                       ('title', title_results),
                       ('text', text_phrase),
                       ('text', everywhere)]:
        res.sort(reverse=True)
        for r in res:
            search.get_snippets(r, query, field, 3)

    suggestion = u''

    def ensure_exists(r):
        """Return the result's book, or False when the book no longer exists."""
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # Drop hits whose book is missing; lists are needed for ``+`` and
    # ``.sort()`` below.
    author_results = [r for r in author_results if ensure_exists(r)]
    translator_results = [r for r in translator_results if ensure_exists(r)]
    title_results = [r for r in title_results if ensure_exists(r)]
    text_phrase = [r for r in text_phrase if ensure_exists(r)]
    everywhere = [r for r in everywhere if ensure_exists(r)]

    results = author_results + translator_results + title_results + text_phrase + everywhere
    # ensure books do exist & sort them
    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
        res.sort(reverse=True)

    # We don't want to redirect to book text, but rather display result page
    # even with one result.
    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response('catalogue/search_no_hits.html',
                                  {'tags': tags,
                                   'prefix': query,
                                   "form": form,
                                   'did_you_mean': suggestion},
                                  context_instance=RequestContext(request))

    return render_to_response('catalogue/search_multiple_hits.html',
                              {'tags': tags,
                               'prefix': query,
                               'results': {'author': author_results,
                                           'translator': translator_results,
                                           'title': title_results,
                                           'content': text_phrase,
                                           'other': everywhere},
                               'did_you_mean': suggestion},
                              context_instance=RequestContext(request))