a50897199cb62799ecf77f688eb43f5633b3bfa1
[wolnelektury.git] / apps / search / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
10 from django.utils.translation import ugettext as _
11
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
17 import re
18 #import enchant
19 import json
20
21
def match_word_re(word):
    r"""Build a whole-word regular expression for ``word``.

    The syntax depends on the engine configured for the default database:
    ``\b`` boundaries when the engine name contains ``sqlite``, and the
    ``[[:<:]]`` / ``[[:>:]]`` markers when it contains ``mysql``. For any
    other engine ``None`` is returned.

    ``word`` is interpolated verbatim; it is not escaped.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    return None
27
28
# Characters treated as query syntax: backslash, slash, asterisk, colon,
# parentheses and curly braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    :param query: raw query text.
    :param replace: text put in place of each matched character
        (a single space by default). It is inserted literally.
    :returns: the sanitized query string.
    """
    # A callable replacement keeps ``replace`` literal: passed as a plain
    # string it would be parsed as a regex template, so a backslash in it
    # would be misread or raise an error.
    return query_syntax_chars.sub(lambda _match: replace, query)
34
35
def did_you_mean(query, tokens):
    """Return a spelling suggestion for ``query``.

    Spell checking is currently switched off: ``query`` is handed back
    unchanged and ``tokens`` is not used. The previous implementation is
    preserved below, commented out, for reference.
    """
    return query
    # --- disabled implementation ---
    # change = {}
    # for t in tokens:
    #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
    #     if len(authors) > 0:
    #         continue

    #     if False:
    #         if not dictionary.check(t):
    #             try:
    #                 change_to = dictionary.suggest(t)[0].lower()
    #                 if change_to != t.lower():
    #                     change[t] = change_to
    #             except IndexError:
    #                 pass

    # if change == {}:
    #     return None

    # for frm, to in change.items():
    #     query = query.replace(frm, to)

    # return query
60
61
# A JSONP callback must be a plain or dotted JavaScript identifier
# (e.g. ``jQuery1234_5678`` or ``app.onHint``). Anything else is rejected so
# that arbitrary script cannot be echoed back in the response body.
_jsonp_callback_re = re.compile(
    r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z")


@cache.never_cache
def hint(request):
    """Return autocomplete hints (tags and books) for the ``term`` GET parameter.

    The response is a JSON list of dicts with the keys ``label``,
    ``category``, ``id`` and ``url``. Terms shorter than two characters
    yield an empty list. When a ``callback`` GET parameter is given, the
    payload is wrapped as JSONP; a callback that is not a valid (optionally
    dotted) identifier results in a 400 response.
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JsonResponse([], safe=False)

    # Validate the callback before doing any search work: it is untrusted
    # input that ends up verbatim in an executable response.
    callback = request.GET.get('callback', None)
    if callback and not _jsonp_callback_re.match(callback):
        return HttpResponse("Invalid callback name", status=400,
                            content_type="text/plain; charset=utf-8")

    prefix = remove_query_syntax_chars(prefix)

    search = Search()
    # Tags will restrict the results here, but tags can be attached both to
    # a book and to fragments. If the tags concern only the book, it matters
    # that the new ones are in the same book; if they concern themes, it
    # matters that they are in the same fragment.

    tags = search.hint_tags(prefix, pdcounter=True)
    books = search.hint_books(prefix)

    def is_dupe(tag):
        """Tell whether a PD-counter entry duplicates another hinted tag."""
        if isinstance(tag, PDCounterAuthor):
            return any(t.slug == tag.slug and t != tag for t in tags)
        if isinstance(tag, PDCounterBook):
            # NOTE(review): this compares against ``tags``, which contains
            # ``tag`` itself, so every PDCounterBook is treated as a dupe.
            # Possibly ``books`` was intended -- behaviour kept as-is; confirm.
            return any(b.slug == tag.slug for b in tags)
        return False

    # A list comprehension (not ``filter``) so the result is a real list on
    # every Python version.
    tags = [t for t in tags if not is_dupe(t)]

    def category_name(c):
        """Translate a category name, ignoring the ``pd_`` prefix."""
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    data = [{'label': t.name,
             'category': category_name(t.category),
             'id': t.id,
             'url': t.get_absolute_url()}
            for t in tags]
    data += [{'label': b.title,
              'category': _('book'),
              'id': b.id,
              'url': b.get_absolute_url()}
             for b in books]

    if callback:
        # JSONP is executed as a script, so it is served with a JavaScript
        # MIME type rather than as JSON.
        return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                            content_type="application/javascript; charset=utf-8")
    return JsonResponse(data, safe=False)
111
112
@cache.never_cache
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    Queries shorter than two characters render
    ``catalogue/search_too_short.html``. Otherwise the query is searched in
    authors, translators, titles and text, plus an "everywhere" search;
    results whose book no longer exists are dropped. With no results left,
    ``catalogue/search_no_hits.html`` is rendered together with a publishing
    suggestion form; otherwise ``catalogue/search_multiple_hits.html``.
    """
    query = request.GET.get('q', '')

    if len(query) < 2:
        return render_to_response('catalogue/search_too_short.html',
                                  {'prefix': query},
                                  context_instance=RequestContext(request))

    query = remove_query_syntax_chars(query)

    search = Search()

    theme_terms = search.index.analyze(text=query, field="themes_pl") \
        + search.index.analyze(text=query, field="themes")

    # change hints
    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    author_results = search.search_phrase(query, 'authors', book=True)
    translator_results = search.search_phrase(query, 'translators', book=True)
    title_results = search.search_phrase(query, 'title', book=True)

    # Boost main author/title results with mixed search, and save some of its
    # results for the end of the list.
    author_title_mixed = search.search_some(
        query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
    author_title_rest = []

    primary_results = author_results + translator_results + title_results
    for b in author_title_mixed:
        also_in_mixed = [ba for ba in primary_results if ba.book_id == b.book_id]
        for b2 in also_in_mixed:
            b2.boost *= 1.1
        # Mixed hits with no counterpart among the primary results are kept
        # for the "other" section. (This used to be tested with ``is []``,
        # which is never true, so these hits were silently discarded.)
        if not also_in_mixed:
            author_title_rest.append(b)

    # Do a phrase search but a term search as well - this can give us better
    # snippets than search_everywhere, because the query uses only one field.
    text_phrase = SearchResult.aggregate(
        search.search_phrase(query, 'text', snippets=True, book=False),
        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))

    everywhere = search.search_everywhere(query, query_terms=theme_terms)

    def already_found(results):
        """Build a predicate telling whether a hit's book is already in ``results``."""
        def f(e):
            for r in results:
                if e.book_id == r.book_id:
                    e.boost = 0.9
                    # Only this private combined list grows; we return
                    # right away, so the iteration is not disturbed.
                    results.append(e)
                    return True
            return False
        return f

    is_already_found = already_found(
        author_results + translator_results + title_results + text_phrase)
    everywhere = [x for x in everywhere if not is_already_found(x)]

    author_results = SearchResult.aggregate(author_results)
    translator_results = SearchResult.aggregate(translator_results)
    title_results = SearchResult.aggregate(title_results)

    everywhere = SearchResult.aggregate(everywhere, author_title_rest)

    for field, res in [('authors', author_results),
                       ('translators', translator_results),
                       ('title', title_results),
                       ('text', text_phrase),
                       ('text', everywhere)]:
        res.sort(reverse=True)
        for r in res:
            search.get_snippets(r, query, field, 3)

    suggestion = u''

    def ensure_exists(r):
        """Return the result's book, or False when it no longer exists."""
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # List comprehensions (not ``filter``) because the results are
    # concatenated and sorted in place below.
    author_results = [r for r in author_results if ensure_exists(r)]
    translator_results = [r for r in translator_results if ensure_exists(r)]
    title_results = [r for r in title_results if ensure_exists(r)]
    text_phrase = [r for r in text_phrase if ensure_exists(r)]
    everywhere = [r for r in everywhere if ensure_exists(r)]

    results = author_results + translator_results + title_results + text_phrase + everywhere
    # Missing books are filtered out by now; sort each section.
    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
        res.sort(reverse=True)

    # We don't want to redirect to book text, but rather display the result
    # page even with one result.
    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response('catalogue/search_no_hits.html',
                                  {'tags': tags,
                                   'prefix': query,
                                   "form": form,
                                   'did_you_mean': suggestion},
                                  context_instance=RequestContext(request))

    return render_to_response('catalogue/search_multiple_hits.html',
                              {'tags': tags,
                               'prefix': query,
                               'results': {'author': author_results,
                                           'translator': translator_results,
                                           'title': title_results,
                                           'content': text_phrase,
                                           'other': everywhere},
                               'did_you_mean': suggestion},
                              context_instance=RequestContext(request))