dc9e27b5138e709642cd4e2c1dd3164ee3389b51
[wolnelektury.git] / apps / search / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
10 from django.utils.translation import ugettext as _
11
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from catalogue.views import JSONResponse
16 from search import Search, SearchResult
17 from suggest.forms import PublishingSuggestForm
18 import re
19 #import enchant
20 import json
21
22
def match_word_re(word):
    r"""Build a whole-word regex for the default database backend.

    The dialect is chosen from the ``ENGINE`` string of
    ``settings.DATABASES['default']``:

    * contains ``'sqlite'`` -- ``\b`` anchors on both sides,
    * contains ``'mysql'`` -- ``[[:<:]]`` / ``[[:>:]]`` anchors.

    ``word`` is interpolated as-is, without any escaping.

    Returns the pattern string, or ``None`` when the engine is neither of
    the two above.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    return None
28
29
# Characters stripped from user-supplied search text before it is handed to
# the search backend: backslash, slash, asterisk, colon, parentheses, braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Args:
        query: the raw search text.
        replace: text put in place of each matched character.  Defaults to
            a single space, so words separated only by a syntax character
            do not get glued together.

    Returns:
        The cleaned string; ``query`` itself is not modified.
    """
    # A callable replacement makes ``replace`` literal text.  A plain string
    # would be parsed as a regex template, so a backslash in it would either
    # raise or be reinterpreted as an escape / group reference.
    return query_syntax_chars.sub(lambda match: replace, query)
35
36
def did_you_mean(query, tokens):
    """Return a spelling suggestion for ``query`` (currently a no-op).

    Suggestions are switched off: ``query`` is handed back unchanged and
    ``tokens`` is not looked at.

    The implementation that used to live here (disabled, and kept only as
    commented-out code) worked roughly as follows: tokens matching an author
    tag name (via ``match_word_re``) were skipped, the remaining ones were
    checked against a spelling dictionary, the first differing suggestion
    was substituted into the query, and ``None`` was returned when nothing
    had to be changed.

    Args:
        query: the search text as typed by the user.
        tokens: the individual words of ``query``; unused.

    Returns:
        ``query``, exactly as passed in.
    """
    return query
61
62
def hint(request):
    """Return autocomplete suggestions for the ``term`` GET parameter.

    Terms shorter than two characters produce an empty list.  Otherwise the
    query-syntax characters are stripped from the term, tag and book hints
    are fetched from ``Search``, and pdcounter entries flagged by
    ``is_dupe`` are dropped.

    Every suggestion is a dict with the keys ``label``, ``category``,
    ``id`` and ``url``.  The list is returned as a ``JSONResponse``; when a
    ``callback`` GET parameter is given, it is wrapped in a JSONP call
    instead.  A callback that is not a plain (optionally dotted) identifier
    is rejected with HTTP 400, because its text is echoed verbatim into the
    response body.
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JSONResponse([])

    prefix = remove_query_syntax_chars(prefix)

    search = Search()
    # Tags are going to narrow the results here, but tags can be attached
    # to a book as well as to fragments.  If the tags concern only the book,
    # it matters that the new ones belong to the same book; if they concern
    # themes, it matters that they belong to the same fragment.

    tags = search.hint_tags(prefix, pdcounter=True)
    books = search.hint_books(prefix)

    def is_dupe(tag):
        """Tell whether a pdcounter entry repeats a slug found in ``tags``."""
        if isinstance(tag, PDCounterAuthor):
            return any(t.slug == tag.slug and t != tag for t in tags)
        if isinstance(tag, PDCounterBook):
            # NOTE(review): this scans ``tags`` without excluding ``tag``
            # itself, so a PDCounterBook taken from ``tags`` always matches
            # its own slug.  ``books`` may have been intended -- confirm
            # before changing; the original behavior is kept here.
            return any(b.slug == tag.slug for b in tags)
        return False

    # any() / a list comprehension instead of filter(): same result on
    # Python 2, and still correct where filter() returns a lazy iterator.
    tags = [t for t in tags if not is_dupe(t)]

    def category_name(c):
        """Translate a category name, ignoring a leading ``pd_`` marker."""
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    tag_hints = [{'label': t.name,
                  'category': category_name(t.category),
                  'id': t.id,
                  'url': t.get_absolute_url()}
                 for t in tags]
    book_hints = [{'label': b.title,
                   'category': _('book'),
                   'id': b.id,
                   'url': b.get_absolute_url()}
                  for b in books]
    data = tag_hints + book_hints

    callback = request.GET.get('callback', None)
    if not callback:
        return JSONResponse(data)

    # The callback name comes straight from the client and is written into
    # the response, so only identifier-like names are let through.
    if not re.match(r'[A-Za-z_$][A-Za-z0-9_$.]*\Z', callback):
        return HttpResponse(status=400)

    return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                        content_type="application/json; charset=utf-8")
111
112
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    Queries shorter than two characters render
    ``catalogue/search_too_short.html``.  Otherwise the query-syntax
    characters are stripped and several searches are run:

    * phrase searches on authors, translators and title,
    * a mixed author/translator/title/tag search, used to boost the phrase
      hits for the same book; mixed hits with no phrase counterpart are
      saved and merged into the "everywhere" group,
    * a phrase + term search on the text,
    * a search across everything, minus books already found above.

    Results whose book no longer exists are dropped.  With no results left,
    ``catalogue/search_no_hits.html`` is rendered together with a
    ``PublishingSuggestForm``; otherwise ``catalogue/search_multiple_hits.html``
    gets the results grouped under ``author``, ``translator``, ``title``,
    ``content`` and ``other``.
    """
    query = request.GET.get('q', '')

    if len(query) < 2:
        return render_to_response('catalogue/search_too_short.html',
                                  {'prefix': query},
                                  context_instance=RequestContext(request))

    query = remove_query_syntax_chars(query)

    search = Search()

    theme_terms = search.index.analyze(text=query, field="themes_pl") \
        + search.index.analyze(text=query, field="themes")

    # change hints
    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    author_results = search.search_phrase(query, 'authors', book=True)
    translator_results = search.search_phrase(query, 'translators', book=True)
    title_results = search.search_phrase(query, 'title', book=True)

    # Boost main author/title results with mixed search, and save some of
    # its results for end of list.
    author_title_mixed = search.search_some(
        query, ['authors', 'translators', 'title', 'tags'],
        query_terms=theme_terms)
    author_title_rest = []

    # The three phrase-result lists are not modified inside the loop, so the
    # concatenation is built once; it holds the same result objects, hence
    # the boost below still lands on the original hits.
    phrase_hits = author_results + translator_results + title_results
    for b in author_title_mixed:
        also_in_mixed = [ba for ba in phrase_hits if ba.book_id == b.book_id]
        for b2 in also_in_mixed:
            b2.boost *= 1.1
        # Was ``also_in_mixed is []``: an identity test against a brand new
        # list, which is never true, so nothing was ever saved for later.
        if not also_in_mixed:
            author_title_rest.append(b)

    # Do a phrase search but a term search as well - this can give us better
    # snippets than search_everywhere, because the query is using only one
    # field.
    text_phrase = SearchResult.aggregate(
        search.search_phrase(query, 'text', snippets=True, book=False),
        search.search_some(query, ['text'], snippets=True, book=False,
                           query_terms=theme_terms))

    everywhere = search.search_everywhere(query, query_terms=theme_terms)

    def already_found(results):
        """Build a predicate: is this hit's book already in ``results``?

        A matching hit gets its boost set to 0.9 and is appended to
        ``results`` (the list passed in, not the per-field lists).
        """
        def f(e):
            if any(e.book_id == r.book_id for r in results):
                e.boost = 0.9
                results.append(e)
                return True
            return False
        return f

    f = already_found(author_results + translator_results + title_results
                      + text_phrase)
    everywhere = [x for x in everywhere if not f(x)]

    author_results = SearchResult.aggregate(author_results)
    translator_results = SearchResult.aggregate(translator_results)
    title_results = SearchResult.aggregate(title_results)

    everywhere = SearchResult.aggregate(everywhere, author_title_rest)

    for field, res in [('authors', author_results),
                       ('translators', translator_results),
                       ('title', title_results),
                       ('text', text_phrase),
                       ('text', everywhere)]:
        res.sort(reverse=True)
        for r in res:
            search.get_snippets(r, query, field, 3)

    # No suggestion is ever computed here; the templates always receive an
    # empty ``did_you_mean``.
    suggestion = u''

    def ensure_exists(r):
        """Return the result's book, or False when it is gone from the DB."""
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # Real lists (not filter objects) are needed: they are concatenated and
    # sorted in place just below.
    author_results = [r for r in author_results if ensure_exists(r)]
    translator_results = [r for r in translator_results if ensure_exists(r)]
    title_results = [r for r in title_results if ensure_exists(r)]
    text_phrase = [r for r in text_phrase if ensure_exists(r)]
    everywhere = [r for r in everywhere if ensure_exists(r)]

    results = (author_results + translator_results + title_results
               + text_phrase + everywhere)
    # Sort the filtered lists again, as the original code did after
    # dropping results without a book.
    for res in (author_results, translator_results, title_results,
                text_phrase, everywhere):
        res.sort(reverse=True)

    # We don't want to redirect to book text, but rather display result page
    # even with one result.
    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response('catalogue/search_no_hits.html',
                                  {'tags': tags,
                                   'prefix': query,
                                   "form": form,
                                   'did_you_mean': suggestion},
                                  context_instance=RequestContext(request))

    return render_to_response('catalogue/search_multiple_hits.html',
                              {'tags': tags,
                               'prefix': query,
                               'results': {'author': author_results,
                                           'translator': translator_results,
                                           'title': title_results,
                                           'content': text_phrase,
                                           'other': everywhere},
                               'did_you_mean': suggestion},
                              context_instance=RequestContext(request))