Off by one error
[wolnelektury.git] / apps / search / views.py
1 # -*- coding: utf-8 -*-
2
3 from django.conf import settings
4 from django.shortcuts import render_to_response, get_object_or_404
5 from django.template import RequestContext
6 from django.contrib.auth.decorators import login_required
7 from django.views.decorators import cache
8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
9 from django.utils.translation import ugettext as _
10
11 from catalogue.utils import split_tags
12 from catalogue.models import Book, Tag, Fragment
13 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
14 from catalogue.views import JSONResponse
15 from search import Search, SearchResult
16 from lucene import StringReader
17 from suggest.forms import PublishingSuggestForm
18 from time import sleep
19 import re
20 #import enchant
21 import json
22
23
def match_word_re(word):
    r"""Wrap ``word`` in the word-boundary syntax of the configured database.

    The engine name is read from ``settings.DATABASES['default']['ENGINE']``:

    * an engine containing ``sqlite`` yields ``\bword\b``;
    * an engine containing ``mysql`` yields ``[[:<:]]word[[:>:]]``;
    * any other engine yields ``None``.

    ``word`` is interpolated as given; it is not regex-escaped here.
    """
    dialect_patterns = (
        ('sqlite', r"\b%s\b"),
        ('mysql', "[[:<:]]%s[[:>:]]"),
    )
    for backend, pattern in dialect_patterns:
        if backend in settings.DATABASES['default']['ENGINE']:
            return pattern % word
    # No known dialect matched: callers get None, exactly as before.
    return None
29
30
def did_you_mean(query, tokens):
    """Return a suggested replacement for ``query``.

    Suggestion logic is currently switched off, so this is a pass-through:
    ``tokens`` is ignored and ``query`` is handed back unchanged.  The
    disabled implementation is kept below, commented out, for reference.
    """
    unchanged = query
    return unchanged
    # --- disabled implementation, kept for reference ---
    # change = {}
    # for t in tokens:
    #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
    #     if len(authors) > 0:
    #         continue

    #     if False:
    #         if not dictionary.check(t):
    #             try:
    #                 change_to = dictionary.suggest(t)[0].lower()
    #                 if change_to != t.lower():
    #                     change[t] = change_to
    #             except IndexError:
    #                 pass

    # if change == {}:
    #     return None

    # for frm, to in change.items():
    #     query = query.replace(frm, to)

    # return query
55
56
# A JSONP callback must be a plain, optionally dotted, JavaScript identifier
# (e.g. ``jQuery17201_1234`` or ``ns.cb``).  ``\Z`` is used instead of ``$``
# so that a trailing newline cannot slip through.
_JSONP_CALLBACK_RE = re.compile(r'^[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)*\Z')


def hint(request):
    """Return autocomplete hints for the ``term`` GET parameter.

    The response is a list of ``{'label', 'category', 'id', 'url'}`` dicts:
    first the tags found by ``Search.hint_tags`` (with public-domain-counter
    entries that duplicate another hint removed), then the books found by
    ``Search.hint_books``.

    * A ``term`` shorter than two characters yields an empty JSON list.
    * Without a ``callback`` GET parameter the data is returned through
      ``JSONResponse``.
    * With a ``callback`` parameter the data is wrapped as JSONP.  Because the
      callback name ends up in executable script, it is only accepted when it
      is a dotted JavaScript identifier; anything else gets a 400 response
      instead of being echoed back.
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JSONResponse([])

    callback = request.GET.get('callback', None)
    if callback and not _JSONP_CALLBACK_RE.match(callback):
        # Never reflect an arbitrary string into a script response.
        return HttpResponse("Invalid callback name.", status=400,
                            content_type="text/plain; charset=utf-8")

    search = Search()
    # Tags will restrict the search here,
    # but tags can sit on a book as well as on fragments.
    # If the tags concern only the book, the new ones must be in the same book;
    # if they concern themes, they must be in the same fragment.

    tags = search.hint_tags(prefix, pdcounter=True)
    books = search.hint_books(prefix)

    def is_dupe(tag):
        """Tell whether a pdcounter entry repeats another hint with its slug."""
        if isinstance(tag, PDCounterAuthor):
            return any(t.slug == tag.slug and t != tag for t in tags)
        if isinstance(tag, PDCounterBook):
            # NOTE(review): unlike the author branch there is no ``t != tag``
            # guard, so a PDCounterBook present in ``tags`` matches itself and
            # is always dropped.  Possibly ``books`` was meant -- confirm
            # before changing; behaviour is kept as it was.
            return any(b.slug == tag.slug for b in tags)
        return False

    # The comprehension is fully evaluated before ``tags`` is rebound, so
    # ``is_dupe`` compares against the unfiltered list throughout.
    tags = [t for t in tags if not is_dupe(t)]

    def category_name(c):
        """Translate a category name, ignoring a leading ``pd_`` marker."""
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    data = [{'label': t.name,
             'category': category_name(t.category),
             'id': t.id,
             'url': t.get_absolute_url()}
            for t in tags]
    data += [{'label': b.title,
              'category': _('book'),
              'id': b.id,
              'url': b.get_absolute_url()}
             for b in books]

    if not callback:
        return JSONResponse(data)

    # JSONP is script, not JSON: serve it with a JavaScript content type so
    # browsers enforcing strict MIME checking will still execute it.
    return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                        content_type="application/javascript; charset=utf-8")
103
104
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    Flow:

    1. A query shorter than two characters renders
       ``catalogue/search_too_short.html``.
    2. Phrase searches are run on authors, translators and titles; a mixed
       search over authors/translators/title/tags boosts phrase hits for the
       same book, and mixed hits with no phrase counterpart are kept for the
       "other" section.
    3. Text hits (phrase + term search) and an everywhere-search are collected;
       everywhere-hits for books already found above are dropped.
    4. Every group is aggregated, sorted, given snippets, and stripped of
       results whose book no longer exists.
    5. With no results at all ``catalogue/search_no_hits.html`` is rendered
       together with a ``PublishingSuggestForm``; otherwise
       ``catalogue/search_multiple_hits.html`` receives the grouped results.

    ``did_you_mean`` in the template context is always an empty string here.
    """
    query = request.GET.get('q', '')

    if len(query) < 2:
        return render_to_response('catalogue/search_too_short.html',
                                  {'prefix': query},
                                  context_instance=RequestContext(request))
    search = Search()

    theme_terms = search.index.analyze(text=query, field="themes_pl") \
        + search.index.analyze(text=query, field="themes")

    # change hints
    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    author_results = search.search_phrase(query, 'authors', book=True)
    translator_results = search.search_phrase(query, 'translators', book=True)

    title_results = search.search_phrase(query, 'title', book=True)

    # Boost main author/title results with mixed search, and save some of its
    # results for end of list.
    author_title_mixed = search.search_some(
        query, ['authors', 'translators', 'title', 'tags'],
        query_terms=theme_terms)
    author_title_rest = []

    # The three phrase-result lists are not modified inside the loop, so the
    # concatenation can be built once.
    phrase_hits = author_results + translator_results + title_results
    for b in author_title_mixed:
        also_in_mixed = [ba for ba in phrase_hits if ba.book_id == b.book_id]
        for b2 in also_in_mixed:
            b2.boost *= 1.1
        # Was ``also_in_mixed is []``: an identity test against a brand-new
        # list is always False, so unmatched mixed hits were silently lost.
        if not also_in_mixed:
            author_title_rest.append(b)

    # Do a phrase search but a term search as well - this can give us better
    # snippets than search_everywhere, because the query is using only one
    # field.
    text_phrase = SearchResult.aggregate(
        search.search_phrase(query, 'text', snippets=True, book=False),
        search.search_some(query, ['text'], snippets=True, book=False,
                           query_terms=theme_terms))

    everywhere = search.search_everywhere(query, query_terms=theme_terms)

    def already_found(results):
        """Build a predicate telling whether a hit's book is in ``results``."""
        def f(e):
            for r in results:
                if e.book_id == r.book_id:
                    e.boost = 0.9
                    # Appending while iterating is safe only because we return
                    # right away.  ``results`` is the temporary list built at
                    # the call site below.
                    results.append(e)
                    return True
            return False
        return f
    f = already_found(author_results + translator_results + title_results + text_phrase)
    everywhere = [x for x in everywhere if not f(x)]

    author_results = SearchResult.aggregate(author_results)
    translator_results = SearchResult.aggregate(translator_results)
    title_results = SearchResult.aggregate(title_results)

    everywhere = SearchResult.aggregate(everywhere, author_title_rest)

    for field, res in [('authors', author_results),
                       ('translators', translator_results),
                       ('title', title_results),
                       ('text', text_phrase),
                       ('text', everywhere)]:
        res.sort(reverse=True)
        for r in res:
            search.get_snippets(r, query, field, 3)

    suggestion = u''

    def ensure_exists(r):
        """Return the result's book, or False when the book row is gone."""
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    author_results = [r for r in author_results if ensure_exists(r)]
    translator_results = [r for r in translator_results if ensure_exists(r)]
    title_results = [r for r in title_results if ensure_exists(r)]
    text_phrase = [r for r in text_phrase if ensure_exists(r)]
    everywhere = [r for r in everywhere if ensure_exists(r)]

    results = author_results + translator_results + title_results + text_phrase + everywhere
    # Only results whose book still exists remain at this point; sort them.
    results.sort(reverse=True)

    # We don't want to redirect to book text, but rather display result page even with one result.
    # if len(results) == 1:
    #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
    #     if len(fragment_hits) == 1:
    #         #anchor = fragment_hits[0]['fragment']
    #         #frag = Fragment.objects.get(anchor=anchor)
    #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
    #     return HttpResponseRedirect(results[0].book.get_absolute_url())
    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response('catalogue/search_no_hits.html',
                                  {'tags': tags,
                                   'prefix': query,
                                   "form": form,
                                   'did_you_mean': suggestion},
                                  context_instance=RequestContext(request))

    return render_to_response('catalogue/search_multiple_hits.html',
                              {'tags': tags,
                               'prefix': query,
                               'results': {'author': author_results,
                                           'translator': translator_results,
                                           'title': title_results,
                                           'content': text_phrase,
                                           'other': everywhere},
                               'did_you_mean': suggestion},
                              context_instance=RequestContext(request))