Merge branch 'master' of github.com:fnp/wolnelektury
[wolnelektury.git] / apps / search / views.py
1 # -*- coding: utf-8 -*-
2
3 from django.conf import settings
4 from django.shortcuts import render_to_response, get_object_or_404
5 from django.template import RequestContext
6 from django.views.decorators import cache
7 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
8 from django.utils.translation import ugettext as _
9
10 from catalogue.utils import split_tags
11 from catalogue.models import Book, Tag, Fragment
12 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
13 from catalogue.views import JSONResponse
14 from search import Search, SearchResult
15 from suggest.forms import PublishingSuggestForm
16 import re
17 #import enchant
18 import json
19
20
def match_word_re(word):
    r"""Build a whole-word regex pattern for ``word`` for the default database.

    The word-boundary syntax is chosen from the ``ENGINE`` setting of the
    default database: ``\b`` markers when it mentions ``sqlite``, and the
    ``[[:<:]]`` / ``[[:>:]]`` markers when it mentions ``mysql``.

    ``word`` is interpolated verbatim (it is not regex-escaped).  For any
    other engine the function returns ``None``.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        pattern = r"\b%s\b"
    elif 'mysql' in engine:
        pattern = "[[:<:]]%s[[:>:]]"
    else:
        # Unknown backend: no pattern is produced.
        return None
    return pattern % word
26
27
# Characters that are stripped from user input before it is handed to the
# search backend: backslash, slash, asterisk, colon, parentheses and braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Args:
        query: the raw query string.
        replace: text inserted in place of each matched character
            (a single space by default).

    Returns:
        The query with each character matched by ``query_syntax_chars``
        replaced by ``replace``.
    """
    # A callable replacement keeps ``replace`` literal; passing the string
    # itself would make re.sub interpret backslash escapes inside it.
    return query_syntax_chars.sub(lambda match: replace, query)
33
34
def did_you_mean(query, tokens):
    """Return a suggested replacement for ``query``.

    Suggestions are currently disabled, so ``query`` is handed back
    unchanged and ``tokens`` is not used.
    """
    # The disabled implementation walked over ``tokens``, skipped those that
    # matched an author tag (via ``match_word_re``), and collected the first
    # lower-cased spell-checker suggestion for tokens failing a dictionary
    # check.  It returned None when nothing changed, otherwise the query with
    # the collected replacements applied.
    return query
59
60
# Only dotted JavaScript identifiers are accepted as JSONP callback names, so
# the user-supplied value cannot inject arbitrary script into the response.
_jsonp_callback_re = re.compile(
    r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z")


def hint(request):
    """Return autocomplete hints (tags and books) for the ``term`` GET parameter.

    A term shorter than two characters yields an empty JSON list.  Otherwise
    the term is stripped of query-syntax characters and used to look up tag
    and book hints; each hint is serialised as a dict with ``label``,
    ``category``, ``id`` and ``url`` keys.

    When a ``callback`` GET parameter is present the payload is wrapped as
    JSONP (``callback(data);``).  A callback that is not a plain dotted
    identifier is rejected with HTTP 400 instead of being echoed back.
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JSONResponse([])

    callback = request.GET.get('callback', None)
    if callback and not _jsonp_callback_re.match(callback):
        # Never reflect an unvalidated callback name into executable output.
        return HttpResponse("Invalid callback name.", status=400,
                            content_type="text/plain; charset=utf-8")

    prefix = remove_query_syntax_chars(prefix)

    search = Search()
    # Tags will narrow the results here, but tags may be attached to a book
    # as well as to fragments.  If the tags concern only the book, the new
    # ones have to be in the same book; if they concern themes, they have to
    # be in the same fragment.

    all_tags = search.hint_tags(prefix, pdcounter=True)
    books = search.hint_books(prefix)

    def is_dupe(tag):
        """Tell whether a pdcounter entry shares its slug with another hint."""
        if isinstance(tag, PDCounterAuthor):
            return any(t.slug == tag.slug and t != tag for t in all_tags)
        if isinstance(tag, PDCounterBook):
            # NOTE(review): this scans the tag hints without excluding
            # ``tag`` itself, so any PDCounterBook found in ``all_tags``
            # matches its own slug.  Possibly ``books`` was meant here;
            # behaviour is kept as it was - confirm the intent.
            return any(b.slug == tag.slug for b in all_tags)
        return False

    # any() and list comprehensions instead of filter(): the truthiness of a
    # filter() result is only meaningful where filter() returns a list.
    tags = [t for t in all_tags if not is_dupe(t)]

    def category_name(c):
        """Translate a category name, dropping the ``pd_`` prefix if present."""
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    data = [{'label': t.name,
             'category': category_name(t.category),
             'id': t.id,
             'url': t.get_absolute_url()}
            for t in tags]
    data += [{'label': b.title,
              'category': _('book'),
              'id': b.id,
              'url': b.get_absolute_url()}
             for b in books]

    if callback:
        # JSONP is a script, so it is served with a JavaScript content type.
        return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                            content_type="application/javascript; charset=utf-8")
    return JSONResponse(data)
109
110
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    A query shorter than two characters renders
    ``catalogue/search_too_short.html``.  Otherwise the query is stripped of
    query-syntax characters and searched in several ways (authors,
    translators, title, text phrase, and an "everywhere" search); the hits
    are grouped, sorted, and supplied with snippets.  When nothing is found
    ``catalogue/search_no_hits.html`` is rendered together with a publishing
    suggestion form; otherwise ``catalogue/search_multiple_hits.html`` is
    rendered with the grouped results.
    """
    query = request.GET.get('q', '')

    if len(query) < 2:
        return render_to_response('catalogue/search_too_short.html',
                                  {'prefix': query},
                                  context_instance=RequestContext(request))

    query = remove_query_syntax_chars(query)

    search = Search()

    theme_terms = search.index.analyze(text=query, field="themes_pl") \
        + search.index.analyze(text=query, field="themes")

    # change hints
    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    author_results = search.search_phrase(query, 'authors', book=True)
    translator_results = search.search_phrase(query, 'translators', book=True)
    title_results = search.search_phrase(query, 'title', book=True)

    # Boost main author/title results with the mixed search, and save the
    # mixed results that matched none of them for the end of the list.
    author_title_mixed = search.search_some(
        query, ['authors', 'translators', 'title', 'tags'],
        query_terms=theme_terms)
    author_title_rest = []

    primary_results = author_results + translator_results + title_results
    for b in author_title_mixed:
        also_in_mixed = [r for r in primary_results if r.book_id == b.book_id]
        for b2 in also_in_mixed:
            b2.boost *= 1.1
        # An emptiness test is required here: an identity comparison against
        # a fresh ``[]`` literal is never true, which left this list empty.
        if not also_in_mixed:
            author_title_rest.append(b)

    # Do a phrase search but a term search as well - this can give us better
    # snippets than search_everywhere, because the query uses only one field.
    text_phrase = SearchResult.aggregate(
        search.search_phrase(query, 'text', snippets=True, book=False),
        search.search_some(query, ['text'], snippets=True, book=False,
                           query_terms=theme_terms))

    everywhere = search.search_everywhere(query, query_terms=theme_terms)

    # Drop "everywhere" hits for books already present in a more specific
    # result group.
    found_book_ids = set(r.book_id for r in primary_results + text_phrase)
    everywhere = [r for r in everywhere if r.book_id not in found_book_ids]

    author_results = SearchResult.aggregate(author_results)
    translator_results = SearchResult.aggregate(translator_results)
    title_results = SearchResult.aggregate(title_results)

    everywhere = SearchResult.aggregate(everywhere, author_title_rest)

    for field, res in [('authors', author_results),
                       ('translators', translator_results),
                       ('title', title_results),
                       ('text', text_phrase),
                       ('text', everywhere)]:
        res.sort(reverse=True)
        for r in res:
            search.get_snippets(r, query, field, 3)

    # No "did you mean" suggestion is computed at the moment.
    suggestion = u''

    def ensure_exists(r):
        """Return the result's book, or False when the book no longer exists."""
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # Ensure the books do exist and sort the groups.  List comprehensions keep
    # the groups as real lists, which the sorting and concatenation need.
    author_results = [r for r in author_results if ensure_exists(r)]
    translator_results = [r for r in translator_results if ensure_exists(r)]
    title_results = [r for r in title_results if ensure_exists(r)]
    text_phrase = [r for r in text_phrase if ensure_exists(r)]
    everywhere = [r for r in everywhere if ensure_exists(r)]

    for res in (author_results, translator_results, title_results,
                text_phrase, everywhere):
        res.sort(reverse=True)

    results = (author_results + translator_results + title_results
               + text_phrase + everywhere)

    # We don't want to redirect to the book text, but rather display the
    # result page even with a single result.
    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response('catalogue/search_no_hits.html',
                                  {'tags': tags,
                                   'prefix': query,
                                   "form": form,
                                   'did_you_mean': suggestion},
                                  context_instance=RequestContext(request))

    return render_to_response('catalogue/search_multiple_hits.html',
                              {'tags': tags,
                               'prefix': query,
                               'results': {'author': author_results,
                                           'translator': translator_results,
                                           'title': title_results,
                                           'content': text_phrase,
                                           'other': everywhere},
                               'did_you_mean': suggestion},
                              context_instance=RequestContext(request))
231                                'did_you_mean': suggestion},
232         context_instance=RequestContext(request))