simplify search results list
[wolnelektury.git] / src / search / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.shortcuts import render_to_response
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, JsonResponse
10 from django.utils.translation import ugettext as _
11
12 from catalogue.utils import split_tags
13 from catalogue.models import Book
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
17 import re
18 import json
19
20
def match_word_re(word):
    r"""Build a whole-word regex for ``word`` in the default DB's dialect.

    The engine name is read from ``settings.DATABASES['default']['ENGINE']``:
    SQLite gets ``\b`` boundaries, MySQL gets ``[[:<:]]`` / ``[[:>:]]``.
    For any other engine no pattern is built and ``None`` is returned.

    ``word`` is interpolated as-is; it is not regex-escaped.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        template = r"\b%s\b"
    elif 'mysql' in engine:
        template = "[[:<:]]%s[[:>:]]"
    else:
        # No word-boundary syntax is defined here for other backends.
        return None
    return template % word
26
27
# Characters that carry special meaning in the search query syntax.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Each character matched by ``query_syntax_chars`` is replaced with
    ``replace`` (a single space by default).
    """
    # A callable replacement makes ``replace`` literal text: backslashes or
    # group references in it are not interpreted by the regex engine.
    return query_syntax_chars.sub(lambda _match: replace, query)
33
34
def did_you_mean(query, tokens):
    """Return a corrected version of ``query``.

    Spelling correction is currently disabled: ``query`` is returned
    unchanged and ``tokens`` is not used.
    """
    # The earlier, disabled draft skipped tokens matching an author tag
    # (via match_word_re), replaced the remaining tokens with the first
    # dictionary suggestion when it differed, and returned None when
    # nothing was changed.
    return query
59
60
# Accepts dotted JavaScript identifiers only (e.g. ``jQuery123_456`` or
# ``app.handlers.onHint``), so a JSONP callback cannot inject arbitrary code.
_jsonp_callback_re = re.compile(
    r'[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z')


@cache.never_cache
def hint(request):
    """Return autocomplete hints for a search prefix as JSON or JSONP.

    Reads from ``request.GET``:

    * ``term`` -- the text typed so far; fewer than 2 characters yields an
      empty JSON list.
    * ``max`` -- optional cap on the number of hints; a missing, non-numeric
      or smaller-than-1 value means "no limit".
    * ``callback`` -- optional JSONP callback name. When given, the payload
      is wrapped as ``callback(<json>);``. A name that is not a plain
      (optionally dotted) identifier is rejected with HTTP 400.

    Tag hints are listed first; book hints fill what is left of the limit.
    Every hint is a dict with ``label``, ``category``, ``id`` and ``url``.
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JsonResponse([], safe=False)

    callback = request.GET.get('callback', None)
    if callback and not _jsonp_callback_re.match(callback):
        # The callback is echoed verbatim into the response body, so refuse
        # anything that is not a bare identifier.
        return HttpResponse("Invalid callback name.", status=400,
                            content_type="text/plain; charset=utf-8")

    prefix = remove_query_syntax_chars(prefix)

    search = Search()
    # Tags will narrow the results here, but tags may be attached to a book
    # as well as to fragments. If the tags concern only the book, the new
    # ones must belong to the same book; if they concern themes, they must
    # belong to the same fragment.

    def category_name(c):
        # Drop the 'pd_' prefix before translating the category name.
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    try:
        limit = int(request.GET.get('max', ''))
    except ValueError:
        limit = -1
    else:
        if limit < 1:
            limit = -1
    # From here on ``limit`` is either positive (counted down to 0) or -1,
    # which never reaches 0 and therefore means "unlimited".

    data = []

    # Materialised once: every is_dupe() call scans the full list again.
    found_tags = list(search.hint_tags(prefix, pdcounter=True))

    def is_dupe(tag):
        # any() gives a real boolean; the truthiness of filter() is always
        # True on Python 3, where it returns a lazy iterator.
        if isinstance(tag, PDCounterAuthor):
            return any(t.slug == tag.slug and t != tag for t in found_tags)
        elif isinstance(tag, PDCounterBook):
            # NOTE(review): unlike the author branch there is no
            # ``b != tag`` exclusion, so a PDCounterBook present in
            # found_tags matches itself -- confirm this is intended.
            return any(b.slug == tag.slug for b in found_tags)
        return False

    tags = [t for t in found_tags if not is_dupe(t)]
    for t in tags:
        if not limit:
            break
        limit -= 1
        data.append({
            'label': t.name,
            'category': category_name(t.category),
            'id': t.id,
            'url': t.get_absolute_url()
            })
    if limit:
        books = search.hint_books(prefix)
        for b in books:
            if not limit:
                break
            limit -= 1
            data.append({
                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                'category': _('book'),
                'id': b.id,
                'url': b.get_absolute_url()
                })

    if callback:
        return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                            content_type="application/json; charset=utf-8")
    else:
        return JsonResponse(data, safe=False)
130
131
@cache.never_cache
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    The query is whitespace-normalised, stripped of private-use characters
    and of query-syntax characters, and capped at 10 words. Queries shorter
    than 2 or longer than 256 characters get the "too short" / "too long"
    templates instead of being searched.

    Searching runs in several passes over a growing set of fields; results
    are aggregated per book, with later hits for an already seen book merged
    into the first one. Results whose book no longer exists are dropped.
    Renders ``search_no_hits.html`` when nothing is left, otherwise
    ``search_multiple_hits.html``.
    """
    import unicodedata

    query = request.GET.get('q', '')
    query = ' '.join(query.split())
    # Filter out private use characters.
    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')

    if len(query) < 2:
        return render_to_response(
            'catalogue/search_too_short.html', {'prefix': query},
            context_instance=RequestContext(request))
    elif len(query) > 256:
        return render_to_response(
            'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))

    query = remove_query_syntax_chars(query)

    words = query.split()
    if len(words) > 10:
        # Cap the word list handed to search_words() as well as the query
        # string, so both describe the same (truncated) search.
        words = words[:10]
        query = ' '.join(words)

    search = Search()

    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    results_parts = []

    search_fields = []
    fieldsets = (
        (['authors'], True),
        (['title'], True),
        (['metadata'], True),
        (['text', 'themes_pl'], False),
    )
    for fieldset, is_book in fieldsets:
        # search_fields grows cumulatively: each pass searches every field
        # of the previous passes plus the new fieldset.
        search_fields += fieldset
        results_parts.append(search.search_words(words, search_fields, book=is_book))

    results = []
    ids_results = {}
    for results_part in results_parts:
        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
            book_id = result.book_id
            if book_id in ids_results:
                # The book was already found in an earlier pass; fold this
                # hit into the existing result rather than listing it twice.
                ids_results[book_id].merge(result)
            else:
                results.append(result)
                ids_results[book_id] = result

    for result in results:
        search.get_snippets(result, query, num=3)

    suggestion = u''

    def ensure_exists(r):
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # A real list keeps the emptiness check below meaningful; on Python 3
    # filter() returns a lazy iterator that is always truthy.
    results = [r for r in results if ensure_exists(r)]

    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response(
            'catalogue/search_no_hits.html',
            {
                'tags': tags,
                'prefix': query,
                'form': form,
                'did_you_mean': suggestion
            },
            context_instance=RequestContext(request))

    return render_to_response(
        'catalogue/search_multiple_hits.html',
        {
            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
            'prefix': query,
            'results': results,
            'did_you_mean': suggestion
        },
        context_instance=RequestContext(request))
215         },
216         context_instance=RequestContext(request))