search: no tag boxes + smarter pd boxes
[wolnelektury.git] / src / search / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.http.response import HttpResponseRedirect
7 from django.shortcuts import render_to_response
8 from django.template import RequestContext
9 from django.views.decorators import cache
10 from django.http import HttpResponse, JsonResponse
11
12 from catalogue.models import Book, Tag
13 from pdcounter.models import Author
14 from search.index import Search, SearchResult
15 from suggest.forms import PublishingSuggestForm
16 import re
17 import json
18
19 from wolnelektury.utils import re_escape
20
21
def match_word_re(word):
    """Return a regex matching ``word`` as a whole word for the default DB.

    Word-boundary syntax differs between database regex dialects, so the
    pattern is chosen from the ``ENGINE`` of ``settings.DATABASES['default']``:

    * SQLite     -- ``\\b`` on both sides,
    * MySQL      -- ``[[:<:]]`` / ``[[:>:]]``,
    * PostgreSQL -- ``\\m`` (word start) / ``\\M`` (word end).

    ``word`` is interpolated as-is; callers that pass raw user input should
    escape regex metacharacters first.

    Returns ``None`` when the engine is not one of the above.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    # The hint view in this module already relies on PostgreSQL's `\m`, so
    # give that backend a proper whole-word pattern instead of None.
    if 'postgresql' in engine or 'postgis' in engine:
        return r"\m%s\M" % word
    # Unknown backend: keep the historical "no pattern" result, but explicit.
    return None
27
28
# Characters removed from user-supplied queries: backslash, slash, asterisk,
# colon, parentheses and curly braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Each character matched by ``query_syntax_chars`` is replaced by
    ``replace`` (a single space by default); all other text is untouched.
    """
    cleaned = re.sub(query_syntax_chars, replace, query)
    return cleaned
34
35
def did_you_mean(query, tokens):
    """Return a spelling suggestion for ``query``.

    Suggestions are currently switched off: the function hands ``query``
    back unchanged and does not look at ``tokens``. The draft implementation
    is preserved below, commented out, for reference.
    """
    return query
    # --- disabled draft implementation ---
    # change = {}
    # for t in tokens:
    #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
    #     if len(authors) > 0:
    #         continue

    #     if False:
    #         if not dictionary.check(t):
    #             try:
    #                 change_to = dictionary.suggest(t)[0].lower()
    #                 if change_to != t.lower():
    #                     change[t] = change_to
    #             except IndexError:
    #                 pass

    # if change == {}:
    #     return None

    # for frm, to in change.items():
    #     query = query.replace(frm, to)

    # return query
60
61
@cache.never_cache
def hint(request):
    """Return autocomplete hints (authors first, then books) as JSON or JSONP.

    GET parameters read:

    * ``term``     -- text typed so far; fewer than two characters after
      clean-up yields an empty list,
    * ``max``      -- maximum number of hints; missing, non-numeric or
      non-positive values fall back to 20,
    * ``callback`` -- when present, the JSON payload is wrapped in
      ``callback(...);`` (JSONP).

    Each hint is a dict with ``label``, ``id`` and ``url``; book hints also
    carry ``author``.
    """
    # Strip query-syntax characters and collapse whitespace *before* the
    # length check: otherwise input such as "((" passes the check, collapses
    # to an empty string and the bare `\m` regex below matches every row.
    prefix = ' '.join(remove_query_syntax_chars(request.GET.get('term', '')).split())
    if len(prefix) < 2:
        return JsonResponse([], safe=False)

    # `\m` anchors the match at the start of a word (PostgreSQL regex
    # syntax). Raw string: '\m' is not a valid Python string escape.
    word_start_re = r'\m' + re_escape(prefix)

    try:
        limit = int(request.GET.get('max', ''))
    except ValueError:
        limit = 20
    else:
        if limit < 1:
            limit = 20

    authors = Tag.objects.filter(
        category='author', name_pl__iregex=word_start_re).only('name', 'id', 'slug', 'category')
    data = [
        {
            'label': author.name,
            'id': author.id,
            'url': author.get_absolute_url(),
        }
        for author in authors[:limit]
    ]
    # Fill whatever room is left with books whose title matches.
    if len(data) < limit:
        books = Book.objects.filter(title__iregex=word_start_re)[:limit - len(data)]
        data += [
            {
                'label': b.title,
                'author': b.author_unicode(),
                'id': b.id,
                'url': b.get_absolute_url()
            }
            for b in books
        ]

    callback = request.GET.get('callback', None)
    if callback:
        # NOTE(review): `callback` is echoed into the response unvalidated;
        # consider restricting it to a JavaScript identifier pattern.
        return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                            content_type="application/json; charset=utf-8")
    return JsonResponse(data, safe=False)
104
105
@cache.never_cache
def main(request):
    """Render the full-text search results page for the ``q`` GET parameter.

    Flow:

    1. Clean the query: collapse whitespace, drop private-use characters,
       replace query-syntax characters and keep at most ten words.
    2. Render the "too long" page for queries over 256 characters and the
       "too short" page for queries under two characters after clean-up.
    3. Look up ``pdcounter`` authors whose name contains the query and who
       have no matching author ``Tag`` (by slug).
    4. Run the word search over progressively wider field sets and merge
       hits for the same book into one result.
    5. Render the "no hits" page with a publishing-suggestion form, redirect
       to the single matching pdcounter author, or render the hit list.
    """
    import unicodedata

    raw_query = ' '.join(request.GET.get('q', '').split())
    # filter out private use characters
    raw_query = ''.join(ch for ch in raw_query if unicodedata.category(ch) != 'Co')

    if len(raw_query) > 256:
        return render_to_response(
            'catalogue/search_too_long.html', {'prefix': raw_query},
            context_instance=RequestContext(request))

    # Syntax characters are replaced by spaces, so re-split afterwards:
    # this collapses the doubled/trailing spaces the replacement leaves
    # behind (they would break the `icontains` author lookup below) and
    # applies the ten-word cap to both `words` and `query`.
    words = remove_query_syntax_chars(raw_query).split()[:10]
    query = ' '.join(words)

    # Checked after clean-up so that input made only of syntax characters
    # is rejected instead of being searched with an empty word list.
    if len(query) < 2:
        return render_to_response(
            'catalogue/search_too_short.html', {'prefix': raw_query},
            context_instance=RequestContext(request))

    search = Search()

    # pdcounter authors matching the query, minus those that already exist
    # as author tags (compared by slug).
    pd_authors = Author.objects.filter(name__icontains=query)
    existing_slugs = Tag.objects.filter(
        category='author', slug__in=list(pd_authors.values_list('slug', flat=True)))\
        .values_list('slug', flat=True)
    pd_authors = pd_authors.exclude(slug__in=existing_slugs)

    # Each pass searches all fields accumulated so far, so earlier passes
    # cover the narrower field sets.
    results_parts = []
    search_fields = []
    fieldsets = (
        (['authors'], True),
        (['title'], True),
        (['metadata'], True),
        (['text', 'themes_pl'], False),
    )
    for fieldset, is_book in fieldsets:
        search_fields += fieldset
        results_parts.append(search.search_words(words, search_fields, book=is_book))

    # Keep one result per book, in order of first appearance; later hits
    # for the same book are merged into the first one.
    results = []
    ids_results = {}
    for results_part in results_parts:
        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
            book_id = result.book_id
            if book_id in ids_results:
                ids_results[book_id].merge(result)
            else:
                results.append(result)
                ids_results[book_id] = result

    for result in results:
        search.get_snippets(result, query, num=3)

    suggestion = u''

    def ensure_exists(r):
        # Accessing `r.book` raises Book.DoesNotExist when the book is gone.
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # A real list (not `filter()`): on Python 3 `filter` returns a lazy
    # iterator that is always truthy, which would defeat the emptiness
    # checks below.
    results = [r for r in results if ensure_exists(r)]

    if not results and not pd_authors:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response(
            'catalogue/search_no_hits.html',
            {
                'form': form,
                'did_you_mean': suggestion
            },
            context_instance=RequestContext(request))

    if not results and len(pd_authors) == 1:
        return HttpResponseRedirect(pd_authors[0].get_absolute_url())

    return render_to_response(
        'catalogue/search_multiple_hits.html',
        {
            'pd_authors': pd_authors,
            'results': results,
            'did_you_mean': suggestion
        },
        context_instance=RequestContext(request))