Merge branch 'api'
[wolnelektury.git] / src / search / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.shortcuts import render_to_response
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, JsonResponse
10 from django.utils.translation import ugettext as _
11
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from search.index import Search, SearchResult
16 from suggest.forms import PublishingSuggestForm
17 import re
18 import json
19
20
def match_word_re(word):
    """Return a regex matching ``word`` as a whole word for the default database.

    The word-boundary syntax is chosen from the ``ENGINE`` setting of the
    ``default`` database. ``word`` is interpolated as-is (it is not escaped).

    Returns None when the engine is not one of the recognised ones.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    elif 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    elif 'postgresql' in engine:
        # PostgreSQL: \m anchors at the start of a word, \M at its end.
        return r"\m%s\M" % word
    # Unknown engine: no pattern available (kept from the original fall-through).
    return None
26
27
# Characters stripped from user queries: backslash, slash, asterisk, colon,
# parentheses and curly braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character replaced.

    ``replace`` is the text substituted for each character matched by
    ``query_syntax_chars``; it defaults to a single space. It is inserted
    literally (backslashes in it are not treated as regex escapes).
    """
    # A callable replacement keeps `replace` literal, whatever it contains.
    return query_syntax_chars.sub(lambda _match: replace, query)
33
34
def did_you_mean(query, tokens):
    """Return a suggested replacement for ``query``.

    Suggestions are currently switched off: ``query`` is handed back
    unchanged and ``tokens`` is not used.
    """
    # Disabled draft of the suggestion logic, kept for reference:
    #
    # change = {}
    # for t in tokens:
    #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
    #     if len(authors) > 0:
    #         continue
    #
    #     if False:
    #         if not dictionary.check(t):
    #             try:
    #                 change_to = dictionary.suggest(t)[0].lower()
    #                 if change_to != t.lower():
    #                     change[t] = change_to
    #             except IndexError:
    #                 pass
    #
    # if change == {}:
    #     return None
    #
    # for frm, to in change.items():
    #     query = query.replace(frm, to)
    #
    # return query
    return query
59
60
@cache.never_cache
def hint(request):
    """Return autocomplete hints for a typed prefix as JSON (or JSONP).

    GET parameters read:
      term     -- the text typed so far; fewer than 2 characters yields ``[]``.
      max      -- optional maximum number of hints. When missing, not an
                  integer, or below 1, only authors are returned (at most 10).
      callback -- optional; when given, the JSON is wrapped as
                  ``callback(...);``.

    Each hint is a dict with ``label``, ``category``, ``id`` and ``url``.
    Authors come first; books fill the remaining room when ``max`` allows.
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JsonResponse([], safe=False)

    prefix = remove_query_syntax_chars(prefix)
    # The prefix is embedded in a database regex, so the metacharacters still
    # left in it must be neutralised, otherwise input such as "a[" breaks the
    # query. Only ASCII metacharacters are escaped, so letters (including
    # non-ASCII ones) are never backslash-prefixed.
    prefix = re.sub(r'([.^$*+?()\[\]{}|\\])', r'\\\1', prefix)
    # \m is the PostgreSQL "start of word" anchor; raw string keeps the
    # backslash intact.
    pattern = r'\m' + prefix

    try:
        limit = int(request.GET.get('max', ''))
    except ValueError:
        limit = -1
    else:
        if limit < 1:
            limit = -1

    # Never more than 10 authors, and never more than the requested maximum.
    author_limit = 10 if limit < 0 else min(10, limit)
    authors = Tag.objects.filter(category='author', name__iregex=pattern)[:author_limit]
    data = [
        {
            'label': author.name,
            'category': _('author'),
            'id': author.id,
            'url': author.get_absolute_url(),
        }
        for author in authors
    ]
    # With limit == -1 this is never true, so books are only added when the
    # client passed a valid ``max``.
    if len(data) < limit:
        books = Book.objects.filter(title__iregex=pattern)[:limit - len(data)]
        data += [
            {
                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                'category': _('book'),
                'id': b.id,
                'url': b.get_absolute_url()
            }
            for b in books
        ]
    callback = request.GET.get('callback', None)
    if callback:
        # NOTE(review): ``callback`` is echoed into the response unvalidated;
        # consider restricting it to a JavaScript identifier.
        return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
                            content_type="application/json; charset=utf-8")
    else:
        return JsonResponse(data, safe=False)
102
103
@cache.never_cache
def main(request):
    """Render the search results page for the ``q`` GET parameter.

    The query is whitespace-normalised, stripped of private-use characters
    and query-syntax characters, and capped at 10 words. Queries shorter
    than 2 or longer than 256 characters get a dedicated template.

    Renders ``catalogue/search_no_hits.html`` (with a publishing-suggestion
    form) when nothing is found, ``catalogue/search_multiple_hits.html``
    otherwise.
    """
    import unicodedata

    query = request.GET.get('q', '')
    query = ' '.join(query.split())
    # filter out private use characters
    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')

    if len(query) < 2:
        return render_to_response(
            'catalogue/search_too_short.html', {'prefix': query},
            context_instance=RequestContext(request))
    elif len(query) > 256:
        return render_to_response(
            'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))

    query = remove_query_syntax_chars(query)

    # Cap the search at 10 words; keep `words` and `query` in step so the
    # word search and the snippets/templates all see the same capped query.
    words = query.split()
    if len(words) > 10:
        words = words[:10]
        query = ' '.join(words)

    search = Search()

    tags = search.hint_tags(query, pdcounter=True, prefix=False)
    tags = split_tags(tags)

    results_parts = []

    # `search_fields` accumulates: each pass searches every field listed so
    # far, so later passes are progressively broader.
    search_fields = []
    fieldsets = (
        (['authors'], True),
        (['title'], True),
        (['metadata'], True),
        (['text', 'themes_pl'], False),
    )
    for fieldset, is_book in fieldsets:
        search_fields += fieldset
        results_parts.append(search.search_words(words, search_fields, book=is_book))

    # One entry per book: the first hit for a book fixes its position in
    # `results`, later hits for the same book are merged into it.
    results = []
    ids_results = {}
    for results_part in results_parts:
        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
            book_id = result.book_id
            if book_id in ids_results:
                ids_results[book_id].merge(result)
            else:
                results.append(result)
                ids_results[book_id] = result

    for result in results:
        search.get_snippets(result, query, num=3)

    suggestion = u''

    def ensure_exists(r):
        # Accessing `r.book` raises Book.DoesNotExist when the book is gone.
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    # Build a real list: a lazy filter object would always be truthy on
    # Python 3 and the "no hits" branch below would never run.
    results = [r for r in results if ensure_exists(r)]

    if not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response(
            'catalogue/search_no_hits.html',
            {
                'tags': tags,
                'prefix': query,
                'form': form,
                'did_you_mean': suggestion
            },
            context_instance=RequestContext(request))

    return render_to_response(
        'catalogue/search_multiple_hits.html',
        {
            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
            'prefix': query,
            'results': results,
            'did_you_mean': suggestion
        },
        context_instance=RequestContext(request))
187         },
188         context_instance=RequestContext(request))