1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.http.response import HttpResponseRedirect
7 from django.shortcuts import render
8 from django.views.decorators import cache
9 from django.http import HttpResponse, JsonResponse
11 from catalogue.models import Book, Tag
12 from pdcounter.models import Author
13 from picture.models import Picture
14 from search.index import Search, SearchResult, PictureResult
15 from suggest.forms import PublishingSuggestForm
19 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Wrap ``word`` in the whole-word regex markers of the default database.

    The result is a regex pattern string whose word-boundary syntax depends
    on the engine configured in ``settings.DATABASES['default']``, because
    each backend spells word boundaries differently.

    ``word`` is interpolated as-is; callers must escape it beforehand if it
    may contain regex metacharacters.

    Returns the pattern string, or ``None`` when the engine is not one of
    the recognised backends (SQLite, MySQL, PostgreSQL/PostGIS).
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL spells "start of word" / "end of word" as \m and \M.
        return r"\m%s\M" % word
    # Unknown backend: keep the historical behaviour of yielding None.
    return None
# Characters removed from user queries: \ / * : ( ) { } ? . [ ] +
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with each query-syntax character replaced.

    Every single character matched by ``query_syntax_chars`` is substituted
    with ``replace`` (a single space by default); all other characters are
    left untouched. Runs of matched characters are not collapsed, so the
    result may contain consecutive replacement strings.
    """
    return re.sub(query_syntax_chars, replace, query)
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
64 prefix = request.GET.get('term', '')
66 return JsonResponse([], safe=False)
68 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
71 limit = int(request.GET.get('max', ''))
78 authors = Tag.objects.filter(
79 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
84 'url': author.get_absolute_url(),
86 for author in authors[:limit]
92 'author': b.author_unicode(),
94 'url': b.get_absolute_url()
96 for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
98 callback = request.GET.get('callback', None)
100 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
101 content_type="application/json; charset=utf-8")
103 return JsonResponse(data, safe=False)
108 query = request.GET.get('q', '')
111 request, 'catalogue/search_too_short.html',
113 elif len(query) > 256:
115 request, 'catalogue/search_too_long.html',
118 query = prepare_query(query)
119 pd_authors = search_pd_authors(query)
120 books = search_books(query)
121 pictures = search_pictures(query)
124 if not (books or pictures or pd_authors):
125 form = PublishingSuggestForm(initial={"books": query + ", "})
128 'catalogue/search_no_hits.html',
131 'did_you_mean': suggestion
134 if not (books or pictures) and len(pd_authors) == 1:
135 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
139 'catalogue/search_multiple_hits.html',
141 'pd_authors': pd_authors,
143 'pictures': pictures,
144 'did_you_mean': suggestion
147 def search_books(query):
151 words = query.split()
153 (['authors', 'authors_nonstem'], True),
154 (['title', 'title_nonstem'], True),
155 (['metadata', 'metadata_nonstem'], True),
156 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
158 for fields, is_book in fieldsets:
159 search_fields += fields
160 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
163 for results_part in results_parts:
164 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
165 book_id = result.book_id
166 if book_id in ids_results:
167 ids_results[book_id].merge(result)
169 results.append(result)
170 ids_results[book_id] = result
171 descendant_ids = set(
172 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
173 results = [result for result in results if result.book_id not in descendant_ids]
174 for result in results:
175 search.get_snippets(result, query, num=3)
177 def ensure_exists(r):
180 except Book.DoesNotExist:
183 results = filter(ensure_exists, results)
187 def search_pictures(query):
191 words = query.split()
193 (['authors', 'authors_nonstem'], True),
194 (['title', 'title_nonstem'], True),
195 (['metadata', 'metadata_nonstem'], True),
196 (['themes_pl', 'themes_pl_nonstem'], False),
198 for fields, is_book in fieldsets:
199 search_fields += fields
200 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
203 for results_part in results_parts:
204 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
205 picture_id = result.picture_id
206 if picture_id in ids_results:
207 ids_results[picture_id].merge(result)
209 results.append(result)
210 ids_results[picture_id] = result
212 def ensure_exists(r):
215 except Picture.DoesNotExist:
218 results = filter(ensure_exists, results)
def search_pd_authors(query):
    """Find ``Author`` records matching ``query`` that have no author tag.

    Selects ``Author`` objects whose name contains ``query``
    (case-insensitive), then drops every one whose slug is already used by
    a ``Tag`` of category ``'author'``.

    Returns the resulting ``Author`` queryset.
    """
    matching = Author.objects.filter(name__icontains=query)
    # Materialise the slugs first, as a plain list, before using them to
    # look up the tags.
    matching_slugs = list(matching.values_list('slug', flat=True))
    tagged_slugs = Tag.objects.filter(
        category='author', slug__in=matching_slugs,
    ).values_list('slug', flat=True)
    return matching.exclude(slug__in=tagged_slugs)
def prepare_query(query):
    """Normalise a raw user query before it is handed to the search code.

    Steps, in order:

    1. collapse all runs of whitespace into single spaces and trim the ends;
    2. drop Unicode private-use characters (general category ``Co``);
    3. replace query-syntax characters via ``remove_query_syntax_chars``;
    4. re-split on whitespace and keep at most the first ten words.

    Returns the cleaned query string.
    """
    # Local import so this function does not depend on a module-level one.
    import unicodedata

    query = ' '.join(query.split())
    # Filter out private use characters.
    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
    query = remove_query_syntax_chars(query)

    # Step 3 may leave runs of spaces; splitting again removes them while
    # capping the query at ten words.
    words = query.split()
    query = ' '.join(words[:10])
    return query