1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from django.conf import settings
5 from django.http.response import HttpResponseRedirect
6 from django.shortcuts import render
7 from django.views.decorators import cache
8 from django.http import HttpResponse, JsonResponse
10 from catalogue.models import Book, Tag
11 from pdcounter.models import Author
12 from picture.models import Picture
13 from search.index import Search, SearchResult, PictureResult
14 from .forms import SearchFilters
15 from suggest.forms import PublishingSuggestForm
19 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Return a regex that matches *word* as a whole word in the DB's dialect.

    The dialect is chosen from ``settings.DATABASES['default']['ENGINE']``:

    * SQLite     -> ``\\bword\\b``
    * MySQL      -> ``[[:<:]]word[[:>:]]``
    * PostgreSQL -> ``\\mword\\M`` (also used for the PostGIS backend)

    *word* is interpolated verbatim; callers that pass user input must
    escape regex metacharacters themselves.

    Returns ``None`` when the configured engine is not one of the above.
    """
    # Read the setting once instead of once per branch.
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL regex escapes: \m matches only at the beginning of a
        # word and \M only at its end.
        return r"\m%s\M" % word
    # Unsupported engine: keep the historical "no pattern" result, explicitly.
    return None
# Character class of the symbols that remove_query_syntax_chars() strips from
# a query: backslash, slash, '*', ':', parentheses, braces, '?', '.', square
# brackets and '+'.
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return *query* with each query-syntax character swapped for *replace*.

    Every single character matched by ``query_syntax_chars`` is substituted
    independently, so a run of N such characters yields N copies of
    *replace* (a single space by default).
    """
    cleaned = re.sub(query_syntax_chars, replace, query)
    return cleaned
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
63 def hint(request, mozhint=False, param='term'):
64 prefix = request.GET.get(param, '')
66 return JsonResponse([], safe=False)
68 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
71 limit = int(request.GET.get('max', ''))
78 authors = Tag.objects.filter(
79 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
84 'url': author.get_absolute_url(),
86 for author in authors[:limit]
89 for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]:
90 author_str = b.author_unicode()
91 translator = b.translator()
93 author_str += ' (tłum. ' + translator + ')'
99 'url': b.get_absolute_url()
112 callback = request.GET.get('callback', None)
114 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
115 content_type="application/json; charset=utf-8")
117 return JsonResponse(data, safe=False)
123 filters = SearchFilters(request.GET)
125 'title': 'Wynik wyszukiwania',
126 'query': filters.data['q'],
129 if filters.is_valid():
130 ctx['results'] = filters.results()
131 for k, v in ctx['results'].items():
133 ctx['hasresults'] = True
135 return render(request, 'search/results.html', ctx)
140 if request.EXPERIMENTS['search'].value:
141 request.EXPERIMENTS['layout'].override(True)
142 return search(request)
144 query = request.GET.get('q', '')
146 format = request.GET.get('format')
147 lang = request.GET.get('lang')
148 epoch = request.GET.get('epoch')
149 kind = request.GET.get('kind')
150 genre = request.GET.get('genre')
154 request, 'catalogue/search_too_short.html',
156 elif len(query) > 256:
158 request, 'catalogue/search_too_long.html',
161 query = prepare_query(query)
162 if not (format or lang or epoch or kind or genre):
163 pd_authors = search_pd_authors(query)
166 if not format or format != 'obraz':
167 books = search_books(
170 only_audio=format=='audio',
171 only_synchro=format=='synchro',
178 if (not format or format == 'obraz') and not lang:
179 pictures = search_pictures(
190 if not (books or pictures or pd_authors):
191 form = PublishingSuggestForm(initial={"books": query + ", "})
194 'catalogue/search_no_hits.html',
197 'did_you_mean': suggestion
200 if not (books or pictures) and len(pd_authors) == 1:
201 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
205 'catalogue/search_multiple_hits.html',
207 'pd_authors': pd_authors,
209 'pictures': pictures,
210 'did_you_mean': suggestion,
219 'epoch': Tag.objects.filter(category='epoch', for_books=True),
220 'genre': Tag.objects.filter(category='genre', for_books=True),
221 'kind': Tag.objects.filter(category='kind', for_books=True),
225 def search_books(query, lang=None, only_audio=False, only_synchro=False, epoch=None, kind=None, genre=None):
229 words = query.split()
231 (['authors', 'authors_nonstem'], True),
232 (['title', 'title_nonstem'], True),
233 (['metadata', 'metadata_nonstem'], True),
234 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
236 for fields, is_book in fieldsets:
237 search_fields += fields
238 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
241 for results_part in results_parts:
242 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
243 book_id = result.book_id
244 if book_id in ids_results:
245 ids_results[book_id].merge(result)
247 results.append(result)
248 ids_results[book_id] = result
249 descendant_ids = set(
250 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
251 results = [result for result in results if result.book_id not in descendant_ids]
252 for result in results:
253 search.get_snippets(result, query, num=3)
255 def ensure_exists(r):
259 except Book.DoesNotExist:
262 if lang and r.book.language != lang:
264 if only_audio and not r.book.has_mp3_file():
266 if only_synchro and not r.book.has_daisy_file():
268 if epoch and not r.book.tags.filter(category='epoch', slug=epoch).exists():
270 if kind and not r.book.tags.filter(category='kind', slug=kind).exists():
272 if genre and not r.book.tags.filter(category='genre', slug=genre).exists():
277 results = [r for r in results if ensure_exists(r)]
281 def search_pictures(query, epoch=None, kind=None, genre=None):
285 words = query.split()
287 (['authors', 'authors_nonstem'], True),
288 (['title', 'title_nonstem'], True),
289 (['metadata', 'metadata_nonstem'], True),
290 (['themes_pl', 'themes_pl_nonstem'], False),
292 for fields, is_book in fieldsets:
293 search_fields += fields
294 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
297 for results_part in results_parts:
298 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
299 picture_id = result.picture_id
300 if picture_id in ids_results:
301 ids_results[picture_id].merge(result)
303 results.append(result)
304 ids_results[picture_id] = result
306 def ensure_exists(r):
310 except Picture.DoesNotExist:
313 if epoch and not r.picture.tags.filter(category='epoch', slug=epoch).exists():
315 if kind and not r.picture.tags.filter(category='kind', slug=kind).exists():
317 if genre and not r.picture.tags.filter(category='genre', slug=genre).exists():
322 results = [r for r in results if ensure_exists(r)]
def search_pd_authors(query):
    """Select pdcounter authors matching *query* that have no author tag.

    An author matches when its name contains *query* (case-insensitive
    substring match). Authors whose slug is already used by a ``Tag`` of
    category ``'author'`` are excluded from the selection.
    """
    # Case-insensitive substring match on the author's name.
    pd_authors = Author.objects.filter(name__icontains=query)
    # Slugs of the matched authors that also exist as 'author' tags; the
    # candidate slugs are materialised into a list before being used in the
    # Tag lookup.
    existing_slugs = Tag.objects.filter(
        category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
        .values_list('slug', flat=True)
    # Keep only the authors without a corresponding author tag.
    pd_authors = pd_authors.exclude(slug__in=existing_slugs)
335 def prepare_query(query):
336 query = ' '.join(query.split())
337 # filter out private use characters
339 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
340 query = remove_query_syntax_chars(query)
342 words = query.split()
344 query = ' '.join(words[:10])