1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from django.conf import settings
5 from django.http.response import HttpResponseRedirect
6 from django.shortcuts import render
7 from django.views.decorators import cache
8 from django.http import HttpResponse, JsonResponse
10 from catalogue.models import Book, Tag
11 from pdcounter.models import Author
12 from picture.models import Picture
13 from search.index import Search, SearchResult, PictureResult
14 from .forms import SearchFilters
15 from suggest.forms import PublishingSuggestForm
19 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Wrap ``word`` in whole-word anchors for the default database's regex dialect.

    The anchor syntax differs between database backends, so the dialect is
    chosen by looking for the backend name inside
    ``settings.DATABASES['default']['ENGINE']``.

    ``word`` is interpolated as-is; it is not escaped here.

    Returns the anchored pattern string, or ``None`` when the configured
    engine is not one of the recognised backends.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL advanced regexes: \m matches only at the start of a
        # word, \M only at the end of a word.
        return r"\m%s\M" % word
    # Unrecognised backend: keep the historical behaviour of returning None.
    return None
# Characters treated as search-query syntax; they are replaced in user input
# by remove_query_syntax_chars().
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character swapped for ``replace``.

    Each character matched by ``query_syntax_chars`` is substituted
    individually, so runs of such characters yield one ``replace`` per
    character. ``replace`` defaults to a single space.
    """
    cleaned = query_syntax_chars.sub(repl=replace, string=query)
    return cleaned
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
63 def hint(request, mozhint=False, param='term'):
64 prefix = request.GET.get(param, '')
66 return JsonResponse([], safe=False)
68 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
71 limit = int(request.GET.get('max', ''))
78 authors = Tag.objects.filter(
79 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
84 'url': author.get_absolute_url(),
86 for author in authors[:limit]
89 for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]:
90 author_str = b.author_unicode()
91 translator = b.translator()
93 author_str += ' (tłum. ' + translator + ')'
99 'url': b.get_absolute_url()
112 callback = request.GET.get('callback', None)
114 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
115 content_type="application/json; charset=utf-8")
117 return JsonResponse(data, safe=False)
123 filters = SearchFilters(request.GET)
125 'title': 'Wynik wyszukiwania',
126 'query': filters.data['q'],
129 if filters.is_valid():
130 ctx['results'] = filters.results()
131 for k, v in ctx['results'].items():
133 ctx['hasresults'] = True
135 return render(request, 'search/results.html', ctx)
140 if request.EXPERIMENTS['layout'].value:
141 return search(request)
143 query = request.GET.get('q', '')
145 format = request.GET.get('format')
146 lang = request.GET.get('lang')
147 epoch = request.GET.get('epoch')
148 kind = request.GET.get('kind')
149 genre = request.GET.get('genre')
153 request, 'catalogue/search_too_short.html',
155 elif len(query) > 256:
157 request, 'catalogue/search_too_long.html',
160 query = prepare_query(query)
161 if not (format or lang or epoch or kind or genre):
162 pd_authors = search_pd_authors(query)
165 if not format or format != 'obraz':
166 books = search_books(
169 only_audio=format=='audio',
170 only_synchro=format=='synchro',
177 if (not format or format == 'obraz') and not lang:
178 pictures = search_pictures(
189 if not (books or pictures or pd_authors):
190 form = PublishingSuggestForm(initial={"books": query + ", "})
193 'catalogue/search_no_hits.html',
196 'did_you_mean': suggestion
199 if not (books or pictures) and len(pd_authors) == 1:
200 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
204 'catalogue/search_multiple_hits.html',
206 'pd_authors': pd_authors,
208 'pictures': pictures,
209 'did_you_mean': suggestion,
218 'epoch': Tag.objects.filter(category='epoch', for_books=True),
219 'genre': Tag.objects.filter(category='genre', for_books=True),
220 'kind': Tag.objects.filter(category='kind', for_books=True),
224 def search_books(query, lang=None, only_audio=False, only_synchro=False, epoch=None, kind=None, genre=None):
228 words = query.split()
230 (['authors', 'authors_nonstem'], True),
231 (['title', 'title_nonstem'], True),
232 (['metadata', 'metadata_nonstem'], True),
233 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
235 for fields, is_book in fieldsets:
236 search_fields += fields
237 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
240 for results_part in results_parts:
241 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
242 book_id = result.book_id
243 if book_id in ids_results:
244 ids_results[book_id].merge(result)
246 results.append(result)
247 ids_results[book_id] = result
248 descendant_ids = set(
249 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
250 results = [result for result in results if result.book_id not in descendant_ids]
251 for result in results:
252 search.get_snippets(result, query, num=3)
254 def ensure_exists(r):
258 except Book.DoesNotExist:
261 if lang and r.book.language != lang:
263 if only_audio and not r.book.has_mp3_file():
265 if only_synchro and not r.book.has_daisy_file():
267 if epoch and not r.book.tags.filter(category='epoch', slug=epoch).exists():
269 if kind and not r.book.tags.filter(category='kind', slug=kind).exists():
271 if genre and not r.book.tags.filter(category='genre', slug=genre).exists():
276 results = [r for r in results if ensure_exists(r)]
280 def search_pictures(query, epoch=None, kind=None, genre=None):
284 words = query.split()
286 (['authors', 'authors_nonstem'], True),
287 (['title', 'title_nonstem'], True),
288 (['metadata', 'metadata_nonstem'], True),
289 (['themes_pl', 'themes_pl_nonstem'], False),
291 for fields, is_book in fieldsets:
292 search_fields += fields
293 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
296 for results_part in results_parts:
297 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
298 picture_id = result.picture_id
299 if picture_id in ids_results:
300 ids_results[picture_id].merge(result)
302 results.append(result)
303 ids_results[picture_id] = result
305 def ensure_exists(r):
309 except Picture.DoesNotExist:
312 if epoch and not r.picture.tags.filter(category='epoch', slug=epoch).exists():
314 if kind and not r.picture.tags.filter(category='kind', slug=kind).exists():
316 if genre and not r.picture.tags.filter(category='genre', slug=genre).exists():
321 results = [r for r in results if ensure_exists(r)]
325 def search_pd_authors(query):
326 pd_authors = Author.objects.filter(name__icontains=query)
327 existing_slugs = Tag.objects.filter(
328 category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
329 .values_list('slug', flat=True)
330 pd_authors = pd_authors.exclude(slug__in=existing_slugs)
334 def prepare_query(query):
335 query = ' '.join(query.split())
336 # filter out private use characters
338 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
339 query = remove_query_syntax_chars(query)
341 words = query.split()
343 query = ' '.join(words[:10])