1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from django.conf import settings
5 from django.http.response import HttpResponseRedirect
6 from django.shortcuts import render
7 from django.views.decorators import cache
8 from django.http import HttpResponse, JsonResponse
10 from catalogue.models import Book, Tag
11 from pdcounter.models import Author
12 from picture.models import Picture
13 from search.index import Search, SearchResult, PictureResult
14 from suggest.forms import PublishingSuggestForm
18 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Return a regex that matches ``word`` as a whole word.

    Word-boundary syntax differs between the regex engines behind the
    database backends, so the pattern is chosen from the ``ENGINE`` setting
    of the default database.

    ``word`` is interpolated as-is; the caller is responsible for escaping
    any regex metacharacters it contains.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        # POSIX-style word-boundary markers.
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL: \m and \M anchor the start and the end of a word.
        return r"\m%s\M" % word
    # Unknown engine: fall back to the common \b syntax instead of
    # implicitly returning None, which is not a usable regex.
    return r"\b%s\b" % word
# Characters treated as query syntax; stripped from user input before searching.
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with each query-syntax character swapped for ``replace``."""
    sanitized = re.sub(query_syntax_chars, replace, query)
    return sanitized
# Presumably produces a "did you mean" suggestion for a search query and its
# tokens -- TODO confirm.
# NOTE(review): in this view only the signature and commented-out code are
# visible (an author-tag lookup and a spell-check dictionary replacing tokens
# in the query); the live body is not shown, so the actual return value
# cannot be stated from here.
35 def did_you_mean(query, tokens):
39 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
40 # if len(authors) > 0:
44 # if not dictionary.check(t):
46 # change_to = dictionary.suggest(t)[0].lower()
47 # if change_to != t.lower():
48 # change[t] = change_to
55 # for frm, to in change.items():
56 # query = query.replace(frm, to)
# Hint (autocomplete-style) view: reads a prefix from the GET parameter named
# by `param` and answers with JSON built from matching author tags and books.
# NOTE(review): several lines of this function are not visible in this view
# (conditions, the construction of `data`, any use of `mozhint`); comments
# below describe only the visible statements.
62 def hint(request, mozhint=False, param='term'):
63 prefix = request.GET.get(param, '')
# Early exit with an empty JSON list (the guarding condition is not visible here).
65 return JsonResponse([], safe=False)
# Replace query-syntax characters, collapse whitespace runs into single
# spaces, then pass the result through re_escape before using it in a regex.
67 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
# Result limit comes from the `max` GET parameter.
# NOTE(review): int('') raises ValueError when `max` is absent -- presumably
# handled by a surrounding try/except that is not visible; confirm.
70 limit = int(request.GET.get('max', ''))
# Author tags whose `name_pl` matches the prefix, case-insensitively.
# '\m' is presumably the PostgreSQL start-of-word anchor -- confirm the backend.
77 authors = Tag.objects.filter(
78 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
83 'url': author.get_absolute_url(),
85 for author in authors[:limit]
# Findable books whose title matches the prefix, capped at limit - len(data),
# i.e. the limit minus the entries already in `data`.
88 for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]:
89 author_str = b.author_unicode()
90 translator = b.translator()
# Append the translator to the author string (the condition is not visible here).
92 author_str += ' (tłum. ' + translator + ')'
98 'url': b.get_absolute_url()
# JSONP-style response: the payload is wrapped in a call to `callback`
# (the condition selecting this branch is not visible here).
# NOTE(review): `callback` is interpolated into the response body as received
# from the query string -- confirm it is validated elsewhere.
111 callback = request.GET.get('callback', None)
113 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
114 content_type="application/json; charset=utf-8")
# Plain JSON response; safe=False because `data` is not a dict
# (presumably a list -- its construction is not visible here).
116 return JsonResponse(data, safe=False)
# Body of the search view. NOTE(review): the `def` line and a number of
# interior lines are not visible in this view; comments below describe only
# the visible statements.
# The search phrase comes from the `q` GET parameter.
121 query = request.GET.get('q', '')
# Optional filters taken from the query string.
123 format = request.GET.get('format')
124 lang = request.GET.get('lang')
125 epoch = request.GET.get('epoch')
126 kind = request.GET.get('kind')
127 genre = request.GET.get('genre')
# "Too short" page (the length check guarding it is not visible here).
131 request, 'catalogue/search_too_short.html',
# Queries longer than 256 characters get the "too long" page.
133 elif len(query) > 256:
135 request, 'catalogue/search_too_long.html',
138 query = prepare_query(query)
# Authors from the pdcounter app are looked up only when no filter is set.
139 if not (format or lang or epoch or kind or genre):
140 pd_authors = search_pd_authors(query)
# Books are searched unless the format filter is 'obraz'.
143 if not format or format != 'obraz':
144 books = search_books(
# The format filter doubles as the audio / synchro restriction.
147 only_audio=format=='audio',
148 only_synchro=format=='synchro',
# Pictures are searched when the format is unset or 'obraz' and no language
# filter is given.
155 if (not format or format == 'obraz') and not lang:
156 pictures = search_pictures(
# Nothing found at all: render the "no hits" page with a publishing-suggestion
# form pre-filled with the query.
167 if not (books or pictures or pd_authors):
168 form = PublishingSuggestForm(initial={"books": query + ", "})
171 'catalogue/search_no_hits.html',
174 'did_you_mean': suggestion
# The only hit is a single pdcounter author: redirect straight to its page.
177 if not (books or pictures) and len(pd_authors) == 1:
178 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
# Otherwise render the results page.
182 'catalogue/search_multiple_hits.html',
184 'pd_authors': pd_authors,
186 'pictures': pictures,
187 'did_you_mean': suggestion,
# Tags of each category flagged for_books; presumably the choices offered
# for the filters -- confirm against the template.
196 'epoch': Tag.objects.filter(category='epoch', for_books=True),
197 'genre': Tag.objects.filter(category='genre', for_books=True),
198 'kind': Tag.objects.filter(category='kind', for_books=True),
# Search the index for books matching `query`, optionally restricted by
# language, audio/synchro availability and epoch/kind/genre tag slugs.
# NOTE(review): several lines (initialisation of `search`, `search_fields`,
# `results_parts`, `results`, `ids_results`, the return statements) are not
# visible in this view; comments below describe only the visible statements.
202 def search_books(query, lang=None, only_audio=False, only_synchro=False, epoch=None, kind=None, genre=None):
206 words = query.split()
# Each entry pairs a group of index fields with the flag passed as `book=`
# to search.search_words.
208 (['authors', 'authors_nonstem'], True),
209 (['title', 'title_nonstem'], True),
210 (['metadata', 'metadata_nonstem'], True),
211 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
# `search_fields` accumulates across iterations, so each pass searches all
# field groups seen so far while passing the current group as `required`.
213 for fields, is_book in fieldsets:
214 search_fields += fields
215 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
# Deduplicate by book id: a repeated book is merged into the result already
# recorded for it instead of being listed again (the else branch line is
# not visible here).
218 for results_part in results_parts:
219 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
220 book_id = result.book_id
221 if book_id in ids_results:
222 ids_results[book_id].merge(result)
224 results.append(result)
225 ids_results[book_id] = result
# Drop books that have an ancestor among the found books.
226 descendant_ids = set(
227 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
228 results = [result for result in results if result.book_id not in descendant_ids]
# Fetch snippets for every remaining result (num=3).
229 for result in results:
230 search.get_snippets(result, query, num=3)
# Filter predicate for the results; the body of each branch is not visible
# here, presumably each failed check rejects the result -- confirm.
232 def ensure_exists(r):
236 except Book.DoesNotExist:
239 if lang and r.book.language != lang:
241 if only_audio and not r.book.has_mp3_file():
# NOTE(review): only_synchro is checked with has_daisy_file() -- confirm
# this is the intended file type.
243 if only_synchro and not r.book.has_daisy_file():
245 if epoch and not r.book.tags.filter(category='epoch', slug=epoch).exists():
247 if kind and not r.book.tags.filter(category='kind', slug=kind).exists():
249 if genre and not r.book.tags.filter(category='genre', slug=genre).exists():
254 results = [r for r in results if ensure_exists(r)]
# Search the index for pictures matching `query`, optionally restricted by
# epoch/kind/genre tag slugs.
# NOTE(review): several lines (initialisation of `search`, `search_fields`,
# `results_parts`, `results`, `ids_results`, the return statements) are not
# visible in this view; comments below describe only the visible statements.
258 def search_pictures(query, epoch=None, kind=None, genre=None):
262 words = query.split()
# Each entry pairs a group of index fields with the flag passed as `book=`
# to search.search_words.
264 (['authors', 'authors_nonstem'], True),
265 (['title', 'title_nonstem'], True),
266 (['metadata', 'metadata_nonstem'], True),
267 (['themes_pl', 'themes_pl_nonstem'], False),
# `search_fields` accumulates across iterations; every call is made with
# picture=True and the current group as `required`.
269 for fields, is_book in fieldsets:
270 search_fields += fields
271 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
# Deduplicate by picture id: a repeated picture is merged into the result
# already recorded for it (the else branch line is not visible here).
274 for results_part in results_parts:
275 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
276 picture_id = result.picture_id
277 if picture_id in ids_results:
278 ids_results[picture_id].merge(result)
280 results.append(result)
281 ids_results[picture_id] = result
# Filter predicate for the results; the body of each branch is not visible
# here, presumably each failed check rejects the result -- confirm.
283 def ensure_exists(r):
287 except Picture.DoesNotExist:
290 if epoch and not r.picture.tags.filter(category='epoch', slug=epoch).exists():
292 if kind and not r.picture.tags.filter(category='kind', slug=kind).exists():
294 if genre and not r.picture.tags.filter(category='genre', slug=genre).exists():
299 results = [r for r in results if ensure_exists(r)]
# Find pdcounter Author records whose name contains `query`
# (case-insensitive), leaving out those whose slug already exists as an
# 'author' Tag.
# NOTE(review): the return statement is not visible in this view; presumably
# the filtered queryset is returned -- confirm.
303 def search_pd_authors(query):
304 pd_authors = Author.objects.filter(name__icontains=query)
# Slugs of the matched authors that are also present as 'author' tags.
305 existing_slugs = Tag.objects.filter(
306 category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
307 .values_list('slug', flat=True)
308 pd_authors = pd_authors.exclude(slug__in=existing_slugs)
# Normalise a raw search phrase before it is handed to the search functions.
# NOTE(review): some lines of this function (including the return statement)
# are not visible in this view; comments describe only the visible steps.
312 def prepare_query(query):
# Collapse every run of whitespace into a single space and trim the ends.
313 query = ' '.join(query.split())
314 # filter out private use characters
316 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
# Replace query-syntax characters with the helper's default replacement.
317 query = remove_query_syntax_chars(query)
# Keep at most the first 10 words.
319 words = query.split()
321 query = ' '.join(words[:10])