1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from django.conf import settings
5 from django.http.response import HttpResponseRedirect
6 from django.shortcuts import render
7 from django.views.decorators import cache
8 from django.http import HttpResponse, JsonResponse
10 from catalogue.models import Book, Tag
11 from pdcounter.models import Author
12 from picture.models import Picture
13 from search.index import Search, SearchResult, PictureResult
14 from suggest.forms import PublishingSuggestForm
18 from wolnelektury.utils import re_escape
def match_word_re(word, engine=None):
    """Return a regex matching ``word`` as a whole word, in the database's dialect.

    Word-boundary syntax differs between database backends, so the pattern
    is chosen from the configured engine name.

    ``word`` is interpolated as-is; callers must escape any regex
    metacharacters themselves before passing it in.

    Args:
        word: The (already escaped) text to wrap in word-boundary markers.
        engine: Optional database engine path. Defaults to the ``ENGINE``
            of the ``default`` entry in ``settings.DATABASES``.

    Returns:
        The pattern string, or ``None`` when the engine is not recognised.
    """
    if engine is None:
        engine = settings.DATABASES['default']['ENGINE']

    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        # NOTE(review): MySQL 8.0.4+ (ICU regex) no longer accepts
        # [[:<:]] / [[:>:]] and expects \b instead -- confirm the
        # deployed server version.
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL: \m matches at the start of a word, \M at the end.
        return r"\m%s\M" % word
    # Unknown backend: keep the historical result (None), but explicitly.
    return None
# Characters that are treated as query syntax and stripped from user input.
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with each query-syntax character swapped for ``replace``.

    Args:
        query: The raw query string.
        replace: Replacement text for every matched character
            (a single space by default).
    """
    cleaned = re.sub(query_syntax_chars, replace, query)
    return cleaned
# Presumably produces a "did you mean" suggestion for a search query
# (the view passes a `suggestion` value to templates under the
# 'did_you_mean' key) -- TODO confirm.
# NOTE(review): the live body of this function is not visible in this
# excerpt; only commented-out legacy code is shown below. That legacy code
# looked up author tags matching each token, asked `dictionary.suggest()`
# for a replacement when `dictionary.check()` failed, and substituted the
# replacements into the query string.
35 def did_you_mean(query, tokens):
39 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
40 # if len(authors) > 0:
44 # if not dictionary.check(t):
46 # change_to = dictionary.suggest(t)[0].lower()
47 # if change_to != t.lower():
48 # change[t] = change_to
55 # for frm, to in change.items():
56 # query = query.replace(frm, to)
# Return JSON suggestions (author tags and books) for the text found in the
# GET parameter named by `param`.
#
# Visible behaviour:
#   * the prefix is stripped of query-syntax characters, its whitespace is
#     collapsed to single spaces, and it is passed through re_escape();
#   * the result limit is parsed from the 'max' GET parameter;
#   * author tags whose name_pl matches the prefix come first (at most
#     `limit`), then findable books whose title matches, filling whatever
#     is left of the limit;
#   * when a 'callback' GET parameter is handled, the payload is wrapped as
#     "callback(json);", otherwise a plain JsonResponse is returned.
#
# NOTE(review): several lines of this function are not visible in this
# excerpt (e.g. the condition guarding the empty-list response and the
# handling around the int() conversion); `mozhint` is not used in any
# visible line.
62 def hint(request, mozhint=False, param='term'):
63 prefix = request.GET.get(param, '')
65 return JsonResponse([], safe=False)
# Normalise the prefix before it is embedded in a database regex.
67 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
70 limit = int(request.GET.get('max', ''))
# '\m' is PostgreSQL regex syntax for "start of word".
# NOTE(review): '\m' sits in a non-raw string literal, which is an invalid
# escape sequence in Python (warning on recent versions); r'\m' would be
# the safe spelling. The same applies to the book query below.
77 authors = Tag.objects.filter(
78 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
83 'url': author.get_absolute_url(),
85 for author in authors[:limit]
91 'author': b.author_unicode(),
93 'url': b.get_absolute_url()
# Books only fill the slots that the author entries left free.
95 for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]
107 callback = request.GET.get('callback', None)
# NOTE(review): the callback name is taken from the request and written
# into the response body unvalidated -- confirm this is acceptable.
109 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
110 content_type="application/json; charset=utf-8")
112 return JsonResponse(data, safe=False)
# Body of the search results view.
# NOTE(review): the `def` line and a number of interior lines of this view
# are not visible in this excerpt.
#
# Visible flow:
#   1. read the query ('q') and the optional filters 'format', 'lang',
#      'epoch', 'kind' and 'genre' from the GET parameters;
#   2. render dedicated templates for too-short queries and for queries
#      longer than 256 characters;
#   3. normalise the query with prepare_query();
#   4. run the author / book / picture searches depending on the filters;
#   5. render the "no hits" page (with a publishing-suggestion form
#      pre-filled with the query), redirect to the sole matching author,
#      or render the multiple-hits page.
117 query = request.GET.get('q', '')
119 format = request.GET.get('format')
120 lang = request.GET.get('lang')
121 epoch = request.GET.get('epoch')
122 kind = request.GET.get('kind')
123 genre = request.GET.get('genre')
127 request, 'catalogue/search_too_short.html',
129 elif len(query) > 256:
131 request, 'catalogue/search_too_long.html',
134 query = prepare_query(query)
# Authors from the pdcounter app are only searched when no filter is active.
135 if not (format or lang or epoch or kind or genre):
136 pd_authors = search_pd_authors(query)
# Books are searched unless the format filter is 'obraz'.
# NOTE(review): `not format or format != 'obraz'` is equivalent to
# `format != 'obraz'`.
139 if not format or format != 'obraz':
140 books = search_books(
143 only_audio=format=='audio',
144 only_synchro=format=='synchro',
# Pictures are searched only when the format is unset or 'obraz' and no
# language filter was given.
151 if (not format or format == 'obraz') and not lang:
152 pictures = search_pictures(
# Nothing found at all: offer the publishing-suggestion form.
163 if not (books or pictures or pd_authors):
164 form = PublishingSuggestForm(initial={"books": query + ", "})
167 'catalogue/search_no_hits.html',
170 'did_you_mean': suggestion
# The only hit is a single author entry: go straight to its page.
173 if not (books or pictures) and len(pd_authors) == 1:
174 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
178 'catalogue/search_multiple_hits.html',
180 'pd_authors': pd_authors,
182 'pictures': pictures,
183 'did_you_mean': suggestion,
192 'epoch': Tag.objects.filter(category='epoch', for_books=True),
193 'genre': Tag.objects.filter(category='genre', for_books=True),
194 'kind': Tag.objects.filter(category='kind', for_books=True),
# Search the index for books matching `query`, optionally filtered by
# language, audio/synchro availability and epoch/kind/genre tag slugs.
#
# Visible behaviour:
#   * the query is split into words and searched once per field set; the
#     list of searched fields grows with each pass, while a match is
#     required in the fields added by that pass;
#   * hits are aggregated per book with SearchResult.aggregate() and
#     iterated in descending sort order; a repeated book_id is merged into
#     the result already recorded for that book;
#   * results whose book has an ancestor among the result books are dropped;
#   * up to 3 snippets are fetched for each remaining result;
#   * ensure_exists() is then used to filter the results: it handles
#     Book.DoesNotExist and checks the language, has_mp3_file(),
#     has_daisy_file() and tag filters.
#
# NOTE(review): several lines (initialisation of `search`, `results`,
# `ids_results`, the bodies of the filter branches and the return
# statement) are not visible in this excerpt.
198 def search_books(query, lang=None, only_audio=False, only_synchro=False, epoch=None, kind=None, genre=None):
202 words = query.split()
204 (['authors', 'authors_nonstem'], True),
205 (['title', 'title_nonstem'], True),
206 (['metadata', 'metadata_nonstem'], True),
207 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
209 for fields, is_book in fieldsets:
# search_fields accumulates across passes; `required` is only the new set.
210 search_fields += fields
211 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
214 for results_part in results_parts:
215 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
216 book_id = result.book_id
217 if book_id in ids_results:
218 ids_results[book_id].merge(result)
220 results.append(result)
221 ids_results[book_id] = result
# Books that have another result book as an ancestor are removed below.
222 descendant_ids = set(
223 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
224 results = [result for result in results if result.book_id not in descendant_ids]
225 for result in results:
226 search.get_snippets(result, query, num=3)
228 def ensure_exists(r):
232 except Book.DoesNotExist:
235 if lang and r.book.language != lang:
237 if only_audio and not r.book.has_mp3_file():
239 if only_synchro and not r.book.has_daisy_file():
241 if epoch and not r.book.tags.filter(category='epoch', slug=epoch).exists():
243 if kind and not r.book.tags.filter(category='kind', slug=kind).exists():
245 if genre and not r.book.tags.filter(category='genre', slug=genre).exists():
250 results = [r for r in results if ensure_exists(r)]
# Search the index for pictures matching `query`, optionally filtered by
# epoch/kind/genre tag slugs.
#
# Visible behaviour:
#   * the query is split into words and searched once per field set with
#     picture=True; the list of searched fields grows with each pass, while
#     a match is required in the fields added by that pass;
#   * hits are aggregated with PictureResult.aggregate() and iterated in
#     descending sort order; a repeated picture_id is merged into the
#     result already recorded for that picture;
#   * ensure_exists() is then used to filter the results: it handles
#     Picture.DoesNotExist and checks the tag filters.
#
# NOTE(review): several lines (initialisation of `search`, `results`,
# `ids_results`, the bodies of the filter branches and the return
# statement) are not visible in this excerpt.
254 def search_pictures(query, epoch=None, kind=None, genre=None):
258 words = query.split()
260 (['authors', 'authors_nonstem'], True),
261 (['title', 'title_nonstem'], True),
262 (['metadata', 'metadata_nonstem'], True),
263 (['themes_pl', 'themes_pl_nonstem'], False),
265 for fields, is_book in fieldsets:
# search_fields accumulates across passes; `required` is only the new set.
266 search_fields += fields
267 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
270 for results_part in results_parts:
271 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
272 picture_id = result.picture_id
273 if picture_id in ids_results:
274 ids_results[picture_id].merge(result)
276 results.append(result)
277 ids_results[picture_id] = result
279 def ensure_exists(r):
283 except Picture.DoesNotExist:
286 if epoch and not r.picture.tags.filter(category='epoch', slug=epoch).exists():
288 if kind and not r.picture.tags.filter(category='kind', slug=kind).exists():
290 if genre and not r.picture.tags.filter(category='genre', slug=genre).exists():
295 results = [r for r in results if ensure_exists(r)]
# Find pdcounter Author entries whose name contains `query`
# (case-insensitive), leaving out those whose slug already exists as a
# catalogue Tag of category 'author'.
# NOTE(review): the return statement is not visible in this excerpt;
# presumably the filtered `pd_authors` queryset is returned -- confirm.
299 def search_pd_authors(query):
300 pd_authors = Author.objects.filter(name__icontains=query)
# Slugs of the matched authors that also exist as catalogue author tags.
301 existing_slugs = Tag.objects.filter(
302 category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
303 .values_list('slug', flat=True)
304 pd_authors = pd_authors.exclude(slug__in=existing_slugs)
# Normalise a raw user query before it is searched:
#   * collapse every run of whitespace into a single space;
#   * drop characters of Unicode category 'Co' (private use);
#   * replace query-syntax characters via remove_query_syntax_chars();
#   * keep only the first 10 words.
# NOTE(review): some lines (including the return statement) are not
# visible in this excerpt.
308 def prepare_query(query):
309 query = ' '.join(query.split())
310 # filter out private use characters
312 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
313 query = remove_query_syntax_chars(query)
315 words = query.split()
# Cap the query length at 10 words.
317 query = ' '.join(words[:10])