1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from django.conf import settings
5 from django.http.response import HttpResponseRedirect
6 from django.shortcuts import render
7 from django.views.decorators import cache
8 from django.http import HttpResponse, JsonResponse
10 from catalogue.models import Book, Tag
11 from pdcounter.models import Author
12 from picture.models import Picture
13 from search.index import Search, SearchResult, PictureResult
14 from suggest.forms import PublishingSuggestForm
18 from wolnelektury.utils import re_escape
def match_word_re(word):
    r"""Build a backend-specific regex that matches ``word`` as a whole word.

    The word-boundary syntax is selected by looking for a backend name
    inside the ``ENGINE`` setting of the default database:

    * SQLite:      ``\bword\b``
    * MySQL:       ``[[:<:]]word[[:>:]]``
    * PostgreSQL:  ``\mword\M``

    ``word`` is interpolated verbatim, so the caller is responsible for
    escaping any regex metacharacters it may contain.

    Returns the pattern string, or ``None`` when the configured engine is
    not one of the recognised backends.
    """
    # Read the setting once instead of once per branch.
    engine = settings.DATABASES['default']['ENGINE']

    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL regexes spell "start of word" / "end of word" as \m / \M.
        return r"\m%s\M" % word

    # Unknown backend: keep the historical "no pattern" result, but make it
    # explicit rather than falling off the end of the function.
    return None
# Matches one character out of:  \ / * : ( ) { } ? . [ ] +
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Each character matched by ``query_syntax_chars`` is replaced,
    one-for-one, by ``replace`` (a single space by default; pass ``''`` to
    drop the characters instead). All other characters are left untouched.

    ``replace`` is handed to the regex engine as the replacement argument,
    so backslash escapes in it are processed by ``re``.
    """
    cleaned = re.sub(query_syntax_chars, replace, query)
    return cleaned
35 def did_you_mean(query, tokens):
39 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
40 # if len(authors) > 0:
44 # if not dictionary.check(t):
46 # change_to = dictionary.suggest(t)[0].lower()
47 # if change_to != t.lower():
48 # change[t] = change_to
55 # for frm, to in change.items():
56 # query = query.replace(frm, to)
62 def hint(request, mozhint=False, param='term'):
63 prefix = request.GET.get(param, '')
65 return JsonResponse([], safe=False)
67 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
70 limit = int(request.GET.get('max', ''))
77 authors = Tag.objects.filter(
78 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
83 'url': author.get_absolute_url(),
85 for author in authors[:limit]
91 'author': b.author_unicode(),
93 'url': b.get_absolute_url()
95 for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]
107 callback = request.GET.get('callback', None)
109 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
110 content_type="application/json; charset=utf-8")
112 return JsonResponse(data, safe=False)
117 query = request.GET.get('q', '')
120 request, 'catalogue/search_too_short.html',
122 elif len(query) > 256:
124 request, 'catalogue/search_too_long.html',
127 query = prepare_query(query)
128 pd_authors = search_pd_authors(query)
129 books = search_books(query)
130 pictures = search_pictures(query)
133 if not (books or pictures or pd_authors):
134 form = PublishingSuggestForm(initial={"books": query + ", "})
137 'catalogue/search_no_hits.html',
140 'did_you_mean': suggestion
143 if not (books or pictures) and len(pd_authors) == 1:
144 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
148 'catalogue/search_multiple_hits.html',
150 'pd_authors': pd_authors,
152 'pictures': pictures,
153 'did_you_mean': suggestion
156 def search_books(query):
160 words = query.split()
162 (['authors', 'authors_nonstem'], True),
163 (['title', 'title_nonstem'], True),
164 (['metadata', 'metadata_nonstem'], True),
165 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
167 for fields, is_book in fieldsets:
168 search_fields += fields
169 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
172 for results_part in results_parts:
173 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
174 book_id = result.book_id
175 if book_id in ids_results:
176 ids_results[book_id].merge(result)
178 results.append(result)
179 ids_results[book_id] = result
180 descendant_ids = set(
181 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
182 results = [result for result in results if result.book_id not in descendant_ids]
183 for result in results:
184 search.get_snippets(result, query, num=3)
186 def ensure_exists(r):
189 except Book.DoesNotExist:
192 results = [r for r in results if ensure_exists(r)]
196 def search_pictures(query):
200 words = query.split()
202 (['authors', 'authors_nonstem'], True),
203 (['title', 'title_nonstem'], True),
204 (['metadata', 'metadata_nonstem'], True),
205 (['themes_pl', 'themes_pl_nonstem'], False),
207 for fields, is_book in fieldsets:
208 search_fields += fields
209 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
212 for results_part in results_parts:
213 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
214 picture_id = result.picture_id
215 if picture_id in ids_results:
216 ids_results[picture_id].merge(result)
218 results.append(result)
219 ids_results[picture_id] = result
221 def ensure_exists(r):
224 except Picture.DoesNotExist:
227 results = [r for r in results if ensure_exists(r)]
231 def search_pd_authors(query):
232 pd_authors = Author.objects.filter(name__icontains=query)
233 existing_slugs = Tag.objects.filter(
234 category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
235 .values_list('slug', flat=True)
236 pd_authors = pd_authors.exclude(slug__in=existing_slugs)
240 def prepare_query(query):
241 query = ' '.join(query.split())
242 # filter out private use characters
244 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
245 query = remove_query_syntax_chars(query)
247 words = query.split()
249 query = ' '.join(words[:10])