1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
import json
import re
import unicodedata

from django.conf import settings
from django.http import HttpResponse, JsonResponse
from django.http.response import HttpResponseRedirect
from django.shortcuts import render
from django.views.decorators import cache

from catalogue.models import Book, Tag
from pdcounter.models import Author
from picture.models import Picture
from search.index import Search, SearchResult, PictureResult
from suggest.forms import PublishingSuggestForm
from wolnelektury.utils import re_escape
def match_word_re(word):
    """Return a regex that matches ``word`` as a whole word on the active database.

    Word-boundary syntax differs between database regex dialects, so the
    pattern is picked from the ``ENGINE`` of the default database.

    ``word`` is interpolated as-is; callers are responsible for escaping any
    regex metacharacters it may contain.

    Returns ``None`` when the engine is not one of the recognised backends.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL spells "start of word" / "end of word" as \m and \M,
        # the same escape the hint lookups in this module already rely on.
        return r"\m%s\M" % word
    return None
# Matches any single one of:  \ / * : ( ) { } ? . [ ] +
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Each character matched by ``query_syntax_chars`` is replaced with
    ``replace`` (a single space by default); all other text is left intact.
    """
    return query_syntax_chars.sub(replace, query)
35 def did_you_mean(query, tokens):
# Query-suggestion hook: receives the search query and its tokens.
# Every statement shown below is commented out, so none of it executes.
# The disabled logic, per token `t`: look for author tags whose name
# matches `t` as a whole word; otherwise ask a spelling `dictionary`
# for its first suggestion and record it in `change` when it differs
# from `t`; finally substitute each recorded replacement into `query`.
# NOTE(review): the live body / return value of this function is not
# determinable from the statements shown here -- confirm before relying on it.
39 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
40 # if len(authors) > 0:
44 # if not dictionary.check(t):
46 # change_to = dictionary.suggest(t)[0].lower()
47 # if change_to != t.lower():
48 # change[t] = change_to
55 # for frm, to in change.items():
56 # query = query.replace(frm, to)
63 prefix = request.GET.get('term', '')
65 return JsonResponse([], safe=False)
67 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
70 limit = int(request.GET.get('max', ''))
77 authors = Tag.objects.filter(
78 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
83 'url': author.get_absolute_url(),
85 for author in authors[:limit]
91 'author': b.author_unicode(),
93 'url': b.get_absolute_url()
95 for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]
97 callback = request.GET.get('callback', None)
99 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
100 content_type="application/json; charset=utf-8")
102 return JsonResponse(data, safe=False)
107 query = request.GET.get('q', '')
110 request, 'catalogue/search_too_short.html',
112 elif len(query) > 256:
114 request, 'catalogue/search_too_long.html',
117 query = prepare_query(query)
118 pd_authors = search_pd_authors(query)
119 books = search_books(query)
120 pictures = search_pictures(query)
123 if not (books or pictures or pd_authors):
124 form = PublishingSuggestForm(initial={"books": query + ", "})
127 'catalogue/search_no_hits.html',
130 'did_you_mean': suggestion
133 if not (books or pictures) and len(pd_authors) == 1:
134 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
138 'catalogue/search_multiple_hits.html',
140 'pd_authors': pd_authors,
142 'pictures': pictures,
143 'did_you_mean': suggestion
146 def search_books(query):
# Collect index hits for `query`, keeping one merged result per book.
# NOTE(review): the setup of `search`, `fieldsets`, `search_fields`,
# `results_parts`, `ids_results` and `results`, and the final return,
# are not among the statements shown here -- confirm against the full function.
# The query is searched word by word.
150 words = query.split()
# Field groups tried in turn; the boolean is forwarded as `book=`.
152 (['authors', 'authors_nonstem'], True),
153 (['title', 'title_nonstem'], True),
154 (['metadata', 'metadata_nonstem'], True),
155 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
# `search_fields` grows with every pass, so each search covers all groups
# seen so far while the current group alone is passed as `required`.
157 for fields, is_book in fieldsets:
158 search_fields += fields
159 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
# Walk each part's aggregated results in descending sort order; a result
# for an already-seen book id is merged into the entry stored for that id.
162 for results_part in results_parts:
163 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
164 book_id = result.book_id
165 if book_id in ids_results:
166 ids_results[book_id].merge(result)
168 results.append(result)
169 ids_results[book_id] = result
# Ids of hit books that have another hit book as an ancestor; those
# results are dropped from the list.
170 descendant_ids = set(
171 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
172 results = [result for result in results if result.book_id not in descendant_ids]
# Ask the index for snippets (num=3) for every remaining result.
173 for result in results:
174 search.get_snippets(result, query, num=3)
# Keep only results accepted by `ensure_exists`, which handles
# Book.DoesNotExist (its remaining statements are not shown here).
176 def ensure_exists(r):
179 except Book.DoesNotExist:
182 results = [r for r in results if ensure_exists(r)]
186 def search_pictures(query):
# Collect index hits for `query`, keeping one merged result per picture.
# NOTE(review): the setup of `search`, `fieldsets`, `search_fields`,
# `results_parts`, `ids_results` and `results`, and the final return,
# are not among the statements shown here -- confirm against the full function.
# The query is searched word by word.
190 words = query.split()
# Field groups tried in turn; the boolean is forwarded as `book=`.
192 (['authors', 'authors_nonstem'], True),
193 (['title', 'title_nonstem'], True),
194 (['metadata', 'metadata_nonstem'], True),
195 (['themes_pl', 'themes_pl_nonstem'], False),
# `search_fields` grows with every pass, so each search covers all groups
# seen so far while the current group alone is passed as `required`;
# `picture=True` is passed on every call.
197 for fields, is_book in fieldsets:
198 search_fields += fields
199 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
# Walk each part's aggregated results in descending sort order; a result
# for an already-seen picture id is merged into the entry stored for that id.
202 for results_part in results_parts:
203 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
204 picture_id = result.picture_id
205 if picture_id in ids_results:
206 ids_results[picture_id].merge(result)
208 results.append(result)
209 ids_results[picture_id] = result
# Keep only results accepted by `ensure_exists`, which handles
# Picture.DoesNotExist (its remaining statements are not shown here).
211 def ensure_exists(r):
214 except Picture.DoesNotExist:
217 results = [r for r in results if ensure_exists(r)]
def search_pd_authors(query):
    """Find pdcounter authors whose name contains ``query``.

    The match is a case-insensitive substring match on ``Author.name``.
    Authors whose slug is already used by a ``Tag`` of category ``author``
    are left out of the result.

    Returns the filtered ``Author`` queryset.
    """
    pd_authors = Author.objects.filter(name__icontains=query)
    matched_slugs = list(pd_authors.values_list('slug', flat=True))
    existing_slugs = Tag.objects.filter(
        category='author', slug__in=matched_slugs,
    ).values_list('slug', flat=True)
    # The calling view measures and indexes the result, so hand it back.
    return pd_authors.exclude(slug__in=existing_slugs)
def prepare_query(query):
    """Normalise a raw search query and return the cleaned string.

    Steps, in order: collapse whitespace runs to single spaces, drop
    private-use characters (Unicode general category ``Co``), replace
    query-syntax characters via ``remove_query_syntax_chars``, and keep
    only the first ten words.
    """
    query = ' '.join(query.split())
    # filter out private use characters
    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
    query = remove_query_syntax_chars(query)

    words = query.split()
    # The calling view rebinds its query to this result, so return it.
    return ' '.join(words[:10])