1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.http.response import HttpResponseRedirect
7 from django.shortcuts import render_to_response
8 from django.template import RequestContext
9 from django.views.decorators import cache
10 from django.http import HttpResponse, JsonResponse
12 from catalogue.models import Book, Tag
13 from pdcounter.models import Author
14 from picture.models import Picture
15 from search.index import Search, SearchResult, PictureResult
16 from suggest.forms import PublishingSuggestForm
20 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Build a whole-word regex for ``word`` in the default database's dialect.

    The word-boundary syntax is picked by looking for a backend name inside
    ``settings.DATABASES['default']['ENGINE']``.

    ``word`` is interpolated verbatim: callers that pass user input must
    escape regex metacharacters themselves.

    Returns the pattern string, or ``None`` when the engine is not one of the
    recognised backends.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL spells "start of word" / "end of word" as \m and \M;
        # other code in this module already relies on the \m anchor.
        return r"\m%s\M" % word
    # Unknown backend: keep the historical behaviour of yielding no pattern.
    return None
# Matches any single one of:  \ / * : ( ) { } ? . [ ] +
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with each ``query_syntax_chars`` match swapped for ``replace``.

    ``replace`` defaults to a single space, so the overall length of the
    string is preserved unless a different replacement is supplied.
    """
    cleaned = re.sub(query_syntax_chars, replace, query)
    return cleaned
37 def did_you_mean(query, tokens):
41 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
42 # if len(authors) > 0:
46 # if not dictionary.check(t):
48 # change_to = dictionary.suggest(t)[0].lower()
49 # if change_to != t.lower():
50 # change[t] = change_to
57 # for frm, to in change.items():
58 # query = query.replace(frm, to)
65 prefix = request.GET.get('term', '')
67 return JsonResponse([], safe=False)
69 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
72 limit = int(request.GET.get('max', ''))
79 authors = Tag.objects.filter(
80 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
85 'url': author.get_absolute_url(),
87 for author in authors[:limit]
93 'author': b.author_unicode(),
95 'url': b.get_absolute_url()
97 for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
99 callback = request.GET.get('callback', None)
101 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
102 content_type="application/json; charset=utf-8")
104 return JsonResponse(data, safe=False)
109 return HttpResponse('Search is temporarily disabled', status=503)
110 query = request.GET.get('q', '')
112 return render_to_response(
113 'catalogue/search_too_short.html', {'prefix': query},
114 context_instance=RequestContext(request))
115 elif len(query) > 256:
116 return render_to_response(
117 'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
119 query = prepare_query(query)
120 pd_authors = search_pd_authors(query)
121 books = search_books(query)
122 pictures = search_pictures(query)
125 if not (books or pictures or pd_authors):
126 form = PublishingSuggestForm(initial={"books": query + ", "})
127 return render_to_response(
128 'catalogue/search_no_hits.html',
131 'did_you_mean': suggestion
133 context_instance=RequestContext(request))
135 if not (books or pictures) and len(pd_authors) == 1:
136 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
138 return render_to_response(
139 'catalogue/search_multiple_hits.html',
141 'pd_authors': pd_authors,
143 'pictures': pictures,
144 'did_you_mean': suggestion
146 context_instance=RequestContext(request))
149 def search_books(query):
153 words = query.split()
157 (['metadata'], True),
158 (['text', 'themes_pl'], False),
160 for fields, is_book in fieldsets:
161 search_fields += fields
162 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
165 for results_part in results_parts:
166 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
167 book_id = result.book_id
168 if book_id in ids_results:
169 ids_results[book_id].merge(result)
171 results.append(result)
172 ids_results[book_id] = result
173 descendant_ids = set(
174 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
175 results = [result for result in results if result.book_id not in descendant_ids]
176 for result in results:
177 search.get_snippets(result, query, num=3)
179 def ensure_exists(r):
182 except Book.DoesNotExist:
185 results = filter(ensure_exists, results)
189 def search_pictures(query):
193 words = query.split()
197 (['metadata'], True),
198 (['themes_pl'], False),
200 for fields, is_book in fieldsets:
201 search_fields += fields
202 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
205 for results_part in results_parts:
206 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
207 picture_id = result.picture_id
208 if picture_id in ids_results:
209 ids_results[picture_id].merge(result)
211 results.append(result)
212 ids_results[picture_id] = result
214 def ensure_exists(r):
217 except Picture.DoesNotExist:
220 results = filter(ensure_exists, results)
def search_pd_authors(query):
    """Find ``pdcounter`` authors matching ``query`` that have no catalogue tag.

    An ``Author`` matches when its ``name`` contains ``query``
    (case-insensitive).  Authors whose ``slug`` is also the slug of a ``Tag``
    with ``category='author'`` are excluded from the result.

    Returns a queryset of ``Author`` objects; the search view tests it for
    truthiness, takes its ``len()`` and indexes its first element.
    """
    pd_authors = Author.objects.filter(name__icontains=query)
    # The candidate slugs are materialised into a list before being used in
    # the Tag lookup.
    candidate_slugs = list(pd_authors.values_list('slug', flat=True))
    existing_slugs = Tag.objects.filter(
        category='author', slug__in=candidate_slugs
    ).values_list('slug', flat=True)
    # Hand the filtered queryset back to the caller; without this the
    # function would yield None and the view's len()/[0] access would fail.
    return pd_authors.exclude(slug__in=existing_slugs)
def prepare_query(query, max_words=10):
    """Normalise a raw search string before it is passed to the search helpers.

    Steps, in order:

    1. collapse every run of whitespace to a single space and trim the ends;
    2. drop characters whose Unicode category is ``Co`` (private use);
    3. replace query-syntax characters via ``remove_query_syntax_chars``;
    4. keep at most ``max_words`` whitespace-separated words, joined by
       single spaces.

    ``max_words`` defaults to 10, the limit that used to be hard-coded.

    Returns the cleaned query string.
    """
    # Local import so the function does not rely on a module-level import.
    import unicodedata

    query = ' '.join(query.split())
    # filter out private use characters
    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
    query = remove_query_syntax_chars(query)

    words = query.split()
    # Return the result: the search view assigns it and feeds it to every
    # search helper.
    return ' '.join(words[:max_words])