1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.http.response import HttpResponseRedirect
7 from django.shortcuts import render_to_response
8 from django.template import RequestContext
9 from django.views.decorators import cache
10 from django.http import HttpResponse, JsonResponse
12 from catalogue.models import Book, Tag
13 from pdcounter.models import Author
14 from picture.models import Picture
15 from search.index import Search, SearchResult, PictureResult
16 from suggest.forms import PublishingSuggestForm
20 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Return a whole-word regular expression for ``word``.

    The pattern is written in the regex dialect of the default database
    backend, so that it can be passed to an ``__iregex`` lookup.

    ``word`` is interpolated verbatim: callers that pass untrusted text
    must escape regex metacharacters themselves.

    Returns ``None`` when the configured backend is not recognised.
    """
    # Look the engine up once instead of once per branch.
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        # NOTE(review): MySQL 8.0.4+ uses ICU regexes, where the
        # [[:<:]] / [[:>:]] markers are no longer supported -- confirm
        # the deployed server version before relying on this branch.
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # \m and \M are PostgreSQL's start-of-word / end-of-word anchors.
        return r"\m%s\M" % word
    # Unknown backend: keep the historical "no pattern" result explicit.
    return None
# Characters stripped from user-supplied queries: backslash, slash, asterisk,
# colon, parentheses and curly braces (presumably because they are operators
# in the search backend's query syntax -- confirm against search.index).
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with each query-syntax character replaced.

    ``replace`` is the substitution for every matched character (a single
    space by default).  It is handed to ``re`` as a replacement template,
    so backslash escapes inside it are interpreted.
    """
    return query_syntax_chars.sub(replace, query)
# did_you_mean(query, tokens): hook for suggesting a corrected search query.
# NOTE(review): no executable statement of this function is visible in this
# listing -- only disabled (commented-out) code -- so what it currently
# returns cannot be determined from here; confirm against the full file.
37 def did_you_mean(query, tokens):
# Disabled logic, step 1: look up author tags whose name matches the token
# ``t`` as a whole word (pattern built by match_word_re).
41 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
42 # if len(authors) > 0:
# Disabled logic, step 2: for a token the dictionary does not accept, record
# the lower-cased first dictionary suggestion when it differs from the token.
46 # if not dictionary.check(t):
48 # change_to = dictionary.suggest(t)[0].lower()
49 # if change_to != t.lower():
50 # change[t] = change_to
# Disabled logic, step 3: apply every recorded replacement to the query text.
57 # for frm, to in change.items():
58 # query = query.replace(frm, to)
# Autocomplete view body (its ``def`` line is not visible in this listing).
# Reads the typed text from the ``term`` GET parameter.
65 prefix = request.GET.get('term', '')
# Early exit with an empty JSON list; the guarding condition is not visible.
67 return JsonResponse([], safe=False)
# Strip query-syntax characters, collapse whitespace runs to single spaces,
# then pass the text through re_escape before embedding it in a regex.
69 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
# Result cap taken from the ``max`` GET parameter.
# NOTE(review): int('') raises ValueError when ``max`` is absent; presumably
# this is caught on lines not shown here -- confirm.
72 limit = int(request.GET.get('max', ''))
# Author tags whose Polish name matches the prefix; only() limits the
# loaded columns to the four listed fields.
# NOTE(review): '\m' is PostgreSQL's start-of-word anchor, so this assumes a
# PostgreSQL backend; the non-raw literal relies on an unrecognised escape
# being kept as-is (r'\m' would be safer).
79 authors = Tag.objects.filter(
80 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
# One entry per matching author, at most ``limit`` of them.
85 'url': author.get_absolute_url(),
87 for author in authors[:limit]
# One entry per matching book title, filling the slots left below ``limit``.
93 'author': b.author_unicode(),
95 'url': b.get_absolute_url()
97 for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
# Optional ``callback`` GET parameter: the response below wraps the JSON in
# "callback(...);" (the condition choosing between the two returns is not
# visible).
# NOTE(review): ``callback`` is echoed into the response unvalidated, and no
# import of ``json`` is visible in this listing -- confirm both.
99 callback = request.GET.get('callback', None)
101 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
102 content_type="application/json; charset=utf-8")
# Plain JSON response; safe=False permits a non-dict top-level value.
104 return JsonResponse(data, safe=False)
# Search results view body (its ``def`` line is not visible in this listing).
# Reads the search text from the ``q`` GET parameter.
109 query = request.GET.get('q', '')
# Query rejected as too short; the guarding condition is not visible.
# NOTE(review): render_to_response(context_instance=...) is an old Django
# API that newer Django versions no longer accept -- confirm the target
# version.
111 return render_to_response(
112 'catalogue/search_too_short.html', {'prefix': query},
113 context_instance=RequestContext(request))
# Queries longer than 256 characters are rejected as too long.
114 elif len(query) > 256:
115 return render_to_response(
116 'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
# Normalise the query, then run the three independent searches.
118 query = prepare_query(query)
119 pd_authors = search_pd_authors(query)
120 books = search_books(query)
121 pictures = search_pictures(query)
# Nothing found anywhere: build a publishing-suggestion form pre-filled with
# the query followed by ", " and render the "no hits" page.
# NOTE(review): the assignment of ``suggestion`` is not visible in this
# listing.
124 if not (books or pictures or pd_authors):
125 form = PublishingSuggestForm(initial={"books": query + ", "})
126 return render_to_response(
127 'catalogue/search_no_hits.html',
130 'did_you_mean': suggestion
132 context_instance=RequestContext(request))
# The only hit is a single pdcounter author: redirect straight to its page.
134 if not (books or pictures) and len(pd_authors) == 1:
135 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
# Otherwise render the combined results page.
137 return render_to_response(
138 'catalogue/search_multiple_hits.html',
140 'pd_authors': pd_authors,
142 'pictures': pictures,
143 'did_you_mean': suggestion
145 context_instance=RequestContext(request))
# search_books(query): runs a word search over books and post-processes the
# hits.
# NOTE(review): several lines of this function (the creation of ``search``,
# ``results`` and ``ids_results``, an ``else:``, a ``try:`` and the return)
# are not visible in this listing.
148 def search_books(query):
152 words = query.split()
# Field-set entries are (field names, flag, number).
# NOTE(review): the flag looks like "is a whole-book match" (cf. ``is_book``
# in the disabled code below) and the number like a weight -- confirm
# against Search.search_words.
154 (['authors'], True, 8),
155 (['title'], True, 4),
156 (['metadata'], True, 2),
157 (['text', 'themes_pl'], False, 1),
# Disabled earlier approach: one search call per field set.
159 # for fields, is_book in fieldsets:
160 # search_fields += fields
161 # results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
162 query_results = search.search_words(words, fieldsets)
165 # for results_part in results_parts:
# Walk the aggregated results in descending sort order; a repeated book id
# is merged into the result already stored for that book.
166 for result in sorted(SearchResult.aggregate(query_results), reverse=True):
167 book_id = result.book_id
168 if book_id in ids_results:
169 ids_results[book_id].merge(result)
171 results.append(result)
172 ids_results[book_id] = result
# Ids of hit books that have another hit book as an ancestor; those are
# dropped so that only the ancestor stays in the list.
173 descendant_ids = set(
174 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
175 results = [result for result in results if result.book_id not in descendant_ids]
# Attach up to three snippets to every remaining result.
176 for result in results:
177 search.get_snippets(result, query, num=3)
# Predicate used to filter the results; it handles Book.DoesNotExist
# (presumably rejecting hits whose book row is gone -- body not visible).
179 def ensure_exists(r):
182 except Book.DoesNotExist:
# NOTE(review): under Python 3 filter() returns a lazy iterator that is
# always truthy, which would defeat the emptiness checks made by the caller
# -- confirm the Python version in use.
185 results = filter(ensure_exists, results)
# search_pictures(query): runs a word search over pictures and merges
# duplicate hits.
# NOTE(review): several lines of this function (the creation of ``search``,
# ``results`` and ``ids_results``, an ``else:``, a ``try:`` and the return)
# are not visible in this listing.
189 def search_pictures(query):
193 words = query.split()
# Field-set entries are (field names, flag, number); unlike the book search
# there is no 'text' field here.
# NOTE(review): the meaning of the flag and the number is not shown here --
# confirm against Search.search_words.
195 (['authors'], True, 8),
196 (['title'], True, 4),
197 (['metadata'], True, 2),
198 (['themes_pl'], False, 1),
# Disabled earlier approach: one search call per field set.
200 # for fields, is_book in fieldsets:
201 # search_fields += fields
202 # results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
203 query_results = search.search_words(words, fieldsets, picture=True)
206 # for results_part in results_parts:
# Walk the aggregated results in descending sort order; a repeated picture
# id is merged into the result already stored for that picture.
207 for result in sorted(PictureResult.aggregate(query_results), reverse=True):
208 picture_id = result.picture_id
209 if picture_id in ids_results:
210 ids_results[picture_id].merge(result)
212 results.append(result)
213 ids_results[picture_id] = result
# Predicate used to filter the results; it handles Picture.DoesNotExist
# (presumably rejecting hits whose picture row is gone -- body not visible).
215 def ensure_exists(r):
218 except Picture.DoesNotExist:
# NOTE(review): under Python 3 filter() returns a lazy iterator that is
# always truthy -- confirm the Python version in use.
221 results = filter(ensure_exists, results)
def search_pd_authors(query):
    """Return ``pdcounter`` authors whose name contains ``query``.

    The match is a case-insensitive substring test.  Authors whose slug is
    already used by a catalogue ``Tag`` of category ``'author'`` are left
    out of the result.

    Returns a queryset of ``Author`` objects.
    """
    pd_authors = Author.objects.filter(name__icontains=query)
    # The candidate slugs are materialised into a list before being used in
    # the Tag lookup, exactly as before (presumably to avoid a cross-model
    # subquery -- confirm before simplifying).
    candidate_slugs = list(pd_authors.values_list('slug', flat=True))
    existing_slugs = Tag.objects.filter(
        category='author', slug__in=candidate_slugs,
    ).values_list('slug', flat=True)
    # The caller indexes and measures the result, so it must be returned.
    return pd_authors.exclude(slug__in=existing_slugs)
def prepare_query(query):
    """Normalise a raw search query before it is handed to the searches.

    Steps, in order:

    1. collapse every whitespace run to a single space and trim the ends;
    2. drop private-use code points (Unicode category ``Co``);
    3. replace query-syntax characters with spaces;
    4. keep at most the first ten words, joined by single spaces.

    Returns the cleaned query string (possibly empty).
    """
    # Function-scope import: the module's visible import block does not
    # provide unicodedata.
    import unicodedata

    query = ' '.join(query.split())
    # filter out private use characters
    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
    query = remove_query_syntax_chars(query)

    # Splitting and re-joining also collapses the extra spaces left behind
    # by the syntax characters removed above.
    words = query.split()
    return ' '.join(words[:10])