1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.http.response import HttpResponseRedirect
7 from django.shortcuts import render_to_response
8 from django.template import RequestContext
9 from django.views.decorators import cache
10 from django.http import HttpResponse, JsonResponse
12 from catalogue.models import Book, Tag
13 from pdcounter.models import Author
14 from picture.models import Picture
15 from search.index import Search, SearchResult, PictureResult
16 from suggest.forms import PublishingSuggestForm
20 from wolnelektury.utils import re_escape
def match_word_re(word):
    """Return a backend-specific regex that matches ``word`` as a whole word.

    Word-boundary syntax differs between database regex engines, so the
    pattern is picked from the ``ENGINE`` setting of the ``default`` database.

    Args:
        word: Text to embed in the pattern. It is interpolated verbatim, so
            callers must escape regex metacharacters beforehand.

    Returns:
        The pattern string, or ``None`` when the configured engine is not one
        of the recognised backends.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL spells start-of-word / end-of-word as \m and \M.
        return r"\m%s\M" % word
    # Unrecognised backend: keep the historical behaviour of returning None.
    return None
# Characters that remove_query_syntax_chars() substitutes out of a query.
query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")


def remove_query_syntax_chars(query, replace=' '):
    """Replace every character matched by ``query_syntax_chars`` in ``query``.

    Args:
        query: Raw query text.
        replace: Substitute used for each matched character; a single space
            by default.

    Returns:
        The query text with each matched character substituted.
    """
    return query_syntax_chars.sub(repl=replace, string=query)
37 def did_you_mean(query, tokens):
41 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
42 # if len(authors) > 0:
46 # if not dictionary.check(t):
48 # change_to = dictionary.suggest(t)[0].lower()
49 # if change_to != t.lower():
50 # change[t] = change_to
57 # for frm, to in change.items():
58 # query = query.replace(frm, to)
65 prefix = request.GET.get('term', '')
67 return JsonResponse([], safe=False)
69 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
72 limit = int(request.GET.get('max', ''))
79 authors = Tag.objects.filter(
80 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
85 'url': author.get_absolute_url(),
87 for author in authors[:limit]
93 'author': b.author_unicode(),
95 'url': b.get_absolute_url()
97 for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
99 callback = request.GET.get('callback', None)
101 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
102 content_type="application/json; charset=utf-8")
104 return JsonResponse(data, safe=False)
109 query = request.GET.get('q', '')
111 return render_to_response(
112 'catalogue/search_too_short.html', {'prefix': query},
113 context_instance=RequestContext(request))
114 elif len(query) > 256:
115 return render_to_response(
116 'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
118 query = prepare_query(query)
119 pd_authors = search_pd_authors(query)
120 books = search_books(query)
121 pictures = search_pictures(query)
124 if not (books or pictures or pd_authors):
125 form = PublishingSuggestForm(initial={"books": query + ", "})
126 return render_to_response(
127 'catalogue/search_no_hits.html',
130 'did_you_mean': suggestion
132 context_instance=RequestContext(request))
134 if not (books or pictures) and len(pd_authors) == 1:
135 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
137 return render_to_response(
138 'catalogue/search_multiple_hits.html',
140 'pd_authors': pd_authors,
142 'pictures': pictures,
143 'did_you_mean': suggestion
145 context_instance=RequestContext(request))
148 def search_books(query):
152 words = query.split()
154 (['authors', 'authors_nonstem'], True),
155 (['title', 'title_nonstem'], True),
156 (['metadata', 'metadata_nonstem'], True),
157 (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
159 for fields, is_book in fieldsets:
160 search_fields += fields
161 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
164 for results_part in results_parts:
165 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
166 book_id = result.book_id
167 if book_id in ids_results:
168 ids_results[book_id].merge(result)
170 results.append(result)
171 ids_results[book_id] = result
172 descendant_ids = set(
173 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
174 results = [result for result in results if result.book_id not in descendant_ids]
175 for result in results:
176 search.get_snippets(result, query, num=3)
178 def ensure_exists(r):
181 except Book.DoesNotExist:
184 results = filter(ensure_exists, results)
188 def search_pictures(query):
192 words = query.split()
194 (['authors', 'authors_nonstem'], True),
195 (['title', 'title_nonstem'], True),
196 (['metadata', 'metadata_nonstem'], True),
197 (['themes_pl', 'themes_pl_nonstem'], False),
199 for fields, is_book in fieldsets:
200 search_fields += fields
201 results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
204 for results_part in results_parts:
205 for result in sorted(PictureResult.aggregate(results_part), reverse=True):
206 picture_id = result.picture_id
207 if picture_id in ids_results:
208 ids_results[picture_id].merge(result)
210 results.append(result)
211 ids_results[picture_id] = result
213 def ensure_exists(r):
216 except Picture.DoesNotExist:
219 results = filter(ensure_exists, results)
223 def search_pd_authors(query):
224 pd_authors = Author.objects.filter(name__icontains=query)
225 existing_slugs = Tag.objects.filter(
226 category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
227 .values_list('slug', flat=True)
228 pd_authors = pd_authors.exclude(slug__in=existing_slugs)
232 def prepare_query(query):
233 query = ' '.join(query.split())
234 # filter out private use characters
236 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
237 query = remove_query_syntax_chars(query)
239 words = query.split()
241 query = ' '.join(words[:10])