1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.http.response import HttpResponseRedirect
7 from django.shortcuts import render_to_response
8 from django.template import RequestContext
9 from django.views.decorators import cache
10 from django.http import HttpResponse, JsonResponse
12 from catalogue.models import Book, Tag
13 from pdcounter.models import Author
14 from search.index import Search, SearchResult
15 from suggest.forms import PublishingSuggestForm
19 from wolnelektury.utils import re_escape
def match_word_re(word, engine=None):
    r"""Return a backend-specific regex matching ``word`` as a whole word.

    The word-boundary syntax differs between database backends:

    * SQLite:     ``\bword\b``
    * MySQL:      ``[[:<:]]word[[:>:]]``
    * PostgreSQL: ``\mword\M`` (also used for the PostGIS backend)

    :param word: text to wrap; it is interpolated verbatim, so the caller
        must escape any regex metacharacters beforehand.
    :param engine: optional database ENGINE string; when omitted, the
        engine of ``settings.DATABASES['default']`` is used.
    :returns: the pattern string, or ``None`` when the engine is not one of
        the backends listed above.
    """
    if engine is None:
        engine = settings.DATABASES['default']['ENGINE']

    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        # NOTE(review): this is the bracket-class word-boundary syntax; some
        # newer MySQL releases use a different regex library -- confirm
        # against the deployed server version.
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine or 'postgis' in engine:
        # Other lookups in this module already use PostgreSQL's ``\m``
        # escape, so this backend is supported here as well.
        return r"\m%s\M" % word
    # Unknown backend: keep the historical behaviour of returning None.
    return None
# Characters treated as query syntax: backslash, slash, asterisk, colon,
# parentheses and braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    :param query: the raw query string.
    :param replace: replacement for each matched character (a single space
        by default).
    :returns: the cleaned query string.
    """
    cleaned = re.sub(query_syntax_chars, replace, query)
    return cleaned
# did_you_mean(query, tokens): every statement visible for this function in
# this excerpt is commented out. The disabled code looked up author tags
# whose name matches a token `t` (via match_word_re), asked a `dictionary`
# object for a suggested spelling of `t`, recorded the suggestion when it
# differed from the lower-cased token, and finally substituted the recorded
# replacements into `query`.
# NOTE(review): the live body and the return value are not visible here --
# confirm what the function currently returns before relying on it.
36 def did_you_mean(query, tokens):
40 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
41 # if len(authors) > 0:
45 # if not dictionary.check(t):
47 # change_to = dictionary.suggest(t)[0].lower()
48 # if change_to != t.lower():
49 # change[t] = change_to
56 # for frm, to in change.items():
57 # query = query.replace(frm, to)
# NOTE(review): the lines below are a fragment of a request handler that
# answers with JSON suggestions; its `def` line and several statements are
# not visible in this excerpt.
# Text to complete, read from the `term` query parameter.
64 prefix = request.GET.get('term', '')
# Reply with an empty JSON list; the condition guarding this early return is
# not visible here.
66 return JsonResponse([], safe=False)
# Replace query-syntax characters, collapse whitespace runs to single
# spaces, then pass the result through `re_escape` before it is embedded in
# the regex lookups below.
68 prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
# Maximum number of entries, read from the `max` query parameter.
# NOTE(review): int('') raises ValueError when `max` is absent -- presumably
# handled by a try/except that is not visible here; confirm.
71 limit = int(request.GET.get('max', ''))
# Author tags whose `name_pl` matches `prefix` case-insensitively, preceded
# by `\m` (presumably PostgreSQL's beginning-of-word escape), loading only
# the listed fields.
# NOTE(review): '\m' is written in a non-raw string literal; r'\m' would not
# depend on how unrecognised escape sequences are treated.
78 authors = Tag.objects.filter(
79 category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
# Part of the entry built for each of the first `limit` authors.
84 'url': author.get_absolute_url(),
86 for author in authors[:limit]
# Part of the entry built for each book whose title matches the same
# pattern; the slice caps books at `limit` minus the entries already in
# `data`.
92 'author': b.author_unicode(),
94 'url': b.get_absolute_url()
96 for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
# Optional `callback` parameter: one return wraps the JSON payload in a call
# to it, the other returns plain JSON (the branch condition is not visible).
# NOTE(review): `callback` is interpolated into the response body without
# validation, and the wrapped response is still labelled application/json --
# confirm this is intended.
98 callback = request.GET.get('callback', None)
100 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
101 content_type="application/json; charset=utf-8")
103 return JsonResponse(data, safe=False)
# NOTE(review): the lines below are a fragment of a request handler that
# renders search-result pages; its `def` line and a number of statements are
# not visible in this excerpt.
# Read the `q` query parameter and collapse whitespace runs to single spaces.
108 query = request.GET.get('q', '')
109 query = ' '.join(query.split())
110 # filter out private use characters
112 query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
# Render the "too short" page; the length check guarding it is not visible
# here.
115 return render_to_response(
116 'catalogue/search_too_short.html', {'prefix': query},
117 context_instance=RequestContext(request))
# Queries longer than 256 characters get the "too long" page instead.
118 elif len(query) > 256:
119 return render_to_response(
120 'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
# Replace query-syntax characters before splitting the query into words.
122 query = remove_query_syntax_chars(query)
124 words = query.split()
# Rebuild the query from its first ten words.
126 query = ' '.join(words[:10])
# `Author` records (from pdcounter.models) whose name contains the query,
# minus those whose slug already belongs to an author Tag.
130 pd_authors = Author.objects.filter(name__icontains=query)
131 existing_slugs = Tag.objects.filter(
132 category='author', slug__in=list(pd_authors.values_list('slug', flat=True)))\
133 .values_list('slug', flat=True)
134 pd_authors = pd_authors.exclude(slug__in=existing_slugs)
# Entries of `fieldsets`: a list of field names plus a flag that is passed
# on as `book=` to `search_words`.
142 (['metadata'], True),
143 (['text', 'themes_pl'], False),
# `search_fields` accumulates across iterations, so each later search covers
# the earlier fields as well.
145 for fields, is_book in fieldsets:
146 search_fields += fields
147 results_parts.append(search.search_words(words, search_fields, book=is_book))
# Walk the aggregated results of each part in descending sort order. A
# result for a book id already recorded is merged into the recorded one; the
# append/record lines presumably sit under an `else:` that is not visible.
151 for results_part in results_parts:
152 for result in sorted(SearchResult.aggregate(results_part), reverse=True):
153 book_id = result.book_id
154 if book_id in ids_results:
155 ids_results[book_id].merge(result)
157 results.append(result)
158 ids_results[book_id] = result
# Drop results for books that have an ancestor among the matched books.
160 descendant_ids = set(
161 Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
162 results = [result for result in results if result.book_id not in descendant_ids]
# Ask the search backend for snippets for each remaining result (num=3).
164 for result in results:
165 search.get_snippets(result, query, num=3)
# Predicate used to filter `results`; only its header and the
# Book.DoesNotExist handler are visible, so presumably it rejects results
# whose book cannot be loaded -- confirm against the full body.
169 def ensure_exists(r):
172 except Book.DoesNotExist:
# NOTE(review): on Python 3 `filter` returns an iterator, which is always
# truthy, so the `not results` checks below would never fire -- confirm the
# target Python version.
175 results = filter(ensure_exists, results)
# No results and no matching pdcounter authors: render the "no hits" page
# with a publishing-suggestion form pre-filled with the query.
177 if not results and not pd_authors:
178 form = PublishingSuggestForm(initial={"books": query + ", "})
179 return render_to_response(
180 'catalogue/search_no_hits.html',
183 'did_you_mean': suggestion
185 context_instance=RequestContext(request))
# No results but exactly one matching pdcounter author: redirect to that
# author's page.
187 if not results and len(pd_authors) == 1:
188 return HttpResponseRedirect(pd_authors[0].get_absolute_url())
# Otherwise render the "multiple hits" page.
190 return render_to_response(
191 'catalogue/search_multiple_hits.html',
193 'pd_authors': pd_authors,
195 'did_you_mean': suggestion
197 context_instance=RequestContext(request))