1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.shortcuts import render_to_response, get_object_or_404
7 from django.template import RequestContext
8 from django.views.decorators import cache
9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
10 from django.utils.translation import ugettext as _
12 from catalogue.utils import split_tags
13 from catalogue.models import Book, Tag, Fragment
14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
15 from catalogue.views import JSONResponse
16 from search import Search, SearchResult
17 from suggest.forms import PublishingSuggestForm
def match_word_re(word):
    r"""Build a whole-word regular expression for the configured database.

    The pattern flavour is picked from the ``ENGINE`` entry of the default
    database in ``settings.DATABASES``:

    * engine name containing ``sqlite`` -> ``\b<word>\b``
    * engine name containing ``mysql``  -> ``[[:<:]]<word>[[:>:]]``

    ``word`` is interpolated as-is; it is not escaped.

    Returns the pattern string, or ``None`` when the engine name matches
    neither of the above.
    """
    engine_name = settings.DATABASES['default']['ENGINE']
    # Checked in order; the first marker found in the engine name wins.
    boundary_templates = (
        ('sqlite', r"\b%s\b"),
        ('mysql', "[[:<:]]%s[[:>:]]"),
    )
    for marker, template in boundary_templates:
        if marker in engine_name:
            return template % word
    return None
# Characters that remove_query_syntax_chars() strips from a query string:
# backslash, slash, asterisk, colon, parentheses and braces.
query_syntax_chars = re.compile(r"[\\/*:(){}]")


def remove_query_syntax_chars(query, replace=' '):
    """Return ``query`` with every query-syntax character substituted.

    Each character matched by ``query_syntax_chars`` is replaced with
    ``replace`` (a single space by default). Previously the ``replace``
    argument was accepted but ignored and a space was always used.

    :param query: the raw query string.
    :param replace: text inserted in place of each matched character.
    :returns: the sanitised query string.
    """
    # A callable replacement inserts ``replace`` literally, so a value that
    # contains backslashes is not interpreted as a regex replacement template.
    return query_syntax_chars.sub(lambda _match: replace, query)
37 def did_you_mean(query, tokens):
41 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
42 # if len(authors) > 0:
46 # if not dictionary.check(t):
48 # change_to = dictionary.suggest(t)[0].lower()
49 # if change_to != t.lower():
50 # change[t] = change_to
57 # for frm, to in change.items():
58 # query = query.replace(frm, to)
64 prefix = request.GET.get('term', '')
66 return JSONResponse([])
68 prefix = remove_query_syntax_chars(prefix)
# Tags will narrow the results here,
# but tags can be attached both to a book and to fragments.
# If the tags concern only a book, it matters that the new ones are in the same book;
# if they concern themes, it matters that they are in the same fragment.
76 tags = search.hint_tags(prefix, pdcounter=True)
77 books = search.hint_books(prefix)
80 if isinstance(tag, PDCounterAuthor):
81 if filter(lambda t: t.slug == tag.slug and t != tag, tags):
83 elif isinstance(tag, PDCounterBook):
84 if filter(lambda b: b.slug == tag.slug, tags):
88 tags = filter(lambda t: not is_dupe(t), tags)
91 if c.startswith('pd_'):
95 callback = request.GET.get('callback', None)
96 data = [{'label': t.name,
97 'category': category_name(t.category),
99 'url': t.get_absolute_url()}
102 'category': _('book'),
104 'url': b.get_absolute_url()}
107 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
108 content_type="application/json; charset=utf-8")
110 return JSONResponse(data)
119 query = request.GET.get('q', '')
122 return render_to_response('catalogue/search_too_short.html',
124 context_instance=RequestContext(request))
126 query = remove_query_syntax_chars(query)
130 theme_terms = search.index.analyze(text=query, field="themes_pl") \
131 + search.index.analyze(text=query, field="themes")
134 tags = search.hint_tags(query, pdcounter=True, prefix=False)
135 tags = split_tags(tags)
137 author_results = search.search_phrase(query, 'authors', book=True)
138 translator_results = search.search_phrase(query, 'translators', book=True)
140 title_results = search.search_phrase(query, 'title', book=True)
142 # Boost main author/title results with mixed search, and save some of its results for end of list.
143 # boost author, title results
144 author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
145 author_title_rest = []
147 for b in author_title_mixed:
148 also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
149 for b2 in also_in_mixed:
# Test emptiness by truth value. The previous `also_in_mixed is []` compared
# identity against a freshly created list, which is never true, so
# author_title_rest was never populated. This relies on filter() returning
# a list, as the rest of this module already does.
if not also_in_mixed:
    author_title_rest.append(b)
# Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
# because the query is using only one field.
156 text_phrase = SearchResult.aggregate(
157 search.search_phrase(query, 'text', snippets=True, book=False),
158 search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
160 everywhere = search.search_everywhere(query, query_terms=theme_terms)
162 def already_found(results):
165 if e.book_id == r.book_id:
171 f = already_found(author_results + translator_results + title_results + text_phrase)
172 everywhere = filter(lambda x: not f(x), everywhere)
174 author_results = SearchResult.aggregate(author_results)
175 translator_results = SearchResult.aggregate(translator_results)
176 title_results = SearchResult.aggregate(title_results)
178 everywhere = SearchResult.aggregate(everywhere, author_title_rest)
180 for field, res in [('authors', author_results),
181 ('translators', translator_results),
182 ('title', title_results),
183 ('text', text_phrase),
184 ('text', everywhere)]:
185 res.sort(reverse=True)
187 search.get_snippets(r, query, field, 3)
191 def ensure_exists(r):
194 except Book.DoesNotExist:
197 author_results = filter(ensure_exists, author_results)
198 translator_results = filter(ensure_exists, translator_results)
199 title_results = filter(ensure_exists, title_results)
200 text_phrase = filter(ensure_exists, text_phrase)
201 everywhere = filter(ensure_exists, everywhere)
203 results = author_results + translator_results + title_results + text_phrase + everywhere
# ensure books do exist & sort them
205 for res in (author_results, translator_results, title_results, text_phrase, everywhere):
206 res.sort(reverse=True)
208 # We don't want to redirect to book text, but rather display result page even with one result.
209 # if len(results) == 1:
210 # fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
211 # if len(fragment_hits) == 1:
212 # #anchor = fragment_hits[0]['fragment']
213 # #frag = Fragment.objects.get(anchor=anchor)
214 # return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
215 # return HttpResponseRedirect(results[0].book.get_absolute_url())
216 if len(results) == 0:
217 form = PublishingSuggestForm(initial={"books": query + ", "})
218 return render_to_response('catalogue/search_no_hits.html',
222 'did_you_mean': suggestion},
223 context_instance=RequestContext(request))
225 return render_to_response('catalogue/search_multiple_hits.html',
228 'results': {'author': author_results,
229 'translator': translator_results,
230 'title': title_results,
231 'content': text_phrase,
232 'other': everywhere},
233 'did_you_mean': suggestion},
234 context_instance=RequestContext(request))