1 # -*- coding: utf-8 -*-
3 from django.conf import settings
4 from django.shortcuts import render_to_response, get_object_or_404
5 from django.template import RequestContext
6 from django.contrib.auth.decorators import login_required
7 from django.views.decorators import cache
8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
9 from django.utils.translation import ugettext as _
11 from catalogue.utils import split_tags
12 from catalogue.models import Book, Tag, Fragment
13 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
14 from catalogue.views import JSONResponse
15 from search import Search, JVM, SearchResult
16 from lucene import StringReader
17 from suggest.forms import PublishingSuggestForm
18 from time import sleep
22 dictionary = enchant.Dict('pl_PL')
def match_word_re(word):
    """Return a regex pattern that matches `word` as a whole word.

    The pattern is used with the ORM's regex lookups (e.g. ``name__iregex``),
    whose syntax is that of the database backend, so the word-boundary
    markers are chosen from ``settings.DATABASES['default']['ENGINE']``.

    `word` is interpolated verbatim: it is not escaped, so any regex
    metacharacters it contains keep their special meaning.

    A pattern string is always returned. Backends that are not recognised
    get the backslash-b word-boundary form instead of an implicit ``None``,
    which is not a usable value for a regex lookup.
    """
    # Read the setting once instead of once per branch.
    engine = settings.DATABASES['default']['ENGINE']

    if 'sqlite' in engine:
        return r"\b%s\b" % word
    if 'mysql' in engine:
        # POSIX-style word-boundary markers.
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgres' in engine or 'postgis' in engine:
        # PostgreSQL spells "word boundary" as \y; there \b means backspace.
        return r"\y%s\y" % word

    # Unknown backend: fall back to the most widely supported spelling
    # rather than silently returning None.
    return r"\b%s\b" % word
32 def did_you_mean(query, tokens):
35 authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
39 if not dictionary.check(t):
41 change_to = dictionary.suggest(t)[0].lower()
42 if change_to != t.lower():
50 for frm, to in change.items():
51 query = query.replace(frm, to)
56 JVM.attachCurrentThread()
63 while _search is False:
73 prefix = request.GET.get('term', '')
75 return JSONResponse([])
76 JVM.attachCurrentThread()
81 tags = request.GET.get('tags', '')
82 hint.tags(Tag.get_tag_list(tags))
# Tags will restrict the results here,
# but tags can be attached both to a book and to fragments.
# If the tags concern only the book, it matters that the new ones are in the same book;
# if instead they concern themes, it matters that they are in the same fragment.
91 tags = search.hint_tags(prefix, pdcounter=True)
92 books = search.hint_books(prefix)
96 if isinstance(tag, PDCounterAuthor):
97 if filter(lambda t: t.slug == tag.slug and t != tag, tags):
99 elif isinstance(tag, PDCounterBook):
100 if filter(lambda b: b.slug == tag.slug, tags):
104 tags = filter(lambda t: not is_dupe(t), tags)
106 def category_name(c):
107 if c.startswith('pd_'):
113 'category': category_name(t.category),
115 'url': t.get_absolute_url()}
118 'category': _('book'),
120 'url': b.get_absolute_url()}
126 JVM.attachCurrentThread() # where to put this?
132 query = request.GET.get('q','')
133 # book_id = request.GET.get('book', None)
135 # if book_id is not None:
136 # book = get_object_or_404(Book, id=book_id)
138 # hint = search.hint()
140 # tag_list = Tag.get_tag_list(tags)
145 return render_to_response('catalogue/search_too_short.html', {'prefix': query},
146 context_instance=RequestContext(request))
148 search = get_search()
149 # hint.tags(tag_list)
152 tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
153 tags = split_tags(tags)
155 toks = StringReader(query)
158 author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
159 title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
161 # Boost main author/title results with mixed search, and save some of its results for end of list.
162 # boost author, title results
163 author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
164 author_title_rest = []
165 for b in author_title_mixed:
166 bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
170 author_title_rest.append(b)
# Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
# because the query is using only one field.
174 text_phrase = SearchResult.aggregate(
175 search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
176 search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
178 everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
180 def already_found(results):
183 if e.book_id == r.book_id:
189 f = already_found(author_results + title_results + text_phrase)
190 everywhere = filter(lambda x: not f(x), everywhere)
192 author_results = SearchResult.aggregate(author_results)
193 title_results = SearchResult.aggregate(title_results)
195 everywhere = SearchResult.aggregate(everywhere, author_title_rest)
197 for res in [author_results, title_results, text_phrase, everywhere]:
198 res.sort(reverse=True)
201 h['snippets'] = map(lambda s:
202 re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
203 re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
205 suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
207 def ensure_exists(r):
210 except Book.DoesNotExist:
213 author_results = filter(ensure_exists, author_results)
214 title_results = filter(ensure_exists, title_results)
215 text_phrase = filter(ensure_exists, text_phrase)
216 everywhere = filter(ensure_exists, everywhere)
218 results = author_results + title_results + text_phrase + everywhere
219 # ensure books do exists & sort them
220 results.sort(reverse=True)
222 if len(results) == 1:
223 fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
224 if len(fragment_hits) == 1:
225 #anchor = fragment_hits[0]['fragment']
226 #frag = Fragment.objects.get(anchor=anchor)
227 return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
228 return HttpResponseRedirect(results[0].book.get_absolute_url())
229 elif len(results) == 0:
230 form = PublishingSuggestForm(initial={"books": query + ", "})
231 return render_to_response('catalogue/search_no_hits.html',
235 'did_you_mean': suggestion},
236 context_instance=RequestContext(request))
238 return render_to_response('catalogue/search_multiple_hits.html',
241 'results': { 'author': author_results,
242 'title': title_results,
243 'content': text_phrase,
244 'other': everywhere},
245 'did_you_mean': suggestion},
246 context_instance=RequestContext(request))