1 # -*- coding: utf-8 -*-
3 from django.conf import settings
4 from django.shortcuts import render_to_response, get_object_or_404
5 from django.template import RequestContext
6 from django.contrib.auth.decorators import login_required
7 from django.views.decorators import cache
8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
9 from django.utils.translation import ugettext as _
11 from catalogue.utils import split_tags
12 from catalogue.models import Book, Tag, Fragment
13 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
14 from catalogue.views import JSONResponse
15 from search import Search, SearchResult
16 from lucene import StringReader
17 from suggest.forms import PublishingSuggestForm
18 from time import sleep
def match_word_re(word):
    """Return a whole-word regex for ``word`` in the active database's dialect.

    The pattern is intended for Django ``regex``/``iregex`` field lookups,
    whose syntax depends on the backend named in
    ``settings.DATABASES['default']['ENGINE']``.

    ``word`` is interpolated verbatim: regex metacharacters it contains stay
    active, so callers must escape untrusted input themselves.

    Returns ``None`` when the engine is not one of the recognised backends
    (previously an implicit fall-through, now explicit).
    """
    # Look the engine up once instead of once per branch.
    engine = settings.DATABASES['default']['ENGINE']

    if 'sqlite' in engine:
        # Python-style word boundaries.
        return r"\b%s\b" % word
    if 'mysql' in engine:
        # Spencer-style word-start / word-end markers.
        # NOTE(review): MySQL 8.0.4+ (ICU regex) reportedly dropped these
        # markers in favour of \b -- confirm against the deployed version.
        return "[[:<:]]%s[[:>:]]" % word
    if 'postgresql' in engine:
        # PostgreSQL constraint escapes: \m = start of word, \M = end of word.
        return r"\m%s\M" % word
    # Unknown backend: no word-boundary syntax is known for it.
    return None
31 def did_you_mean(query, tokens):
35 # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
36 # if len(authors) > 0:
40 # if not dictionary.check(t):
42 # change_to = dictionary.suggest(t)[0].lower()
43 # if change_to != t.lower():
44 # change[t] = change_to
51 # for frm, to in change.items():
52 # query = query.replace(frm, to)
58 prefix = request.GET.get('term', '')
60 return JSONResponse([])
63 # tags will narrow the results here
64 # but tags can be attached to a book as well as to fragments
65 # if the tags concern only a book, the new ones must belong to the same book
66 # if instead they concern themes, they must be in the same fragment.
68 tags = search.hint_tags(prefix, pdcounter=True)
69 books = search.hint_books(prefix)
72 if isinstance(tag, PDCounterAuthor):
73 if filter(lambda t: t.slug == tag.slug and t != tag, tags):
75 elif isinstance(tag, PDCounterBook):
76 if filter(lambda b: b.slug == tag.slug, tags):
80 tags = filter(lambda t: not is_dupe(t), tags)
83 if c.startswith('pd_'):
87 callback = request.GET.get('callback', None)
88 data = [{'label': t.name,
89 'category': category_name(t.category),
91 'url': t.get_absolute_url()}
94 'category': _('book'),
96 'url': b.get_absolute_url()}
99 return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
100 content_type="application/json; charset=utf-8")
102 return JSONResponse(data)
111 query = request.GET.get('q', '')
114 return render_to_response('catalogue/search_too_short.html',
116 context_instance=RequestContext(request))
119 theme_terms = search.index.analyze(text=query, field="themes_pl") \
120 + search.index.analyze(text=query, field="themes")
123 tags = search.hint_tags(query, pdcounter=True, prefix=False)
124 tags = split_tags(tags)
126 author_results = search.search_phrase(query, 'authors', book=True)
127 title_results = search.search_phrase(query, 'title', book=True)
129 # Boost main author/title results with mixed search, and save some of its results for end of list.
130 # boost author, title results
131 author_title_mixed = search.search_some(query, ['authors', 'title', 'tags'], query_terms=theme_terms)
132 author_title_rest = []
134 for b in author_title_mixed:
135 also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
136 for b2 in also_in_mixed:
138 if also_in_mixed is []:
139 author_title_rest.append(b)
141 # Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
142 # because the query is using only one field.
143 text_phrase = SearchResult.aggregate(
144 search.search_phrase(query, 'text', snippets=True, book=False),
145 search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
147 everywhere = search.search_everywhere(query, query_terms=theme_terms)
149 def already_found(results):
152 if e.book_id == r.book_id:
158 f = already_found(author_results + title_results + text_phrase)
159 everywhere = filter(lambda x: not f(x), everywhere)
161 author_results = SearchResult.aggregate(author_results)
162 title_results = SearchResult.aggregate(title_results)
164 everywhere = SearchResult.aggregate(everywhere, author_title_rest)
166 for field, res in [('authors', author_results),
167 ('title', title_results),
168 ('text', text_phrase),
169 ('text', everywhere)]:
170 res.sort(reverse=True)
171 print "get snips %s, res size %d" % (field, len(res))
173 print "Get snippets for %s" % r
174 search.get_snippets(r, query, field, 3)
177 # h['snippets'] = map(lambda s:
178 # re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
179 # re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
181 # suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
184 def ensure_exists(r):
187 except Book.DoesNotExist:
190 author_results = filter(ensure_exists, author_results)
191 title_results = filter(ensure_exists, title_results)
192 text_phrase = filter(ensure_exists, text_phrase)
193 everywhere = filter(ensure_exists, everywhere)
195 results = author_results + title_results + text_phrase + everywhere
196 # ensure books exist & sort them
197 results.sort(reverse=True)
199 if len(results) == 1:
200 fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
201 if len(fragment_hits) == 1:
202 #anchor = fragment_hits[0]['fragment']
203 #frag = Fragment.objects.get(anchor=anchor)
204 return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
205 return HttpResponseRedirect(results[0].book.get_absolute_url())
206 elif len(results) == 0:
207 form = PublishingSuggestForm(initial={"books": query + ", "})
208 return render_to_response('catalogue/search_no_hits.html',
212 'did_you_mean': suggestion},
213 context_instance=RequestContext(request))
215 return render_to_response('catalogue/search_multiple_hits.html',
218 'results': {'author': author_results,
219 'title': title_results,
220 'content': text_phrase,
221 'other': everywhere},
222 'did_you_mean': suggestion},
223 context_instance=RequestContext(request))