move to django 1.4 and django-pipeline,
[wolnelektury.git] / apps / search / views.py
1 # -*- coding: utf-8 -*-
2
3 from django.conf import settings
4 from django.shortcuts import render_to_response, get_object_or_404
5 from django.template import RequestContext
6 from django.contrib.auth.decorators import login_required
7 from django.views.decorators import cache
8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
9 from django.utils.translation import ugettext as _
10
11 from catalogue.utils import split_tags
12 from catalogue.models import Book, Tag, Fragment
13 from catalogue.views import JSONResponse
14 from search import Search, JVM, SearchResult
15 from lucene import StringReader
16 from suggest.forms import PublishingSuggestForm
17 import re
18 import enchant
19
# Module-level enchant dictionary for the 'pl_PL' locale; did_you_mean() uses
# it to check query tokens and to fetch spelling suggestions.
20 dictionary = enchant.Dict('pl_PL')
21
22
def match_word_re(word):
    """Return a regex source string that matches ``word`` as a whole word.

    The word-boundary syntax is chosen from the ``ENGINE`` of the default
    database in ``settings.DATABASES``, because the result is meant to be
    handed to a database regex lookup (see ``did_you_mean``).

    ``word`` is interpolated verbatim (it is not regex-escaped), so callers
    should pass plain word tokens.

    Returns ``None`` when the engine is not one of the recognised backends;
    callers must be prepared for that.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if 'sqlite' in engine:
        return r"\b%s\b" % word
    elif 'mysql' in engine:
        return "[[:<:]]%s[[:>:]]" % word
    elif 'postgresql' in engine or 'postgis' in engine:
        # PostgreSQL spells "beginning or end of a word" as \y.
        return r"\y%s\y" % word
    # Unknown backend: no known word-boundary syntax to offer.
    return None
28
29
def did_you_mean(query, tokens):
    """Build a spelling-corrected variant of ``query``, or return ``None``.

    For every token in ``tokens``:

    * tokens that match (as a whole word, case-insensitively) the name of an
      ``author`` tag are left alone;
    * tokens rejected by the module-level spelling ``dictionary`` are mapped
      to the lower-cased first suggestion, unless that suggestion equals the
      lower-cased token.

    Returns ``query`` with each corrected token replaced by its suggestion,
    or ``None`` when no token needed correcting.
    """
    change = {}
    for t in tokens:
        pattern = match_word_re(t)
        # exists() asks the database for a yes/no answer instead of loading
        # every matching row just to count them.  A missing pattern (backend
        # without known word-boundary syntax) simply skips the author check.
        if pattern is not None and Tag.objects.filter(
                category='author', name__iregex=pattern).exists():
            continue

        if dictionary.check(t):
            continue

        suggestions = dictionary.suggest(t)
        if not suggestions:
            continue

        change_to = suggestions[0].lower()
        if change_to != t.lower():
            change[t] = change_to

    if not change:
        return None

    for frm, to in change.items():
        # Replace whole words only, so a corrected token that happens to be a
        # substring of another word does not corrupt that word.  Lookarounds
        # are used instead of \b so the match does not depend on the token's
        # first/last character being a word character.
        whole_word = re.compile(r"(?<!\w)%s(?!\w)" % re.escape(frm), re.UNICODE)
        # A callable replacement keeps backslashes in ``to`` literal.
        query = whole_word.sub(lambda m, to=to: to, query)

    return query
52
# Attach the thread that imports this module to the JVM, then create the
# module-level Search instance shared by the views below.
# NOTE(review): presumably the attach must happen before Search() is built - confirm.
53 JVM.attachCurrentThread()
54 search = Search()
55
56
def hint(request):
    """Return autocomplete hints for the ``term`` GET parameter as JSON.

    A term shorter than two characters yields an empty JSON list.  Otherwise
    the response is a list of dicts with ``label``, ``category``, ``id`` and
    ``url`` keys: first one entry per matching tag, then one per matching
    book (category ``_('book')``).
    """
    prefix = request.GET.get('term', '')
    if len(prefix) < 2:
        return JSONResponse([])
    JVM.attachCurrentThread()

    # NOTE(review): this hint object is configured with the requested tags but
    # is not passed to the hint_tags()/hint_books() calls below - confirm
    # whether the restriction is meant to be applied.
    search_hint = search.hint()
    try:
        tags = request.GET.get('tags', '')
        search_hint.tags(Tag.get_tag_list(tags))
    except Exception:
        # Best effort: an unparsable or unknown ``tags`` value just means no
        # tag restriction.  (Narrowed from a bare ``except`` so that
        # SystemExit / KeyboardInterrupt are no longer swallowed.)
        pass

    # Tags are going to restrict the results here,
    # but tags can be attached to a book as well as to fragments:
    # if the tags concern only the book, the new ones must be in the same book;
    # if they concern themes, the new ones must be in the same fragment.

    tags = search.hint_tags(prefix, pdcounter=True)
    books = search.hint_books(prefix)

    def category_name(c):
        # Strip the 'pd_' prefix before translating the category name.
        if c.startswith('pd_'):
            c = c[len('pd_'):]
        return _(c)

    tag_hints = [{'label': t.name,
                  'category': category_name(t.category),
                  'id': t.id,
                  'url': t.get_absolute_url()}
                 for t in tags]
    book_hints = [{'label': b.title,
                   'category': _('book'),
                   'id': b.id,
                   'url': b.get_absolute_url()}
                  for b in books]

    return JSONResponse(tag_hints + book_hints)
94
95
def _clean_snippet(snippet):
    """Collapse whitespace around line breaks and strip both ends of a snippet."""
    collapsed = re.sub(r"[ \t\n]*\n[ \t\n]*", u"\n", snippet)
    return re.sub(r"(^[ \t\n]+|[ \t\n]+$)", u"", collapsed)


def main(request):
    """Run a full-text search for the ``q`` GET parameter and render the result.

    Outcomes:

    * query shorter than two characters -> ``catalogue/search_too_short.html``;
    * exactly one result -> redirect to its single fragment hit if there is
      exactly one, otherwise to the book;
    * no results -> ``catalogue/search_no_hits.html`` with a publishing
      suggestion form and a "did you mean" suggestion;
    * otherwise -> ``catalogue/search_multiple_hits.html`` with results
      grouped under ``author``, ``title``, ``content`` and ``other``.
    """
    JVM.attachCurrentThread()  # where to put this?

    fuzzy = False  # 0.8
    query = request.GET.get('q', '')

    if len(query) < 2:
        return render_to_response('catalogue/search_too_short.html', {'prefix': query},
                                  context_instance=RequestContext(request))

    # NOTE: restricting the search to a given book / tag list was sketched
    # here earlier but is not implemented.
    tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
    tags = split_tags(tags)

    toks = StringReader(query)
    tokens_cache = {}

    author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
    title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)

    # Boost main author/title results with the mixed search, and save the
    # mixed results that matched neither for the end of the list.
    author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'],
                                            fuzzy=fuzzy, tokens_cache=tokens_cache)
    author_and_title = author_results + title_results
    author_title_rest = []
    for b in author_title_mixed:
        bks = [ba for ba in author_and_title if ba.book_id == b.book_id]
        for b2 in bks:
            b2.boost *= 1.1
        # Emptiness must be tested by truth value: an identity comparison
        # against a fresh list literal can never succeed, which would leave
        # author_title_rest permanently empty.
        if not bks:
            author_title_rest.append(b)

    # Do a phrase search but a term search as well - this can give us better
    # snippets than search_everywhere, because the query uses only one field.
    text_phrase = SearchResult.aggregate(
        search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache,
                             snippets=True, book=False, slop=4),
        search.search_some(toks, ['content'], tokens_cache=tokens_cache,
                           snippets=True, book=False))

    everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)

    def already_found(results):
        # Build a predicate telling whether a result's book is already in
        # ``results``.  ``results`` is the temporary concatenation created
        # below, so the append only extends that temporary list.
        def f(e):
            for r in results:
                if e.book_id == r.book_id:
                    e.boost = 0.9
                    results.append(e)
                    return True
            return False
        return f

    is_duplicate = already_found(author_results + title_results + text_phrase)
    everywhere = [x for x in everywhere if not is_duplicate(x)]

    author_results = SearchResult.aggregate(author_results)
    title_results = SearchResult.aggregate(title_results)

    everywhere = SearchResult.aggregate(everywhere, author_title_rest)

    for res in [author_results, title_results, text_phrase, everywhere]:
        res.sort(reverse=True)
        for r in res:
            for h in r.hits:
                h['snippets'] = [_clean_snippet(s) for s in h['snippets']]

    suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))

    def ensure_exists(r):
        # A result whose book lookup raises Book.DoesNotExist is dropped.
        try:
            return r.book
        except Book.DoesNotExist:
            return False

    author_results = [r for r in author_results if ensure_exists(r)]
    title_results = [r for r in title_results if ensure_exists(r)]
    text_phrase = [r for r in text_phrase if ensure_exists(r)]
    everywhere = [r for r in everywhere if ensure_exists(r)]

    results = author_results + title_results + text_phrase + everywhere
    results.sort(reverse=True)

    if len(results) == 1:
        fragment_hits = [h for h in results[0].hits if 'fragment' in h]
        if len(fragment_hits) == 1:
            return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
        return HttpResponseRedirect(results[0].book.get_absolute_url())
    elif not results:
        form = PublishingSuggestForm(initial={"books": query + ", "})
        return render_to_response('catalogue/search_no_hits.html',
                                  {'tags': tags,
                                   'prefix': query,
                                   "form": form,
                                   'did_you_mean': suggestion},
                                  context_instance=RequestContext(request))

    return render_to_response('catalogue/search_multiple_hits.html',
                              {'tags': tags,
                               'prefix': query,
                               'results': {'author': author_results,
                                           'title': title_results,
                                           'content': text_phrase,
                                           'other': everywhere},
                               'did_you_mean': suggestion},
                              context_instance=RequestContext(request))
216                                'did_you_mean': suggestion},
217         context_instance=RequestContext(request))