apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.views import JSONResponse
  14 from search import Search, JVM, SearchResult
  15 from lucene import StringReader
  16 from suggest.forms import PublishingSuggestForm
  17 from time import sleep
  18 import re
  19 import enchant
  20
# Polish ('pl_PL') enchant dictionary, created once when this module is
# imported; did_you_mean() uses it to check tokens and to fetch suggestions.
dictionary = enchant.Dict('pl_PL')
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
  31 def did_you_mean(query, tokens):
  32     change = {}
  33     for t in tokens:
  34         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  35         if len(authors) > 0:
  36             continue
  37
  38         if not dictionary.check(t):
  39             try:
  40                 change_to = dictionary.suggest(t)[0].lower()
  41                 if change_to != t.lower():
  42                     change[t] = change_to
  43             except IndexError:
  44                 pass
  45
  46     if change == {}:
  47         return None
  48
  49     for frm, to in change.items():
  50         query = query.replace(frm, to)
  51
  52     return query
  53
  54
# Attach the thread that imports this module to the JVM.
JVM.attachCurrentThread()
# Shared Search instance, created on first use by get_search().
# None means "not created yet"; False means "creation in progress".
_search = None
  57
  58
  59 def get_search():
  60     global _search
  61
  62     while _search is False:
  63         sleep(1)
  64
  65     if _search is None:
  66         _search = False
  67         _search = Search()
  68     return _search
  69
  70
  71 def hint(request):
  72     prefix = request.GET.get('term', '')
  73     if len(prefix) < 2:
  74         return JSONResponse([])
  75     JVM.attachCurrentThread()
  76
  77     search = get_search()
  78     hint = search.hint()
  79     try:
  80         tags = request.GET.get('tags', '')
  81         hint.tags(Tag.get_tag_list(tags))
  82     except:
  83         pass
  84
  85     # tagi beda ograniczac tutaj
  86     # ale tagi moga byc na ksiazce i na fragmentach
  87     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  88     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  89
  90     tags = search.hint_tags(prefix, pdcounter=True)
  91     books = search.hint_books(prefix)
  92
  93     def category_name(c):
  94         if c.startswith('pd_'):
  95             c = c[len('pd_'):]
  96         return _(c)
  97
  98     return JSONResponse(
  99         [{'label': t.name,
 100           'category': category_name(t.category),
 101           'id': t.id,
 102           'url': t.get_absolute_url()}
 103           for t in tags] + \
 104           [{'label': b.title,
 105             'category': _('book'),
 106             'id': b.id,
 107             'url': b.get_absolute_url()}
 108             for b in books])
 109
 110
 111 def main(request):
 112     results = {}
 113     JVM.attachCurrentThread()  # where to put this?
 114
 115     results = None
 116     query = None
 117     fuzzy = False #0.8
 118
 119     query = request.GET.get('q','')
 120     # book_id = request.GET.get('book', None)
 121     # book = None
 122     # if book_id is not None:
 123     #     book = get_object_or_404(Book, id=book_id)
 124
 125     # hint = search.hint()
 126     # try:
 127     #     tag_list = Tag.get_tag_list(tags)
 128     # except:
 129     #     tag_list = []
 130
 131     if len(query) < 2:
 132         return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 133                                   context_instance=RequestContext(request))
 134
 135     search = get_search()
 136     # hint.tags(tag_list)
 137     # if book:
 138     #     hint.books(book)
 139     tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 140     tags = split_tags(tags)
 141
 142     toks = StringReader(query)
 143     tokens_cache = {}
 144
 145     author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 146     title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 147
 148     # Boost main author/title results with mixed search, and save some of its results for end of list.
 149     # boost author, title results
 150     author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 151     author_title_rest = []
 152     for b in author_title_mixed:
 153         bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 154         for b2 in bks:
 155             b2.boost *= 1.1
 156         if bks is []:
 157             author_title_rest.append(b)
 158
 159     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 160     # Because the query is using only one field.
 161     text_phrase = SearchResult.aggregate(
 162         search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 163         search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 164
 165     everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 166
 167     def already_found(results):
 168         def f(e):
 169             for r in results:
 170                 if e.book_id == r.book_id:
 171                     e.boost = 0.9
 172                     results.append(e)
 173                     return True
 174             return False
 175         return f
 176     f = already_found(author_results + title_results + text_phrase)
 177     everywhere = filter(lambda x: not f(x), everywhere)
 178
 179     author_results = SearchResult.aggregate(author_results)
 180     title_results = SearchResult.aggregate(title_results)
 181
 182     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 183
 184     for res in [author_results, title_results, text_phrase, everywhere]:
 185         res.sort(reverse=True)
 186         for r in res:
 187             for h in r.hits:
 188                 h['snippets'] = map(lambda s:
 189                                     re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 190                                             re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 191
 192     suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 193
 194     def ensure_exists(r):
 195         try:
 196             return r.book
 197         except Book.DoesNotExist:
 198             return False
 199
 200     author_results = filter(ensure_exists, author_results)
 201     title_results = filter(ensure_exists, title_results)
 202     text_phrase = filter(ensure_exists, text_phrase)
 203     everywhere = filter(ensure_exists, everywhere)
 204
 205     results = author_results + title_results + text_phrase + everywhere
 206     # ensure books do exists & sort them
 207     results.sort(reverse=True)
 208
 209     if len(results) == 1:
 210         fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 211         if len(fragment_hits) == 1:
 212             #anchor = fragment_hits[0]['fragment']
 213             #frag = Fragment.objects.get(anchor=anchor)
 214             return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 215         return HttpResponseRedirect(results[0].book.get_absolute_url())
 216     elif len(results) == 0:
 217         form = PublishingSuggestForm(initial={"books": query + ", "})
 218         return render_to_response('catalogue/search_no_hits.html',
 219                                   {'tags': tags,
 220                                    'prefix': query,
 221                                    "form": form,
 222                                    'did_you_mean': suggestion},
 223             context_instance=RequestContext(request))
 224
 225     return render_to_response('catalogue/search_multiple_hits.html',
 226                               {'tags': tags,
 227                                'prefix': query,
 228                                'results': { 'author': author_results,
 229                                             'title': title_results,
 230                                             'content': text_phrase,
 231                                             'other': everywhere},
 232                                'did_you_mean': suggestion},
 233         context_instance=RequestContext(request))