apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import get_random_hash
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.fields import dumps
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 import enchant
  20
# Spell-checking dictionary for the 'pl_PL' locale, created once when this
# module is imported.  did_you_mean() uses it to check tokens and to fetch
# replacement suggestions.
dictionary = enchant.Dict('pl_PL')
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
  31 def did_you_mean(query, tokens):
  32     change = {}
  33     for t in tokens:
  34         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  35         if len(authors) > 0:
  36             continue
  37
  38         if not dictionary.check(t):
  39             try:
  40                 change[t] = dictionary.suggest(t)[0]
  41             except IndexError:
  42                 pass
  43
  44     if change == {}:
  45         return None
  46
  47     for frm, to in change.items():
  48         query = query.replace(frm, to)
  49
  50     return query
  51
  52
  53 def hint(request):
  54     prefix = request.GET.get('term', '')
  55     if len(prefix) < 2:
  56         return JSONResponse([])
  57     JVM.attachCurrentThread()
  58     s = Search()
  59
  60     hint = s.hint()
  61     try:
  62         tags = request.GET.get('tags', '')
  63         hint.tags(Tag.get_tag_list(tags))
  64     except:
  65         pass
  66
  67     # tagi beda ograniczac tutaj
  68     # ale tagi moga byc na ksiazce i na fragmentach
  69     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  70     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  71
  72     tags = s.hint_tags(prefix)
  73     books = s.hint_books(prefix)
  74
  75     # TODO DODAC TU HINTY
  76
  77     return JSONResponse(
  78         [{'label': t.name,
  79           'category': _(t.category),
  80           'id': t.id,
  81           'url': t.get_absolute_url()}
  82           for t in tags] + \
  83           [{'label': b.title,
  84             'category': _('book'),
  85             'id': b.id,
  86             'url': b.get_absolute_url()}
  87             for b in books])
  88
  89
  90 def main(request):
  91     results = {}
  92     JVM.attachCurrentThread()  # where to put this?
  93     srch = Search()
  94
  95     results = None
  96     query = None
  97     fuzzy = False
  98
  99     if 'q' in request.GET:
 100         tags = request.GET.get('tags', '')
 101         query = request.GET['q']
 102         book_id = request.GET.get('book', None)
 103         book = None
 104         if book_id is not None:
 105             book = get_object_or_404(Book, id=book_id)
 106
 107         hint = srch.hint()
 108         try:
 109             tag_list = Tag.get_tag_list(tags)
 110         except:
 111             tag_list = []
 112
 113         if len(query) < 2:
 114             return render_to_response('catalogue/search_too_short.html', {'tags': tag_list, 'prefix': query},
 115                                       context_instance=RequestContext(request))
 116
 117         hint.tags(tag_list)
 118         if book:
 119             hint.books(book)
 120
 121         toks = StringReader(query)
 122         tokens_cache = {}
 123         fuzzy = 'fuzzy' in request.GET
 124         if fuzzy:
 125             fuzzy = 0.7
 126
 127         author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 128         title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 129
 130         # Boost main author/title results with mixed search, and save some of its results for end of list.
 131         # boost author, title results
 132         author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 133         author_title_rest = []
 134         for b in author_title_mixed:
 135             bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 136             for b2 in bks:
 137                 b2.boost *= 1.1
 138             if bks is []:
 139                 author_title_rest.append(b)
 140
 141         text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
 142
 143         everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 144
 145         def already_found(results):
 146             def f(e):
 147                 for r in results:
 148                     if e.book_id == r.book_id:
 149                         results.append(e)
 150                         return True
 151                 return False
 152             return f
 153         f = already_found(author_results + title_results)
 154         everywhere = filter(lambda x: not f(x), everywhere)
 155
 156         author_results = SearchResult.aggregate(author_results)
 157         title_results = SearchResult.aggregate(title_results)
 158
 159         everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 160
 161         for res in [author_results, title_results, text_phrase, everywhere]:
 162             res.sort(reverse=True)
 163             for r in res:
 164                 for h in r.hits:
 165                     h['snippets'] = map(lambda s:
 166                                         re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 167                                                 re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 168
 169         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
 170         print "dym? %s" % repr(suggestion).encode('utf-8')
 171
 172         results = author_results + title_results + text_phrase + everywhere
 173         results.sort(reverse=True)
 174
 175         if len(results) == 1:
 176             fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 177             if len(fragment_hits) == 1:
 178                 anchor = fragment_hits[0]['fragment']
 179                 frag = Fragment.objects.get(anchor=anchor)
 180                 return HttpResponseRedirect(frag.get_absolute_url())
 181             return HttpResponseRedirect(results[0].book.get_absolute_url())
 182         elif len(results) == 0:
 183             form = PublishingSuggestForm(initial={"books": query + ", "})
 184             return render_to_response('catalogue/search_no_hits.html',
 185                                       {'tags': tag_list,
 186                                        'prefix': query,
 187                                        "form": form,
 188                                        'did_you_mean': suggestion},
 189                 context_instance=RequestContext(request))
 190
 191         return render_to_response('catalogue/search_multiple_hits.html',
 192                                   {'tags': tag_list,
 193                                    'prefix': query,
 194                                    'results': { 'author': author_results,
 195                                                 'title': title_results,
 196                                                 'content': text_phrase,
 197                                                 'other': everywhere},
 198                                    'did_you_mean': suggestion},
 199             context_instance=RequestContext(request))