apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.fields import dumps
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 import enchant
  20
# Polish (pl_PL) spell-checking dictionary, created once at import time.
# did_you_mean() uses it to check query tokens and to fetch suggestions.
dictionary = enchant.Dict('pl_PL')
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
  31 def did_you_mean(query, tokens):
  32     change = {}
  33     for t in tokens:
  34         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  35         if len(authors) > 0:
  36             continue
  37
  38         if not dictionary.check(t):
  39             try:
  40                 change_to = dictionary.suggest(t)[0].lower()
  41                 if change_to != t.lower():
  42                     change[t] = change_to
  43             except IndexError:
  44                 pass
  45
  46     if change == {}:
  47         return None
  48
  49     for frm, to in change.items():
  50         query = query.replace(frm, to)
  51
  52     return query
  53
  54
  55 def hint(request):
  56     prefix = request.GET.get('term', '')
  57     if len(prefix) < 2:
  58         return JSONResponse([])
  59     JVM.attachCurrentThread()
  60     s = Search()
  61
  62     hint = s.hint()
  63     try:
  64         tags = request.GET.get('tags', '')
  65         hint.tags(Tag.get_tag_list(tags))
  66     except:
  67         pass
  68
  69     # tagi beda ograniczac tutaj
  70     # ale tagi moga byc na ksiazce i na fragmentach
  71     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  72     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  73
  74     tags = s.hint_tags(prefix, pdcounter=True)
  75     books = s.hint_books(prefix)
  76
  77     # TODO DODAC TU HINTY
  78
  79     return JSONResponse(
  80         [{'label': t.name,
  81           'category': _(t.category),
  82           'id': t.id,
  83           'url': t.get_absolute_url()}
  84           for t in tags] + \
  85           [{'label': b.title,
  86             'category': _('book'),
  87             'id': b.id,
  88             'url': b.get_absolute_url()}
  89             for b in books])
  90
  91
  92 def main(request):
  93     results = {}
  94     JVM.attachCurrentThread()  # where to put this?
  95     srch = Search()
  96
  97     results = None
  98     query = None
  99     fuzzy = False #0.8
 100
 101     if 'q' in request.GET:
 102         # tags = request.GET.get('tags', '')
 103         query = request.GET['q']
 104         # book_id = request.GET.get('book', None)
 105         # book = None
 106         # if book_id is not None:
 107         #     book = get_object_or_404(Book, id=book_id)
 108
 109         # hint = srch.hint()
 110         # try:
 111         #     tag_list = Tag.get_tag_list(tags)
 112         # except:
 113         #     tag_list = []
 114
 115         if len(query) < 2:
 116             return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 117                                       context_instance=RequestContext(request))
 118
 119         # hint.tags(tag_list)
 120         # if book:
 121         #     hint.books(book)
 122         tags = srch.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 123         tags = split_tags(tags)
 124
 125         toks = StringReader(query)
 126         tokens_cache = {}
 127
 128         author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 129         title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 130
 131         # Boost main author/title results with mixed search, and save some of its results for end of list.
 132         # boost author, title results
 133         author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 134         author_title_rest = []
 135         for b in author_title_mixed:
 136             bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 137             for b2 in bks:
 138                 b2.boost *= 1.1
 139             if bks is []:
 140                 author_title_rest.append(b)
 141
 142         # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 143         # Because the query is using only one field.
 144         text_phrase = SearchResult.aggregate(
 145             srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 146             srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 147
 148         everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 149
 150         def already_found(results):
 151             def f(e):
 152                 for r in results:
 153                     if e.book_id == r.book_id:
 154                         e.boost = 0.9
 155                         results.append(e)
 156                         return True
 157                 return False
 158             return f
 159         f = already_found(author_results + title_results + text_phrase)
 160         everywhere = filter(lambda x: not f(x), everywhere)
 161
 162         author_results = SearchResult.aggregate(author_results)
 163         title_results = SearchResult.aggregate(title_results)
 164
 165         everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 166
 167         for res in [author_results, title_results, text_phrase, everywhere]:
 168             res.sort(reverse=True)
 169             for r in res:
 170                 for h in r.hits:
 171                     h['snippets'] = map(lambda s:
 172                                         re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 173                                                 re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 174
 175         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
 176         print "dym? %s" % repr(suggestion).encode('utf-8')
 177
 178         results = author_results + title_results + text_phrase + everywhere
 179         results.sort(reverse=True)
 180
 181         if len(results) == 1:
 182             fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 183             if len(fragment_hits) == 1:
 184                 #anchor = fragment_hits[0]['fragment']
 185                 #frag = Fragment.objects.get(anchor=anchor)
 186                 return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 187             return HttpResponseRedirect(results[0].book.get_absolute_url())
 188         elif len(results) == 0:
 189             form = PublishingSuggestForm(initial={"books": query + ", "})
 190             return render_to_response('catalogue/search_no_hits.html',
 191                                       {'tags': tags,
 192                                        'prefix': query,
 193                                        "form": form,
 194                                        'did_you_mean': suggestion},
 195                 context_instance=RequestContext(request))
 196
 197         print "TAGS: %s" % tags
 198         return render_to_response('catalogue/search_multiple_hits.html',
 199                                   {'tags': tags,
 200                                    'prefix': query,
 201                                    'results': { 'author': author_results,
 202                                                 'title': title_results,
 203                                                 'content': text_phrase,
 204                                                 'other': everywhere},
 205                                    'did_you_mean': suggestion},
 206             context_instance=RequestContext(request))