apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.fields import dumps
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 import enchant
  20
# Polish (pl_PL) enchant spell-checking dictionary, created once at import
# time; did_you_mean() uses it to check tokens and to fetch suggestions.
dictionary = enchant.Dict('pl_PL')
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
  31 def did_you_mean(query, tokens):
  32     change = {}
  33     for t in tokens:
  34         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  35         if len(authors) > 0:
  36             continue
  37
  38         if not dictionary.check(t):
  39             try:
  40                 change[t] = dictionary.suggest(t)[0]
  41             except IndexError:
  42                 pass
  43
  44     if change == {}:
  45         return None
  46
  47     for frm, to in change.items():
  48         query = query.replace(frm, to)
  49
  50     return query
  51
  52
  53 def hint(request):
  54     prefix = request.GET.get('term', '')
  55     if len(prefix) < 2:
  56         return JSONResponse([])
  57     JVM.attachCurrentThread()
  58     s = Search()
  59
  60     hint = s.hint()
  61     try:
  62         tags = request.GET.get('tags', '')
  63         hint.tags(Tag.get_tag_list(tags))
  64     except:
  65         pass
  66
  67     # tagi beda ograniczac tutaj
  68     # ale tagi moga byc na ksiazce i na fragmentach
  69     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  70     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  71
  72     tags = s.hint_tags(prefix, pdcounter=True)
  73     books = s.hint_books(prefix)
  74
  75     # TODO DODAC TU HINTY
  76
  77     return JSONResponse(
  78         [{'label': t.name,
  79           'category': _(t.category),
  80           'id': t.id,
  81           'url': t.get_absolute_url()}
  82           for t in tags] + \
  83           [{'label': b.title,
  84             'category': _('book'),
  85             'id': b.id,
  86             'url': b.get_absolute_url()}
  87             for b in books])
  88
  89
  90 def main(request):
  91     results = {}
  92     JVM.attachCurrentThread()  # where to put this?
  93     srch = Search()
  94
  95     results = None
  96     query = None
  97     fuzzy = False
  98
  99     if 'q' in request.GET:
 100         # tags = request.GET.get('tags', '')
 101         query = request.GET['q']
 102         # book_id = request.GET.get('book', None)
 103         # book = None
 104         # if book_id is not None:
 105         #     book = get_object_or_404(Book, id=book_id)
 106
 107         # hint = srch.hint()
 108         # try:
 109         #     tag_list = Tag.get_tag_list(tags)
 110         # except:
 111         #     tag_list = []
 112
 113         if len(query) < 2:
 114             return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 115                                       context_instance=RequestContext(request))
 116
 117         # hint.tags(tag_list)
 118         # if book:
 119         #     hint.books(book)
 120         tags = srch.hint_tags(query, pdcounter=True, prefix=False)
 121         tags = split_tags(tags)
 122
 123         toks = StringReader(query)
 124         tokens_cache = {}
 125         fuzzy = 'fuzzy' in request.GET
 126         if fuzzy:
 127             fuzzy = 0.7
 128
 129         author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 130         title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 131
 132         # Boost main author/title results with mixed search, and save some of its results for end of list.
 133         # boost author, title results
 134         author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 135         author_title_rest = []
 136         for b in author_title_mixed:
 137             bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 138             for b2 in bks:
 139                 b2.boost *= 1.1
 140             if bks is []:
 141                 author_title_rest.append(b)
 142
 143         # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 144         # Because the query is using only one field.
 145         text_phrase = SearchResult.aggregate(
 146             srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 147             srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 148
 149         everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 150
 151         def already_found(results):
 152             def f(e):
 153                 for r in results:
 154                     if e.book_id == r.book_id:
 155                         e.boost = 0.9
 156                         results.append(e)
 157                         return True
 158                 return False
 159             return f
 160         f = already_found(author_results + title_results + text_phrase)
 161         everywhere = filter(lambda x: not f(x), everywhere)
 162
 163         author_results = SearchResult.aggregate(author_results)
 164         title_results = SearchResult.aggregate(title_results)
 165
 166         everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 167
 168         for res in [author_results, title_results, text_phrase, everywhere]:
 169             res.sort(reverse=True)
 170             for r in res:
 171                 for h in r.hits:
 172                     h['snippets'] = map(lambda s:
 173                                         re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 174                                                 re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 175
 176         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
 177         print "dym? %s" % repr(suggestion).encode('utf-8')
 178
 179         results = author_results + title_results + text_phrase + everywhere
 180         results.sort(reverse=True)
 181
 182         if len(results) == 1:
 183             fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 184             if len(fragment_hits) == 1:
 185                 anchor = fragment_hits[0]['fragment']
 186                 frag = Fragment.objects.get(anchor=anchor)
 187                 return HttpResponseRedirect(frag.get_absolute_url())
 188             return HttpResponseRedirect(results[0].book.get_absolute_url())
 189         elif len(results) == 0:
 190             form = PublishingSuggestForm(initial={"books": query + ", "})
 191             return render_to_response('catalogue/search_no_hits.html',
 192                                       {'tags': tags,
 193                                        'prefix': query,
 194                                        "form": form,
 195                                        'did_you_mean': suggestion},
 196                 context_instance=RequestContext(request))
 197
 198         print "TAGS: %s" % tags
 199         return render_to_response('catalogue/search_multiple_hits.html',
 200                                   {'tags': tags,
 201                                    'prefix': query,
 202                                    'results': { 'author': author_results,
 203                                                 'title': title_results,
 204                                                 'content': text_phrase,
 205                                                 'other': everywhere},
 206                                    'did_you_mean': suggestion},
 207             context_instance=RequestContext(request))