apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.fields import dumps
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 import enchant
  20
# Spell-checking dictionary for the 'pl_PL' locale, created once at import
# time; did_you_mean() uses it to check query tokens and to get suggestions.
dictionary = enchant.Dict('pl_PL')
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
  31 def did_you_mean(query, tokens):
  32     change = {}
  33     for t in tokens:
  34         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  35         if len(authors) > 0:
  36             continue
  37
  38         if not dictionary.check(t):
  39             try:
  40                 change_to = dictionary.suggest(t)[0].lower()
  41                 if change_to != t.lower():
  42                     change[t] = change_to
  43             except IndexError:
  44                 pass
  45
  46     if change == {}:
  47         return None
  48
  49     for frm, to in change.items():
  50         query = query.replace(frm, to)
  51
  52     return query
  53
# Module-level Search instance, created at import time and shared by the
# hint() and main() views.
search = Search()
  55
  56 def hint(request):
  57     prefix = request.GET.get('term', '')
  58     if len(prefix) < 2:
  59         return JSONResponse([])
  60     JVM.attachCurrentThread()
  61
  62     hint = search.hint()
  63     try:
  64         tags = request.GET.get('tags', '')
  65         hint.tags(Tag.get_tag_list(tags))
  66     except:
  67         pass
  68
  69     # tagi beda ograniczac tutaj
  70     # ale tagi moga byc na ksiazce i na fragmentach
  71     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  72     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  73
  74     tags = search.hint_tags(prefix, pdcounter=True)
  75     books = search.hint_books(prefix)
  76
  77     def category_name(c):
  78         if c.startswith('pd_'):
  79             c=c[len('pd_'):]
  80         return _(c)
  81
  82     return JSONResponse(
  83         [{'label': t.name,
  84           'category': category_name(t.category),
  85           'id': t.id,
  86           'url': t.get_absolute_url()}
  87           for t in tags] + \
  88           [{'label': b.title,
  89             'category': _('book'),
  90             'id': b.id,
  91             'url': b.get_absolute_url()}
  92             for b in books])
  93
  94
  95 def main(request):
  96     results = {}
  97     JVM.attachCurrentThread()  # where to put this?
  98
  99     results = None
 100     query = None
 101     fuzzy = False #0.8
 102
 103     if 'q' in request.GET:
 104         # tags = request.GET.get('tags', '')
 105         query = request.GET['q']
 106         # book_id = request.GET.get('book', None)
 107         # book = None
 108         # if book_id is not None:
 109         #     book = get_object_or_404(Book, id=book_id)
 110
 111         # hint = search.hint()
 112         # try:
 113         #     tag_list = Tag.get_tag_list(tags)
 114         # except:
 115         #     tag_list = []
 116
 117         if len(query) < 2:
 118             return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 119                                       context_instance=RequestContext(request))
 120
 121         # hint.tags(tag_list)
 122         # if book:
 123         #     hint.books(book)
 124         tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 125         tags = split_tags(tags)
 126
 127         toks = StringReader(query)
 128         tokens_cache = {}
 129
 130         author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 131         title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 132
 133         # Boost main author/title results with mixed search, and save some of its results for end of list.
 134         # boost author, title results
 135         author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 136         author_title_rest = []
 137         for b in author_title_mixed:
 138             bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 139             for b2 in bks:
 140                 b2.boost *= 1.1
 141             if bks is []:
 142                 author_title_rest.append(b)
 143
 144         # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 145         # Because the query is using only one field.
 146         text_phrase = SearchResult.aggregate(
 147             search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 148             search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 149
 150         everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 151
 152         def already_found(results):
 153             def f(e):
 154                 for r in results:
 155                     if e.book_id == r.book_id:
 156                         e.boost = 0.9
 157                         results.append(e)
 158                         return True
 159                 return False
 160             return f
 161         f = already_found(author_results + title_results + text_phrase)
 162         everywhere = filter(lambda x: not f(x), everywhere)
 163
 164         author_results = SearchResult.aggregate(author_results)
 165         title_results = SearchResult.aggregate(title_results)
 166
 167         everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 168
 169         for res in [author_results, title_results, text_phrase, everywhere]:
 170             res.sort(reverse=True)
 171             for r in res:
 172                 for h in r.hits:
 173                     h['snippets'] = map(lambda s:
 174                                         re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 175                                                 re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 176
 177         suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 178         print "dym? %s" % repr(suggestion).encode('utf-8')
 179
 180         results = author_results + title_results + text_phrase + everywhere
 181         results.sort(reverse=True)
 182
 183         if len(results) == 1:
 184             fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 185             if len(fragment_hits) == 1:
 186                 #anchor = fragment_hits[0]['fragment']
 187                 #frag = Fragment.objects.get(anchor=anchor)
 188                 return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 189             return HttpResponseRedirect(results[0].book.get_absolute_url())
 190         elif len(results) == 0:
 191             form = PublishingSuggestForm(initial={"books": query + ", "})
 192             return render_to_response('catalogue/search_no_hits.html',
 193                                       {'tags': tags,
 194                                        'prefix': query,
 195                                        "form": form,
 196                                        'did_you_mean': suggestion},
 197                 context_instance=RequestContext(request))
 198
 199         print "TAGS: %s" % tags
 200         return render_to_response('catalogue/search_multiple_hits.html',
 201                                   {'tags': tags,
 202                                    'prefix': query,
 203                                    'results': { 'author': author_results,
 204                                                 'title': title_results,
 205                                                 'content': text_phrase,
 206                                                 'other': everywhere},
 207                                    'did_you_mean': suggestion},
 208             context_instance=RequestContext(request))