apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.fields import dumps
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 import enchant
  20
  21 dictionary = enchant.Dict('pl_PL')  # module-level enchant dictionary for the pl_PL locale; did_you_mean() calls its check() and suggest()
  22
  23
  def match_word_re(word, engine=None):
      """Build a whole-word regular expression for ``word`` in the database's dialect.

      The pattern is meant for Django ``regex``/``iregex`` lookups, whose
      regular-expression syntax depends on the database backend.

      :param word: text to match as a whole word. It is interpolated verbatim,
          so regex metacharacters in it keep their special meaning.
      :param engine: database ENGINE string to build the pattern for; defaults
          to ``settings.DATABASES['default']['ENGINE']``.
      :returns: the pattern string (never ``None``).
      """
      if engine is None:
          engine = settings.DATABASES['default']['ENGINE']

      if 'sqlite' in engine:
          # Django evaluates SQLite's REGEXP with Python's ``re`` module.
          return r"\b%s\b" % word
      if 'mysql' in engine:
          # Henry Spencer style word-boundary markers.
          # NOTE(review): MySQL 8.0.4+ (ICU regex) rejects these markers and
          # uses \b instead - confirm the server version in use.
          return "[[:<:]]%s[[:>:]]" % word
      if 'postgres' in engine or 'postgis' in engine:
          # In PostgreSQL regexes \b means backspace; \y is the word boundary.
          return r"\y%s\y" % word
      # Unknown backend: fall back to the \b form instead of returning None,
      # which is not a usable value for a regex lookup.
      return r"\b%s\b" % word
  29
  30
  def did_you_mean(query, tokens):
      """Suggest a spell-corrected variant of ``query``.

      Every token that is neither part of an author tag's name nor accepted by
      the module-level enchant ``dictionary`` is replaced, as a whole word and
      regardless of case, by the dictionary's first suggestion (lower-cased).

      :param query: the search phrase as typed by the user.
      :param tokens: iterable of words to check (presumably the analyzed tokens
          of ``query`` - confirm against the caller).
      :returns: the corrected query, or ``None`` when there is nothing to
          correct or the correction would leave the query unchanged.
      """
      change = {}
      for t in tokens:
          if not t:
              # Nothing to spell-check in an empty token.
              continue

          # Tokens found in an author's name are skipped without spell-checking.
          # exists() asks the database for a yes/no answer instead of loading rows.
          if Tag.objects.filter(category='author', name__iregex=match_word_re(t)).exists():
              continue

          if dictionary.check(t):
              continue

          suggestions = dictionary.suggest(t)
          if not suggestions:
              continue

          change_to = suggestions[0].lower()
          if change_to != t.lower():
              change[t] = change_to

      if not change:
          return None

      corrected = query
      for frm, to in change.items():
          # Replace whole words only, so a token that also occurs inside a longer
          # word does not corrupt that word. Lookarounds are used instead of \b so
          # tokens that begin or end with punctuation still match.
          pattern = re.compile(r"(?<!\w)%s(?!\w)" % re.escape(frm),
                               re.IGNORECASE | re.UNICODE)
          # A function replacement keeps backslashes in ``to`` literal.
          corrected = pattern.sub(lambda m, to=to: to, corrected)

      if corrected == query:
          return None
      return corrected
  53
  54 JVM.attachCurrentThread()  # runs once at import time, before Search() below is built; presumably the JVM-backed search needs the thread attached first (TODO confirm)
  55 search = Search()  # module-level Search instance shared by the hint() and main() views
  56
  57
  def hint(request):
      """Return JSON autocomplete hints for the ``term`` GET parameter.

      Terms shorter than two characters yield an empty JSON list. Otherwise the
      response lists tags matching the prefix followed by books matching it, each
      as a dict with ``label``, ``category``, ``id`` and ``url`` keys.

      :param request: the HTTP request; reads the ``term`` and ``tags`` GET
          parameters.
      :returns: a ``JSONResponse`` wrapping the list of hint dicts.
      """
      prefix = request.GET.get('term', '')
      if len(prefix) < 2:
          return JSONResponse([])
      JVM.attachCurrentThread()

      # NOTE: this hint object is configured with the requested tags but is not
      # passed to the hint_tags()/hint_books() calls below.
      search_hint = search.hint()
      try:
          search_hint.tags(Tag.get_tag_list(request.GET.get('tags', '')))
      except Exception:
          # Best effort: an unparsable ``tags`` parameter must not break the
          # hints. Exception (not a bare except) lets SystemExit and
          # KeyboardInterrupt propagate.
          pass

      # Tags are going to restrict the results here,
      # but tags can be set on a book as well as on fragments.
      # If the tags concern the book only, the new ones have to be in the same book;
      # if they concern themes, they have to be in the same fragment.

      tags = search.hint_tags(prefix, pdcounter=True)
      books = search.hint_books(prefix)

      def category_name(c):
          # Strip the 'pd_' prefix so the plain category name gets translated.
          if c.startswith('pd_'):
              c = c[len('pd_'):]
          return _(c)

      tag_hints = [{'label': t.name,
                    'category': category_name(t.category),
                    'id': t.id,
                    'url': t.get_absolute_url()}
                   for t in tags]
      book_hints = [{'label': b.title,
                     'category': _('book'),
                     'id': b.id,
                     'url': b.get_absolute_url()}
                    for b in books]
      return JSONResponse(tag_hints + book_hints)
  95
  96
  # Any run of spaces/tabs/newlines that contains a newline.
  _SNIPPET_NEWLINE_RUN_RE = re.compile(r"[ \t\n]*\n[ \t\n]*")
  # Spaces/tabs/newlines at the very beginning or the very end of a snippet.
  _SNIPPET_EDGE_WS_RE = re.compile(r"(^[ \t\n]+|[ \t\n]+$)")


  def _clean_snippet(snippet):
      """Collapse whitespace around newlines and trim the ends of ``snippet``."""
      collapsed = _SNIPPET_NEWLINE_RUN_RE.sub(u"\n", snippet)
      return _SNIPPET_EDGE_WS_RE.sub(u"", collapsed)


  def _book_exists(result):
      """Tell whether the book of a search result can be loaded and is truthy."""
      try:
          return bool(result.book)
      except Book.DoesNotExist:
          return False


  def main(request):
      """Render the search results for the ``q`` GET parameter.

      * A query shorter than two characters renders
        ``catalogue/search_too_short.html``.
      * Exactly one result redirects to its only fragment hit if there is exactly
        one, otherwise to the book itself.
      * No results render ``catalogue/search_no_hits.html`` with a publishing
        suggestion form and an optional spelling suggestion.
      * Anything else renders ``catalogue/search_multiple_hits.html`` with the
        results grouped as ``author``, ``title``, ``content`` and ``other``.

      :param request: the HTTP request; reads the ``q`` GET parameter.
      :returns: an ``HttpResponse`` (rendered template or redirect).
      """
      JVM.attachCurrentThread()  # where to put this?

      fuzzy = False  # 0.8
      query = request.GET.get('q', '')

      if len(query) < 2:
          return render_to_response('catalogue/search_too_short.html', {'prefix': query},
                                    context_instance=RequestContext(request))

      tags = split_tags(search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy))

      toks = StringReader(query)
      tokens_cache = {}

      author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
      title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
      main_results = author_results + title_results

      # Boost main author/title results with mixed search, and save the mixed
      # results that hit neither list for the end of the results.
      author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'],
                                              fuzzy=fuzzy, tokens_cache=tokens_cache)
      author_title_rest = []
      for mixed in author_title_mixed:
          same_book = [r for r in main_results if r.book_id == mixed.book_id]
          for r in same_book:
              r.boost *= 1.1
          if not same_book:
              author_title_rest.append(mixed)

      # Do a phrase search but a term search as well - this can give us better
      # snippets than search_everywhere, because the query is using only one field.
      text_phrase = SearchResult.aggregate(
          search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache,
                               snippets=True, book=False, slop=4),
          search.search_some(toks, ['content'], tokens_cache=tokens_cache,
                             snippets=True, book=False))

      everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)

      # Drop the "everywhere" hits for books that a more specific search
      # (authors, title or content) has already found.
      found_book_ids = set(r.book_id for r in main_results + text_phrase)
      everywhere = [e for e in everywhere if e.book_id not in found_book_ids]

      author_results = SearchResult.aggregate(author_results)
      title_results = SearchResult.aggregate(title_results)
      everywhere = SearchResult.aggregate(everywhere, author_title_rest)

      for res in [author_results, title_results, text_phrase, everywhere]:
          res.sort(reverse=True)
          for r in res:
              for h in r.hits:
                  h['snippets'] = [_clean_snippet(s) for s in h['snippets']]

      suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))

      # Keep only the results whose book can still be loaded.
      author_results = [r for r in author_results if _book_exists(r)]
      title_results = [r for r in title_results if _book_exists(r)]
      text_phrase = [r for r in text_phrase if _book_exists(r)]
      everywhere = [r for r in everywhere if _book_exists(r)]

      results = author_results + title_results + text_phrase + everywhere
      results.sort(reverse=True)

      if len(results) == 1:
          # A single result: go straight to its only fragment, or to the book.
          fragment_hits = [h for h in results[0].hits if 'fragment' in h]
          if len(fragment_hits) == 1:
              return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
          return HttpResponseRedirect(results[0].book.get_absolute_url())

      if not results:
          form = PublishingSuggestForm(initial={"books": query + ", "})
          return render_to_response('catalogue/search_no_hits.html',
                                    {'tags': tags,
                                     'prefix': query,
                                     "form": form,
                                     'did_you_mean': suggestion},
                                    context_instance=RequestContext(request))

      return render_to_response('catalogue/search_multiple_hits.html',
                                {'tags': tags,
                                 'prefix': query,
                                 'results': {'author': author_results,
                                             'title': title_results,
                                             'content': text_phrase,
                                             'other': everywhere},
                                 'did_you_mean': suggestion},
                                context_instance=RequestContext(request))