apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.views import JSONResponse
  14 from search import Search, JVM, SearchResult
  15 from lucene import StringReader
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 import enchant
  19
# Spell-checking dictionary for the 'pl_PL' locale, created once at import
# time; did_you_mean() uses it to check query tokens and suggest corrections.
dictionary = enchant.Dict('pl_PL')
  21
  22
  23 def match_word_re(word):
  24     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  25         return r"\b%s\b" % word
  26     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  27         return "[[:<:]]%s[[:>:]]" % word
  28
  29
  30 def did_you_mean(query, tokens):
  31     change = {}
  32     for t in tokens:
  33         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  34         if len(authors) > 0:
  35             continue
  36
  37         if not dictionary.check(t):
  38             try:
  39                 change_to = dictionary.suggest(t)[0].lower()
  40                 if change_to != t.lower():
  41                     change[t] = change_to
  42             except IndexError:
  43                 pass
  44
  45     if change == {}:
  46         return None
  47
  48     for frm, to in change.items():
  49         query = query.replace(frm, to)
  50
  51     return query
  52
# Import-time setup: attach the thread importing this module to the JVM, then
# create the module-wide Search instance shared by the hint() and main() views.
JVM.attachCurrentThread()
search = Search()
  55
  56
  57 def hint(request):
  58     prefix = request.GET.get('term', '')
  59     if len(prefix) < 2:
  60         return JSONResponse([])
  61     JVM.attachCurrentThread()
  62
  63     hint = search.hint()
  64     try:
  65         tags = request.GET.get('tags', '')
  66         hint.tags(Tag.get_tag_list(tags))
  67     except:
  68         pass
  69
  70     # tagi beda ograniczac tutaj
  71     # ale tagi moga byc na ksiazce i na fragmentach
  72     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  73     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  74
  75     tags = search.hint_tags(prefix, pdcounter=True)
  76     books = search.hint_books(prefix)
  77
  78     def category_name(c):
  79         if c.startswith('pd_'):
  80             c = c[len('pd_'):]
  81         return _(c)
  82
  83     return JSONResponse(
  84         [{'label': t.name,
  85           'category': category_name(t.category),
  86           'id': t.id,
  87           'url': t.get_absolute_url()}
  88           for t in tags] + \
  89           [{'label': b.title,
  90             'category': _('book'),
  91             'id': b.id,
  92             'url': b.get_absolute_url()}
  93             for b in books])
  94
  95
  96 def main(request):
  97     results = {}
  98     JVM.attachCurrentThread()  # where to put this?
  99
 100     results = None
 101     query = None
 102     fuzzy = False #0.8
 103
 104     query = request.GET.get('q','')
 105     # book_id = request.GET.get('book', None)
 106     # book = None
 107     # if book_id is not None:
 108     #     book = get_object_or_404(Book, id=book_id)
 109
 110     # hint = search.hint()
 111     # try:
 112     #     tag_list = Tag.get_tag_list(tags)
 113     # except:
 114     #     tag_list = []
 115
 116     if len(query) < 2:
 117         return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 118                                   context_instance=RequestContext(request))
 119
 120     # hint.tags(tag_list)
 121     # if book:
 122     #     hint.books(book)
 123     tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 124     tags = split_tags(tags)
 125
 126     toks = StringReader(query)
 127     tokens_cache = {}
 128
 129     author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 130     title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 131
 132     # Boost main author/title results with mixed search, and save some of its results for end of list.
 133     # boost author, title results
 134     author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 135     author_title_rest = []
 136     for b in author_title_mixed:
 137         bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 138         for b2 in bks:
 139             b2.boost *= 1.1
 140         if bks is []:
 141             author_title_rest.append(b)
 142
 143     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 144     # Because the query is using only one field.
 145     text_phrase = SearchResult.aggregate(
 146         search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 147         search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 148
 149     everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 150
 151     def already_found(results):
 152         def f(e):
 153             for r in results:
 154                 if e.book_id == r.book_id:
 155                     e.boost = 0.9
 156                     results.append(e)
 157                     return True
 158             return False
 159         return f
 160     f = already_found(author_results + title_results + text_phrase)
 161     everywhere = filter(lambda x: not f(x), everywhere)
 162
 163     author_results = SearchResult.aggregate(author_results)
 164     title_results = SearchResult.aggregate(title_results)
 165
 166     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 167
 168     for res in [author_results, title_results, text_phrase, everywhere]:
 169         res.sort(reverse=True)
 170         for r in res:
 171             for h in r.hits:
 172                 h['snippets'] = map(lambda s:
 173                                     re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 174                                             re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 175
 176     suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 177
 178     def ensure_exists(r):
 179         try:
 180             return r.book
 181         except Book.DoesNotExist:
 182             return False
 183
 184     author_results = filter(ensure_exists, author_results)
 185     title_results = filter(ensure_exists, title_results)
 186     text_phrase = filter(ensure_exists, text_phrase)
 187     everywhere = filter(ensure_exists, everywhere)
 188
 189     results = author_results + title_results + text_phrase + everywhere
 190     # ensure books do exists & sort them
 191     results.sort(reverse=True)
 192
 193     if len(results) == 1:
 194         fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 195         if len(fragment_hits) == 1:
 196             #anchor = fragment_hits[0]['fragment']
 197             #frag = Fragment.objects.get(anchor=anchor)
 198             return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 199         return HttpResponseRedirect(results[0].book.get_absolute_url())
 200     elif len(results) == 0:
 201         form = PublishingSuggestForm(initial={"books": query + ", "})
 202         return render_to_response('catalogue/search_no_hits.html',
 203                                   {'tags': tags,
 204                                    'prefix': query,
 205                                    "form": form,
 206                                    'did_you_mean': suggestion},
 207             context_instance=RequestContext(request))
 208
 209     return render_to_response('catalogue/search_multiple_hits.html',
 210                               {'tags': tags,
 211                                'prefix': query,
 212                                'results': { 'author': author_results,
 213                                             'title': title_results,
 214                                             'content': text_phrase,
 215                                             'other': everywhere},
 216                                'did_you_mean': suggestion},
 217         context_instance=RequestContext(request))