apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 from time import sleep
  19 import re
  20 import enchant
  21 import json
  22
# Polish (pl_PL) enchant dictionary, created once when this module is imported.
# did_you_mean() uses it to spell-check query tokens and to fetch suggestions.
dictionary = enchant.Dict('pl_PL')
  24
  25
  26 def match_word_re(word):
  27     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  28         return r"\b%s\b" % word
  29     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  30         return "[[:<:]]%s[[:>:]]" % word
  31
  32
  33 def did_you_mean(query, tokens):
  34     change = {}
  35     for t in tokens:
  36         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  37         if len(authors) > 0:
  38             continue
  39
  40         if not dictionary.check(t):
  41             try:
  42                 change_to = dictionary.suggest(t)[0].lower()
  43                 if change_to != t.lower():
  44                     change[t] = change_to
  45             except IndexError:
  46                 pass
  47
  48     if change == {}:
  49         return None
  50
  51     for frm, to in change.items():
  52         query = query.replace(frm, to)
  53
  54     return query
  55
  56
  57 JVM.attachCurrentThread()
  58 _search = None
  59
  60
  61 def get_search():
  62     global _search
  63
  64     while _search is False:
  65         sleep(1)
  66
  67     if _search is None:
  68         _search = False
  69         _search = Search()
  70     return _search
  71
  72
  73 def hint(request):
  74     prefix = request.GET.get('term', '')
  75     if len(prefix) < 2:
  76         return JSONResponse([])
  77     JVM.attachCurrentThread()
  78
  79     search = get_search()
  80     hint = search.hint()
  81     try:
  82         tags = request.GET.get('tags', '')
  83         hint.tags(Tag.get_tag_list(tags))
  84     except:
  85         pass
  86
  87     # tagi beda ograniczac tutaj
  88     # ale tagi moga byc na ksiazce i na fragmentach
  89     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  90     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  91
  92     tags = search.hint_tags(prefix, pdcounter=True)
  93     books = search.hint_books(prefix)
  94
  95
  96     def is_dupe(tag):
  97         if isinstance(tag, PDCounterAuthor):
  98             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  99                 return True
 100         elif isinstance(tag, PDCounterBook):
 101             if filter(lambda b: b.slug == tag.slug, tags):
 102                 return True
 103         return False
 104
 105     tags = filter(lambda t: not is_dupe(t), tags)
 106
 107     def category_name(c):
 108         if c.startswith('pd_'):
 109             c = c[len('pd_'):]
 110         return _(c)
 111
 112     callback = request.GET.get('callback', None)
 113     data = [{'label': t.name,
 114               'category': category_name(t.category),
 115               'id': t.id,
 116               'url': t.get_absolute_url()}
 117               for t in tags] + \
 118               [{'label': b.title,
 119                 'category': _('book'),
 120                 'id': b.id,
 121                 'url': b.get_absolute_url()}
 122                 for b in books]
 123     if callback:
 124         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 125                             content_type="application/json; charset=utf-8")
 126     else:
 127         return JSONResponse(data)
 128
 129
 130
 131 def main(request):
 132     results = {}
 133     JVM.attachCurrentThread()  # where to put this?
 134
 135     results = None
 136     query = None
 137     fuzzy = False #0.8
 138
 139     query = request.GET.get('q','')
 140     # book_id = request.GET.get('book', None)
 141     # book = None
 142     # if book_id is not None:
 143     #     book = get_object_or_404(Book, id=book_id)
 144
 145     # hint = search.hint()
 146     # try:
 147     #     tag_list = Tag.get_tag_list(tags)
 148     # except:
 149     #     tag_list = []
 150
 151     if len(query) < 2:
 152         return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 153                                   context_instance=RequestContext(request))
 154
 155     search = get_search()
 156     # hint.tags(tag_list)
 157     # if book:
 158     #     hint.books(book)
 159     tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 160     tags = split_tags(tags)
 161
 162     toks = StringReader(query)
 163     tokens_cache = {}
 164
 165     author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 166     title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 167
 168     # Boost main author/title results with mixed search, and save some of its results for end of list.
 169     # boost author, title results
 170     author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 171     author_title_rest = []
 172     for b in author_title_mixed:
 173         bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 174         for b2 in bks:
 175             b2.boost *= 1.1
 176         if bks is []:
 177             author_title_rest.append(b)
 178
 179     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 180     # Because the query is using only one field.
 181     text_phrase = SearchResult.aggregate(
 182         search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 183         search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 184
 185     everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 186
 187     def already_found(results):
 188         def f(e):
 189             for r in results:
 190                 if e.book_id == r.book_id:
 191                     e.boost = 0.9
 192                     results.append(e)
 193                     return True
 194             return False
 195         return f
 196     f = already_found(author_results + title_results + text_phrase)
 197     everywhere = filter(lambda x: not f(x), everywhere)
 198
 199     author_results = SearchResult.aggregate(author_results)
 200     title_results = SearchResult.aggregate(title_results)
 201
 202     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 203
 204     for res in [author_results, title_results, text_phrase, everywhere]:
 205         res.sort(reverse=True)
 206         for r in res:
 207             for h in r.hits:
 208                 h['snippets'] = map(lambda s:
 209                                     re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 210                                             re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 211
 212     suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 213
 214     def ensure_exists(r):
 215         try:
 216             return r.book
 217         except Book.DoesNotExist:
 218             return False
 219
 220     author_results = filter(ensure_exists, author_results)
 221     title_results = filter(ensure_exists, title_results)
 222     text_phrase = filter(ensure_exists, text_phrase)
 223     everywhere = filter(ensure_exists, everywhere)
 224
 225     results = author_results + title_results + text_phrase + everywhere
 226     # ensure books do exists & sort them
 227     results.sort(reverse=True)
 228
 229     if len(results) == 1:
 230         fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 231         if len(fragment_hits) == 1:
 232             #anchor = fragment_hits[0]['fragment']
 233             #frag = Fragment.objects.get(anchor=anchor)
 234             return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 235         return HttpResponseRedirect(results[0].book.get_absolute_url())
 236     elif len(results) == 0:
 237         form = PublishingSuggestForm(initial={"books": query + ", "})
 238         return render_to_response('catalogue/search_no_hits.html',
 239                                   {'tags': tags,
 240                                    'prefix': query,
 241                                    "form": form,
 242                                    'did_you_mean': suggestion},
 243             context_instance=RequestContext(request))
 244
 245     return render_to_response('catalogue/search_multiple_hits.html',
 246                               {'tags': tags,
 247                                'prefix': query,
 248                                'results': { 'author': author_results,
 249                                             'title': title_results,
 250                                             'content': text_phrase,
 251                                             'other': everywhere},
 252                                'did_you_mean': suggestion},
 253         context_instance=RequestContext(request))