apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 from time import sleep
  19 import re
  20 import enchant
  21
# Polish (pl_PL) spell-checking dictionary, created once at import time and
# used by did_you_mean() to check tokens and to propose corrections.
dictionary = enchant.Dict('pl_PL')
  23
  24
  25 def match_word_re(word):
  26     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  27         return r"\b%s\b" % word
  28     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  29         return "[[:<:]]%s[[:>:]]" % word
  30
  31
  32 def did_you_mean(query, tokens):
  33     change = {}
  34     for t in tokens:
  35         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  36         if len(authors) > 0:
  37             continue
  38
  39         if not dictionary.check(t):
  40             try:
  41                 change_to = dictionary.suggest(t)[0].lower()
  42                 if change_to != t.lower():
  43                     change[t] = change_to
  44             except IndexError:
  45                 pass
  46
  47     if change == {}:
  48         return None
  49
  50     for frm, to in change.items():
  51         query = query.replace(frm, to)
  52
  53     return query
  54
  55
  56 JVM.attachCurrentThread()
  57 _search = None
  58
  59
  60 def get_search():
  61     global _search
  62
  63     while _search is False:
  64         sleep(1)
  65
  66     if _search is None:
  67         _search = False
  68         _search = Search()
  69     return _search
  70
  71
  72 def hint(request):
  73     prefix = request.GET.get('term', '')
  74     if len(prefix) < 2:
  75         return JSONResponse([])
  76     JVM.attachCurrentThread()
  77
  78     search = get_search()
  79     hint = search.hint()
  80     try:
  81         tags = request.GET.get('tags', '')
  82         hint.tags(Tag.get_tag_list(tags))
  83     except:
  84         pass
  85
  86     # tagi beda ograniczac tutaj
  87     # ale tagi moga byc na ksiazce i na fragmentach
  88     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  89     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  90
  91     tags = search.hint_tags(prefix, pdcounter=True)
  92     books = search.hint_books(prefix)
  93
  94
  95     def is_dupe(tag):
  96         if isinstance(tag, PDCounterAuthor):
  97             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  98                 return True
  99         elif isinstance(tag, PDCounterBook):
 100             if filter(lambda b: b.slug == tag.slug, tags):
 101                 return True
 102         return False
 103
 104     tags = filter(lambda t: not is_dupe(t), tags)
 105
 106     def category_name(c):
 107         if c.startswith('pd_'):
 108             c = c[len('pd_'):]
 109         return _(c)
 110
 111     return JSONResponse(
 112         [{'label': t.name,
 113           'category': category_name(t.category),
 114           'id': t.id,
 115           'url': t.get_absolute_url()}
 116           for t in tags] + \
 117           [{'label': b.title,
 118             'category': _('book'),
 119             'id': b.id,
 120             'url': b.get_absolute_url()}
 121             for b in books])
 122
 123
 124 def main(request):
 125     results = {}
 126     JVM.attachCurrentThread()  # where to put this?
 127
 128     results = None
 129     query = None
 130     fuzzy = False #0.8
 131
 132     query = request.GET.get('q','')
 133     # book_id = request.GET.get('book', None)
 134     # book = None
 135     # if book_id is not None:
 136     #     book = get_object_or_404(Book, id=book_id)
 137
 138     # hint = search.hint()
 139     # try:
 140     #     tag_list = Tag.get_tag_list(tags)
 141     # except:
 142     #     tag_list = []
 143
 144     if len(query) < 2:
 145         return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 146                                   context_instance=RequestContext(request))
 147
 148     search = get_search()
 149     # hint.tags(tag_list)
 150     # if book:
 151     #     hint.books(book)
 152     tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 153     tags = split_tags(tags)
 154
 155     toks = StringReader(query)
 156     tokens_cache = {}
 157
 158     author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 159     title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 160
 161     # Boost main author/title results with mixed search, and save some of its results for end of list.
 162     # boost author, title results
 163     author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 164     author_title_rest = []
 165     for b in author_title_mixed:
 166         bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 167         for b2 in bks:
 168             b2.boost *= 1.1
 169         if bks is []:
 170             author_title_rest.append(b)
 171
 172     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 173     # Because the query is using only one field.
 174     text_phrase = SearchResult.aggregate(
 175         search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 176         search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 177
 178     everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 179
 180     def already_found(results):
 181         def f(e):
 182             for r in results:
 183                 if e.book_id == r.book_id:
 184                     e.boost = 0.9
 185                     results.append(e)
 186                     return True
 187             return False
 188         return f
 189     f = already_found(author_results + title_results + text_phrase)
 190     everywhere = filter(lambda x: not f(x), everywhere)
 191
 192     author_results = SearchResult.aggregate(author_results)
 193     title_results = SearchResult.aggregate(title_results)
 194
 195     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 196
 197     for res in [author_results, title_results, text_phrase, everywhere]:
 198         res.sort(reverse=True)
 199         for r in res:
 200             for h in r.hits:
 201                 h['snippets'] = map(lambda s:
 202                                     re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 203                                             re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 204
 205     suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 206
 207     def ensure_exists(r):
 208         try:
 209             return r.book
 210         except Book.DoesNotExist:
 211             return False
 212
 213     author_results = filter(ensure_exists, author_results)
 214     title_results = filter(ensure_exists, title_results)
 215     text_phrase = filter(ensure_exists, text_phrase)
 216     everywhere = filter(ensure_exists, everywhere)
 217
 218     results = author_results + title_results + text_phrase + everywhere
 219     # ensure books do exists & sort them
 220     results.sort(reverse=True)
 221
 222     if len(results) == 1:
 223         fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 224         if len(fragment_hits) == 1:
 225             #anchor = fragment_hits[0]['fragment']
 226             #frag = Fragment.objects.get(anchor=anchor)
 227             return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 228         return HttpResponseRedirect(results[0].book.get_absolute_url())
 229     elif len(results) == 0:
 230         form = PublishingSuggestForm(initial={"books": query + ", "})
 231         return render_to_response('catalogue/search_no_hits.html',
 232                                   {'tags': tags,
 233                                    'prefix': query,
 234                                    "form": form,
 235                                    'did_you_mean': suggestion},
 236             context_instance=RequestContext(request))
 237
 238     return render_to_response('catalogue/search_multiple_hits.html',
 239                               {'tags': tags,
 240                                'prefix': query,
 241                                'results': { 'author': author_results,
 242                                             'title': title_results,
 243                                             'content': text_phrase,
 244                                             'other': everywhere},
 245                                'did_you_mean': suggestion},
 246         context_instance=RequestContext(request))