apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from catalogue.fields import dumps
  14 from catalogue.views import JSONResponse
  15 from search import Search, JVM, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 import enchant
  20
  21 dictionary = enchant.Dict('pl_PL')  # Polish (pl_PL) enchant dictionary; did_you_mean() uses its check()/suggest().
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
  31 def did_you_mean(query, tokens):
  32     change = {}
  33     for t in tokens:
  34         authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  35         if len(authors) > 0:
  36             continue
  37
  38         if not dictionary.check(t):
  39             try:
  40                 change_to = dictionary.suggest(t)[0].lower()
  41                 if change_to != t.lower():
  42                     change[t] = change_to
  43             except IndexError:
  44                 pass
  45
  46     if change == {}:
  47         return None
  48
  49     for frm, to in change.items():
  50         query = query.replace(frm, to)
  51
  52     return query
  53
  54 JVM.attachCurrentThread()  # Runs at import time, before Search() is created; hint() and main() call it again per request.
  55 search = Search()  # Module-level Search instance shared by the hint() and main() views.
  56
  57 def hint(request):
  58     prefix = request.GET.get('term', '')
  59     if len(prefix) < 2:
  60         return JSONResponse([])
  61     JVM.attachCurrentThread()
  62
  63     hint = search.hint()
  64     try:
  65         tags = request.GET.get('tags', '')
  66         hint.tags(Tag.get_tag_list(tags))
  67     except:
  68         pass
  69
  70     # tagi beda ograniczac tutaj
  71     # ale tagi moga byc na ksiazce i na fragmentach
  72     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  73     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  74
  75     tags = search.hint_tags(prefix, pdcounter=True)
  76     books = search.hint_books(prefix)
  77
  78     def category_name(c):
  79         if c.startswith('pd_'):
  80             c=c[len('pd_'):]
  81         return _(c)
  82
  83     return JSONResponse(
  84         [{'label': t.name,
  85           'category': category_name(t.category),
  86           'id': t.id,
  87           'url': t.get_absolute_url()}
  88           for t in tags] + \
  89           [{'label': b.title,
  90             'category': _('book'),
  91             'id': b.id,
  92             'url': b.get_absolute_url()}
  93             for b in books])
  94
  95
  96 def main(request):
  97     results = {}
  98     JVM.attachCurrentThread()  # where to put this?
  99
 100     results = None
 101     query = None
 102     fuzzy = False #0.8
 103
 104     if 'q' in request.GET:
 105         # tags = request.GET.get('tags', '')
 106         query = request.GET['q']
 107         # book_id = request.GET.get('book', None)
 108         # book = None
 109         # if book_id is not None:
 110         #     book = get_object_or_404(Book, id=book_id)
 111
 112         # hint = search.hint()
 113         # try:
 114         #     tag_list = Tag.get_tag_list(tags)
 115         # except:
 116         #     tag_list = []
 117
 118         if len(query) < 2:
 119             return render_to_response('catalogue/search_too_short.html', {'prefix': query},
 120                                       context_instance=RequestContext(request))
 121
 122         # hint.tags(tag_list)
 123         # if book:
 124         #     hint.books(book)
 125         tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
 126         tags = split_tags(tags)
 127
 128         toks = StringReader(query)
 129         tokens_cache = {}
 130
 131         author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
 132         title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
 133
 134         # Boost main author/title results with mixed search, and save some of its results for end of list.
 135         # boost author, title results
 136         author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
 137         author_title_rest = []
 138         for b in author_title_mixed:
 139             bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 140             for b2 in bks:
 141                 b2.boost *= 1.1
 142             if bks is []:
 143                 author_title_rest.append(b)
 144
 145         # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 146         # Because the query is using only one field.
 147         text_phrase = SearchResult.aggregate(
 148             search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
 149             search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
 150
 151         everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 152
 153         def already_found(results):
 154             def f(e):
 155                 for r in results:
 156                     if e.book_id == r.book_id:
 157                         e.boost = 0.9
 158                         results.append(e)
 159                         return True
 160                 return False
 161             return f
 162         f = already_found(author_results + title_results + text_phrase)
 163         everywhere = filter(lambda x: not f(x), everywhere)
 164
 165         author_results = SearchResult.aggregate(author_results)
 166         title_results = SearchResult.aggregate(title_results)
 167
 168         everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 169
 170         for res in [author_results, title_results, text_phrase, everywhere]:
 171             res.sort(reverse=True)
 172             for r in res:
 173                 for h in r.hits:
 174                     h['snippets'] = map(lambda s:
 175                                         re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 176                                                 re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 177
 178         suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 179         print "dym? %s" % repr(suggestion).encode('utf-8')
 180
 181         results = author_results + title_results + text_phrase + everywhere
 182         results.sort(reverse=True)
 183
 184         if len(results) == 1:
 185             fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 186             if len(fragment_hits) == 1:
 187                 #anchor = fragment_hits[0]['fragment']
 188                 #frag = Fragment.objects.get(anchor=anchor)
 189                 return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 190             return HttpResponseRedirect(results[0].book.get_absolute_url())
 191         elif len(results) == 0:
 192             form = PublishingSuggestForm(initial={"books": query + ", "})
 193             return render_to_response('catalogue/search_no_hits.html',
 194                                       {'tags': tags,
 195                                        'prefix': query,
 196                                        "form": form,
 197                                        'did_you_mean': suggestion},
 198                 context_instance=RequestContext(request))
 199
 200         print "TAGS: %s" % tags
 201         return render_to_response('catalogue/search_multiple_hits.html',
 202                                   {'tags': tags,
 203                                    'prefix': query,
 204                                    'results': { 'author': author_results,
 205                                                 'title': title_results,
 206                                                 'content': text_phrase,
 207                                                 'other': everywhere},
 208                                    'did_you_mean': suggestion},
 209             context_instance=RequestContext(request))