src/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 import json
  19
  20
  21 def match_word_re(word):
  22     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  23         return r"\b%s\b" % word
  24     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  25         return "[[:<:]]%s[[:>:]]" % word
  26
  27
  28 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  29
  30
  31 def remove_query_syntax_chars(query, replace=' '):
  32     return query_syntax_chars.sub(' ', query)
  33
  34
  35 def did_you_mean(query, tokens):
  36     return query
  37     # change = {}
  38     # for t in tokens:
  39     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  40     #     if len(authors) > 0:
  41     #         continue
  42
  43     #     if False:
  44     #         if not dictionary.check(t):
  45     #             try:
  46     #                 change_to = dictionary.suggest(t)[0].lower()
  47     #                 if change_to != t.lower():
  48     #                     change[t] = change_to
  49     #             except IndexError:
  50     #                 pass
  51
  52     # if change == {}:
  53     #     return None
  54
  55     # for frm, to in change.items():
  56     #     query = query.replace(frm, to)
  57
  58     # return query
  59
  60
  61 @cache.never_cache
  62 def hint(request):
  63     prefix = request.GET.get('term', '')
  64     if len(prefix) < 2:
  65         return JsonResponse([], safe=False)
  66
  67     prefix = remove_query_syntax_chars(prefix)
  68
  69     search = Search()
  70     # tagi beda ograniczac tutaj
  71     # ale tagi moga byc na ksiazce i na fragmentach
  72     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  73     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  74
  75     def is_dupe(tag):
  76         if isinstance(tag, PDCounterAuthor):
  77             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  78                 return True
  79         elif isinstance(tag, PDCounterBook):
  80             if filter(lambda b: b.slug == tag.slug, tags):
  81                 return True
  82         return False
  83
  84     def category_name(c):
  85         if c.startswith('pd_'):
  86             c = c[len('pd_'):]
  87         return _(c)
  88
  89     try:
  90         limit = int(request.GET.get('max', ''))
  91     except ValueError:
  92         limit = -1
  93     else:
  94         if limit < 1:
  95             limit = -1
  96
  97     data = []
  98
  99     tags = search.hint_tags(prefix, pdcounter=True)
 100     tags = filter(lambda t: not is_dupe(t), tags)
 101     for t in tags:
 102         if not limit:
 103             break
 104         limit -= 1
 105         data.append({
 106             'label': t.name,
 107             'category': category_name(t.category),
 108             'id': t.id,
 109             'url': t.get_absolute_url()
 110             })
 111     if limit:
 112         books = search.hint_books(prefix)
 113         for b in books:
 114             if not limit:
 115                 break
 116             limit -= 1
 117             data.append({
 118                 'label': b.title,
 119                 'category': _('book'),
 120                 'id': b.id,
 121                 'url': b.get_absolute_url()
 122                 })
 123
 124     callback = request.GET.get('callback', None)
 125     if callback:
 126         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 127                             content_type="application/json; charset=utf-8")
 128     else:
 129         return JsonResponse(data, safe=False)
 130
 131
 132 @cache.never_cache
 133 def main(request):
 134     query = request.GET.get('q', '')
 135
 136     if len(query) < 2:
 137         return render_to_response(
 138             'catalogue/search_too_short.html', {'prefix': query},
 139             context_instance=RequestContext(request))
 140
 141     query = remove_query_syntax_chars(query)
 142
 143     search = Search()
 144
 145     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 146         + search.index.analyze(text=query, field="themes")
 147
 148     # change hints
 149     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 150     tags = split_tags(tags)
 151
 152     author_results = search.search_phrase(query, 'authors', book=True)
 153     translator_results = search.search_phrase(query, 'translators', book=True)
 154
 155     title_results = search.search_phrase(query, 'title', book=True)
 156
 157     # Boost main author/title results with mixed search, and save some of its results for end of list.
 158     # boost author, title results
 159     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 160     author_title_rest = []
 161
 162     for b in author_title_mixed:
 163         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 164         for b2 in also_in_mixed:
 165             b2.boost *= 1.1
 166         if also_in_mixed is []:
 167             author_title_rest.append(b)
 168
 169     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 170     # Because the query is using only one field.
 171     text_phrase = SearchResult.aggregate(
 172         search.search_phrase(query, 'text', snippets=True, book=False),
 173         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 174
 175     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 176
 177     def already_found(results):
 178         def f(e):
 179             for r in results:
 180                 if e.book_id == r.book_id:
 181                     e.boost = 0.9
 182                     results.append(e)
 183                     return True
 184             return False
 185         return f
 186     f = already_found(author_results + translator_results + title_results + text_phrase)
 187     everywhere = filter(lambda x: not f(x), everywhere)
 188
 189     author_results = SearchResult.aggregate(author_results)
 190     translator_results = SearchResult.aggregate(translator_results)
 191     title_results = SearchResult.aggregate(title_results)
 192
 193     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 194
 195     for field, res in [('authors', author_results),
 196                        ('translators', translator_results),
 197                        ('title', title_results),
 198                        ('text', text_phrase),
 199                        ('text', everywhere)]:
 200         res.sort(reverse=True)
 201         for r in res:
 202             search.get_snippets(r, query, field, 3)
 203
 204     suggestion = u''
 205
 206     def ensure_exists(r):
 207         try:
 208             return r.book
 209         except Book.DoesNotExist:
 210             return False
 211
 212     author_results = filter(ensure_exists, author_results)
 213     translator_results = filter(ensure_exists, translator_results)
 214     title_results = filter(ensure_exists, title_results)
 215     text_phrase = filter(ensure_exists, text_phrase)
 216     everywhere = filter(ensure_exists, everywhere)
 217
 218     results = author_results + translator_results + title_results + text_phrase + everywhere
 219     # ensure books do exists & sort them
 220     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 221         res.sort(reverse=True)
 222
 223     # We don't want to redirect to book text, but rather display result page even with one result.
 224     # if len(results) == 1:
 225     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 226     #     if len(fragment_hits) == 1:
 227     #         #anchor = fragment_hits[0]['fragment']
 228     #         #frag = Fragment.objects.get(anchor=anchor)
 229     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 230     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 231     if len(results) == 0:
 232         form = PublishingSuggestForm(initial={"books": query + ", "})
 233         return render_to_response(
 234             'catalogue/search_no_hits.html',
 235             {
 236                 'tags': tags,
 237                 'prefix': query,
 238                 'form': form,
 239                 'did_you_mean': suggestion
 240             },
 241             context_instance=RequestContext(request))
 242
 243     return render_to_response(
 244         'catalogue/search_multiple_hits.html',
 245         {
 246             'tags': tags,
 247             'prefix': query,
 248             'results': {
 249                 'author': author_results,
 250                 'translator': translator_results,
 251                 'title': title_results,
 252                 'content': text_phrase,
 253                 'other': everywhere
 254             },
 255             'did_you_mean': suggestion
 256         },
 257         context_instance=RequestContext(request))