src/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 import json
  19
  20
  21 def match_word_re(word):
  22     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  23         return r"\b%s\b" % word
  24     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  25         return "[[:<:]]%s[[:>:]]" % word
  26
  27
  28 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  29
  30
  31 def remove_query_syntax_chars(query, replace=' '):
  32     return query_syntax_chars.sub(' ', query)
  33
  34
  35 def did_you_mean(query, tokens):
  36     return query
  37     # change = {}
  38     # for t in tokens:
  39     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  40     #     if len(authors) > 0:
  41     #         continue
  42
  43     #     if False:
  44     #         if not dictionary.check(t):
  45     #             try:
  46     #                 change_to = dictionary.suggest(t)[0].lower()
  47     #                 if change_to != t.lower():
  48     #                     change[t] = change_to
  49     #             except IndexError:
  50     #                 pass
  51
  52     # if change == {}:
  53     #     return None
  54
  55     # for frm, to in change.items():
  56     #     query = query.replace(frm, to)
  57
  58     # return query
  59
  60
  61 @cache.never_cache
  62 def hint(request):
  63     prefix = request.GET.get('term', '')
  64     if len(prefix) < 2:
  65         return JsonResponse([], safe=False)
  66
  67     prefix = remove_query_syntax_chars(prefix)
  68
  69     search = Search()
  70     # tagi beda ograniczac tutaj
  71     # ale tagi moga byc na ksiazce i na fragmentach
  72     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  73     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  74
  75     def is_dupe(tag):
  76         if isinstance(tag, PDCounterAuthor):
  77             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  78                 return True
  79         elif isinstance(tag, PDCounterBook):
  80             if filter(lambda b: b.slug == tag.slug, tags):
  81                 return True
  82         return False
  83
  84     def category_name(c):
  85         if c.startswith('pd_'):
  86             c = c[len('pd_'):]
  87         return _(c)
  88
  89     try:
  90         limit = int(request.GET.get('max', ''))
  91     except ValueError:
  92         limit = -1
  93     else:
  94         if limit < 1:
  95             limit = -1
  96
  97     data = []
  98
  99     tags = search.hint_tags(prefix, pdcounter=True)
 100     tags = filter(lambda t: not is_dupe(t), tags)
 101     for t in tags:
 102         if not limit:
 103             break
 104         limit -= 1
 105         data.append({
 106             'label': t.name,
 107             'category': category_name(t.category),
 108             'id': t.id,
 109             'url': t.get_absolute_url()
 110             })
 111     if limit:
 112         books = search.hint_books(prefix)
 113         for b in books:
 114             if not limit:
 115                 break
 116             limit -= 1
 117             data.append({
 118                 'label': b.title,
 119                 'category': _('book'),
 120                 'id': b.id,
 121                 'url': b.get_absolute_url()
 122                 })
 123
 124     callback = request.GET.get('callback', None)
 125     if callback:
 126         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 127                             content_type="application/json; charset=utf-8")
 128     else:
 129         return JsonResponse(data, safe=False)
 130
 131
 132 @cache.never_cache
 133 def main(request):
 134     query = request.GET.get('q', '')
 135
 136     if len(query) < 2:
 137         return render_to_response(
 138             'catalogue/search_too_short.html', {'prefix': query},
 139             context_instance=RequestContext(request))
 140     elif len(query) > 256:
 141         return render_to_response(
 142             'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 143
 144     query = remove_query_syntax_chars(query)
 145
 146     search = Search()
 147
 148     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 149         + search.index.analyze(text=query, field="themes")
 150
 151     # change hints
 152     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 153     tags = split_tags(tags)
 154
 155     author_results = search.search_phrase(query, 'authors', book=True)
 156     translator_results = search.search_phrase(query, 'translators', book=True)
 157
 158     title_results = search.search_phrase(query, 'title', book=True)
 159
 160     # Boost main author/title results with mixed search, and save some of its results for end of list.
 161     # boost author, title results
 162     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 163     author_title_rest = []
 164
 165     for b in author_title_mixed:
 166         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 167         for b2 in also_in_mixed:
 168             b2.boost *= 1.1
 169         if also_in_mixed is []:
 170             author_title_rest.append(b)
 171
 172     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 173     # Because the query is using only one field.
 174     text_phrase = SearchResult.aggregate(
 175         search.search_phrase(query, 'text', snippets=True, book=False),
 176         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 177
 178     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 179
 180     def already_found(results):
 181         def f(e):
 182             for r in results:
 183                 if e.book_id == r.book_id:
 184                     e.boost = 0.9
 185                     results.append(e)
 186                     return True
 187             return False
 188         return f
 189     f = already_found(author_results + translator_results + title_results + text_phrase)
 190     everywhere = filter(lambda x: not f(x), everywhere)
 191
 192     author_results = SearchResult.aggregate(author_results)
 193     translator_results = SearchResult.aggregate(translator_results)
 194     title_results = SearchResult.aggregate(title_results)
 195
 196     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 197
 198     for field, res in [('authors', author_results),
 199                        ('translators', translator_results),
 200                        ('title', title_results),
 201                        ('text', text_phrase),
 202                        ('text', everywhere)]:
 203         res.sort(reverse=True)
 204         for r in res:
 205             search.get_snippets(r, query, field, 3)
 206
 207     suggestion = u''
 208
 209     def ensure_exists(r):
 210         try:
 211             return r.book
 212         except Book.DoesNotExist:
 213             return False
 214
 215     author_results = filter(ensure_exists, author_results)
 216     translator_results = filter(ensure_exists, translator_results)
 217     title_results = filter(ensure_exists, title_results)
 218     text_phrase = filter(ensure_exists, text_phrase)
 219     everywhere = filter(ensure_exists, everywhere)
 220
 221     results = author_results + translator_results + title_results + text_phrase + everywhere
 222     # ensure books do exists & sort them
 223     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 224         res.sort(reverse=True)
 225
 226     # We don't want to redirect to book text, but rather display result page even with one result.
 227     # if len(results) == 1:
 228     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 229     #     if len(fragment_hits) == 1:
 230     #         #anchor = fragment_hits[0]['fragment']
 231     #         #frag = Fragment.objects.get(anchor=anchor)
 232     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 233     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 234     if len(results) == 0:
 235         form = PublishingSuggestForm(initial={"books": query + ", "})
 236         return render_to_response(
 237             'catalogue/search_no_hits.html',
 238             {
 239                 'tags': tags,
 240                 'prefix': query,
 241                 'form': form,
 242                 'did_you_mean': suggestion
 243             },
 244             context_instance=RequestContext(request))
 245
 246     return render_to_response(
 247         'catalogue/search_multiple_hits.html',
 248         {
 249             'tags': tags,
 250             'prefix': query,
 251             'results': {
 252                 'author': author_results,
 253                 'translator': translator_results,
 254                 'title': title_results,
 255                 'content': text_phrase,
 256                 'other': everywhere
 257             },
 258             'did_you_mean': suggestion
 259         },
 260         context_instance=RequestContext(request))