src/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 import json
  19
  20
  21 def match_word_re(word):
  22     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  23         return r"\b%s\b" % word
  24     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  25         return "[[:<:]]%s[[:>:]]" % word
  26
  27
  28 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  29
  30
  31 def remove_query_syntax_chars(query, replace=' '):
  32     return query_syntax_chars.sub(' ', query)
  33
  34
  35 def did_you_mean(query, tokens):
  36     return query
  37     # change = {}
  38     # for t in tokens:
  39     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  40     #     if len(authors) > 0:
  41     #         continue
  42
  43     #     if False:
  44     #         if not dictionary.check(t):
  45     #             try:
  46     #                 change_to = dictionary.suggest(t)[0].lower()
  47     #                 if change_to != t.lower():
  48     #                     change[t] = change_to
  49     #             except IndexError:
  50     #                 pass
  51
  52     # if change == {}:
  53     #     return None
  54
  55     # for frm, to in change.items():
  56     #     query = query.replace(frm, to)
  57
  58     # return query
  59
  60
  61 @cache.never_cache
  62 def hint(request):
  63     prefix = request.GET.get('term', '')
  64     if len(prefix) < 2:
  65         return JsonResponse([], safe=False)
  66
  67     prefix = remove_query_syntax_chars(prefix)
  68
  69     search = Search()
  70     # tagi beda ograniczac tutaj
  71     # ale tagi moga byc na ksiazce i na fragmentach
  72     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  73     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  74
  75     def is_dupe(tag):
  76         if isinstance(tag, PDCounterAuthor):
  77             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  78                 return True
  79         elif isinstance(tag, PDCounterBook):
  80             if filter(lambda b: b.slug == tag.slug, tags):
  81                 return True
  82         return False
  83
  84     def category_name(c):
  85         if c.startswith('pd_'):
  86             c = c[len('pd_'):]
  87         return _(c)
  88
  89     try:
  90         limit = int(request.GET.get('max', ''))
  91     except ValueError:
  92         limit = -1
  93     else:
  94         if limit < 1:
  95             limit = -1
  96
  97     data = []
  98
  99     tags = search.hint_tags(prefix, pdcounter=True)
 100     tags = filter(lambda t: not is_dupe(t), tags)
 101     for t in tags:
 102         if not limit:
 103             break
 104         limit -= 1
 105         data.append({
 106             'label': t.name,
 107             'category': category_name(t.category),
 108             'id': t.id,
 109             'url': t.get_absolute_url()
 110             })
 111     if limit:
 112         books = search.hint_books(prefix)
 113         for b in books:
 114             if not limit:
 115                 break
 116             limit -= 1
 117             data.append({
 118                 'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
 119                 'category': _('book'),
 120                 'id': b.id,
 121                 'url': b.get_absolute_url()
 122                 })
 123
 124     callback = request.GET.get('callback', None)
 125     if callback:
 126         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 127                             content_type="application/json; charset=utf-8")
 128     else:
 129         return JsonResponse(data, safe=False)
 130
 131
 132 @cache.never_cache
 133 def main(request):
 134     query = request.GET.get('q', '')
 135     query = ' '.join(query.split())
 136     # filter out private use characters
 137     import unicodedata
 138     query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 139
 140     if len(query) < 2:
 141         return render_to_response(
 142             'catalogue/search_too_short.html', {'prefix': query},
 143             context_instance=RequestContext(request))
 144     elif len(query) > 256:
 145         return render_to_response(
 146             'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 147
 148     query = remove_query_syntax_chars(query)
 149
 150     words = query.split()
 151     if len(words) > 10:
 152         query = ' '.join(words[:10])
 153
 154     search = Search()
 155
 156     # change hints
 157     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 158     tags = split_tags(tags)
 159
 160     author_results = search.search_words(words, ['authors'])
 161
 162     title_results = search.search_words(words, ['title'])
 163
 164     author_title_mixed = search.search_words(words, ['authors', 'title', 'metadata'])
 165     author_title_rest = []
 166
 167     for b in author_title_mixed:
 168         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 169         for b2 in also_in_mixed:
 170             b2.boost *= 1.1
 171         if not also_in_mixed:
 172             author_title_rest.append(b)
 173
 174     text_phrase = SearchResult.aggregate(search.search_words(words, ['text'], book=False))
 175
 176     everywhere = search.search_words(words, ['metadata', 'text', 'themes_pl'], book=False)
 177
 178     def already_found(results):
 179         def f(e):
 180             for r in results:
 181                 if e.book_id == r.book_id:
 182                     e.boost = 0.9
 183                     results.append(e)
 184                     return True
 185             return False
 186         return f
 187     f = already_found(author_results + title_results + text_phrase)
 188     everywhere = filter(lambda x: not f(x), everywhere)
 189
 190     author_results = SearchResult.aggregate(author_results, author_title_rest)
 191     title_results = SearchResult.aggregate(title_results)
 192
 193     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 194
 195     for field, res in [('authors', author_results),
 196                        ('title', title_results),
 197                        ('text', text_phrase),
 198                        ('text', everywhere)]:
 199         res.sort(reverse=True)
 200         for r in res:
 201             search.get_snippets(r, query, field, 3)
 202
 203     suggestion = u''
 204
 205     def ensure_exists(r):
 206         try:
 207             return r.book
 208         except Book.DoesNotExist:
 209             return False
 210
 211     author_results = filter(ensure_exists, author_results)
 212     title_results = filter(ensure_exists, title_results)
 213     text_phrase = filter(ensure_exists, text_phrase)
 214     everywhere = filter(ensure_exists, everywhere)
 215
 216     # ensure books do exists & sort them
 217     for res in (author_results, title_results, text_phrase):
 218         res.sort(reverse=True)
 219
 220     if not (author_results or title_results or text_phrase or everywhere):
 221         form = PublishingSuggestForm(initial={"books": query + ", "})
 222         return render_to_response(
 223             'catalogue/search_no_hits.html',
 224             {
 225                 'tags': tags,
 226                 'prefix': query,
 227                 'form': form,
 228                 'did_you_mean': suggestion
 229             },
 230             context_instance=RequestContext(request))
 231
 232     return render_to_response(
 233         'catalogue/search_multiple_hits.html',
 234         {
 235             'tags': tags,
 236             'prefix': query,
 237             'results': {
 238                 'author': author_results,
 239                 'title': title_results,
 240                 'content': text_phrase,
 241                 'other': everywhere
 242             },
 243             'did_you_mean': suggestion
 244         },
 245         context_instance=RequestContext(request))