src/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book, Tag
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 import json
  19
  20 from wolnelektury.utils import re_escape
  21
  22
  23 def match_word_re(word):
  24     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  25         return r"\b%s\b" % word
  26     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  27         return "[[:<:]]%s[[:>:]]" % word
  28
  29
  30 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  31
  32
  33 def remove_query_syntax_chars(query, replace=' '):
  34     return query_syntax_chars.sub(replace, query)
  35
  36
  37 def did_you_mean(query, tokens):
  38     return query
  39     # change = {}
  40     # for t in tokens:
  41     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  42     #     if len(authors) > 0:
  43     #         continue
  44
  45     #     if False:
  46     #         if not dictionary.check(t):
  47     #             try:
  48     #                 change_to = dictionary.suggest(t)[0].lower()
  49     #                 if change_to != t.lower():
  50     #                     change[t] = change_to
  51     #             except IndexError:
  52     #                 pass
  53
  54     # if change == {}:
  55     #     return None
  56
  57     # for frm, to in change.items():
  58     #     query = query.replace(frm, to)
  59
  60     # return query
  61
  62
  63 @cache.never_cache
  64 def hint(request):
  65     prefix = request.GET.get('term', '')
  66     if len(prefix) < 2:
  67         return JsonResponse([], safe=False)
  68
  69     prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
  70
  71     try:
  72         limit = int(request.GET.get('max', ''))
  73     except ValueError:
  74         limit = -1
  75     else:
  76         if limit < 1:
  77             limit = -1
  78
  79     data = [
  80         {
  81             'label': author.name,
  82             'category': _('author'),
  83             'id': author.id,
  84             'url': author.get_absolute_url(),
  85         }
  86         for author in Tag.objects.filter(category='author', name__iregex=u'\m' + prefix)[:10]
  87     ]
  88     if len(data) < limit:
  89         data += [
  90             {
  91                 'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
  92                 'category': _('book'),
  93                 'id': b.id,
  94                 'url': b.get_absolute_url()
  95             }
  96             for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
  97         ]
  98     callback = request.GET.get('callback', None)
  99     if callback:
 100         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 101                             content_type="application/json; charset=utf-8")
 102     else:
 103         return JsonResponse(data, safe=False)
 104
 105
 106 @cache.never_cache
 107 def main(request):
 108     query = request.GET.get('q', '')
 109     query = ' '.join(query.split())
 110     # filter out private use characters
 111     import unicodedata
 112     query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 113
 114     if len(query) < 2:
 115         return render_to_response(
 116             'catalogue/search_too_short.html', {'prefix': query},
 117             context_instance=RequestContext(request))
 118     elif len(query) > 256:
 119         return render_to_response(
 120             'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 121
 122     query = remove_query_syntax_chars(query)
 123
 124     words = query.split()
 125     if len(words) > 10:
 126         query = ' '.join(words[:10])
 127
 128     search = Search()
 129
 130     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 131     tags = split_tags(tags)
 132
 133     results_parts = []
 134
 135     search_fields = []
 136     fieldsets = (
 137         (['authors'], True),
 138         (['title'], True),
 139         (['metadata'], True),
 140         (['text', 'themes_pl'], False),
 141     )
 142     for fieldset, is_book in fieldsets:
 143         search_fields += fieldset
 144         results_parts.append(search.search_words(words, search_fields, book=is_book))
 145
 146     results = []
 147     ids_results = {}
 148     for results_part in results_parts:
 149         for result in sorted(SearchResult.aggregate(results_part), reverse=True):
 150             book_id = result.book_id
 151             if book_id in ids_results:
 152                 ids_results[book_id].merge(result)
 153             else:
 154                 results.append(result)
 155                 ids_results[book_id] = result
 156
 157     for result in results:
 158         search.get_snippets(result, query, num=3)
 159
 160     suggestion = u''
 161
 162     def ensure_exists(r):
 163         try:
 164             return r.book
 165         except Book.DoesNotExist:
 166             return False
 167
 168     results = filter(ensure_exists, results)
 169
 170     if not results:
 171         form = PublishingSuggestForm(initial={"books": query + ", "})
 172         return render_to_response(
 173             'catalogue/search_no_hits.html',
 174             {
 175                 'tags': tags,
 176                 'prefix': query,
 177                 'form': form,
 178                 'did_you_mean': suggestion
 179             },
 180             context_instance=RequestContext(request))
 181
 182     return render_to_response(
 183         'catalogue/search_multiple_hits.html',
 184         {
 185             'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
 186             'prefix': query,
 187             'results': results,
 188             'did_you_mean': suggestion
 189         },
 190         context_instance=RequestContext(request))