src/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book, Tag
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 import json
  19
  20
  21 def match_word_re(word):
  22     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  23         return r"\b%s\b" % word
  24     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  25         return "[[:<:]]%s[[:>:]]" % word
  26
  27
  28 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  29
  30
  31 def remove_query_syntax_chars(query, replace=' '):
  32     return query_syntax_chars.sub(' ', query)
  33
  34
  35 def did_you_mean(query, tokens):
  36     return query
  37     # change = {}
  38     # for t in tokens:
  39     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  40     #     if len(authors) > 0:
  41     #         continue
  42
  43     #     if False:
  44     #         if not dictionary.check(t):
  45     #             try:
  46     #                 change_to = dictionary.suggest(t)[0].lower()
  47     #                 if change_to != t.lower():
  48     #                     change[t] = change_to
  49     #             except IndexError:
  50     #                 pass
  51
  52     # if change == {}:
  53     #     return None
  54
  55     # for frm, to in change.items():
  56     #     query = query.replace(frm, to)
  57
  58     # return query
  59
  60
  61 @cache.never_cache
  62 def hint(request):
  63     prefix = request.GET.get('term', '')
  64     if len(prefix) < 2:
  65         return JsonResponse([], safe=False)
  66
  67     prefix = remove_query_syntax_chars(prefix)
  68
  69     try:
  70         limit = int(request.GET.get('max', ''))
  71     except ValueError:
  72         limit = -1
  73     else:
  74         if limit < 1:
  75             limit = -1
  76
  77     data = [
  78         {
  79             'label': author.name,
  80             'category': _('author'),
  81             'id': author.id,
  82             'url': author.get_absolute_url(),
  83         }
  84         for author in Tag.objects.filter(category='author', name__iregex='\m' + prefix)[:10]
  85     ]
  86     if len(data) < limit:
  87         data += [
  88             {
  89                 'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
  90                 'category': _('book'),
  91                 'id': b.id,
  92                 'url': b.get_absolute_url()
  93             }
  94             for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
  95         ]
  96     callback = request.GET.get('callback', None)
  97     if callback:
  98         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
  99                             content_type="application/json; charset=utf-8")
 100     else:
 101         return JsonResponse(data, safe=False)
 102
 103
 104 @cache.never_cache
 105 def main(request):
 106     query = request.GET.get('q', '')
 107     query = ' '.join(query.split())
 108     # filter out private use characters
 109     import unicodedata
 110     query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 111
 112     if len(query) < 2:
 113         return render_to_response(
 114             'catalogue/search_too_short.html', {'prefix': query},
 115             context_instance=RequestContext(request))
 116     elif len(query) > 256:
 117         return render_to_response(
 118             'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 119
 120     query = remove_query_syntax_chars(query)
 121
 122     words = query.split()
 123     if len(words) > 10:
 124         query = ' '.join(words[:10])
 125
 126     search = Search()
 127
 128     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 129     tags = split_tags(tags)
 130
 131     results_parts = []
 132
 133     search_fields = []
 134     fieldsets = (
 135         (['authors'], True),
 136         (['title'], True),
 137         (['metadata'], True),
 138         (['text', 'themes_pl'], False),
 139     )
 140     for fieldset, is_book in fieldsets:
 141         search_fields += fieldset
 142         results_parts.append(search.search_words(words, search_fields, book=is_book))
 143
 144     results = []
 145     ids_results = {}
 146     for results_part in results_parts:
 147         for result in sorted(SearchResult.aggregate(results_part), reverse=True):
 148             book_id = result.book_id
 149             if book_id in ids_results:
 150                 ids_results[book_id].merge(result)
 151             else:
 152                 results.append(result)
 153                 ids_results[book_id] = result
 154
 155     for result in results:
 156         search.get_snippets(result, query, num=3)
 157
 158     suggestion = u''
 159
 160     def ensure_exists(r):
 161         try:
 162             return r.book
 163         except Book.DoesNotExist:
 164             return False
 165
 166     results = filter(ensure_exists, results)
 167
 168     if not results:
 169         form = PublishingSuggestForm(initial={"books": query + ", "})
 170         return render_to_response(
 171             'catalogue/search_no_hits.html',
 172             {
 173                 'tags': tags,
 174                 'prefix': query,
 175                 'form': form,
 176                 'did_you_mean': suggestion
 177             },
 178             context_instance=RequestContext(request))
 179
 180     return render_to_response(
 181         'catalogue/search_multiple_hits.html',
 182         {
 183             'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
 184             'prefix': query,
 185             'results': results,
 186             'did_you_mean': suggestion
 187         },
 188         context_instance=RequestContext(request))