src/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.http.response import HttpResponseRedirect
   7 from django.shortcuts import render_to_response
   8 from django.template import RequestContext
   9 from django.views.decorators import cache
  10 from django.http import HttpResponse, JsonResponse
  11
  12 from catalogue.models import Book, Tag
  13 from pdcounter.models import Author
  14 from search.index import Search, SearchResult
  15 from suggest.forms import PublishingSuggestForm
  16 import re
  17 import json
  18
  19 from wolnelektury.utils import re_escape
  20
  21
  22 def match_word_re(word):
  23     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  24         return r"\b%s\b" % word
  25     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  26         return "[[:<:]]%s[[:>:]]" % word
  27
  28
  29 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  30
  31
  32 def remove_query_syntax_chars(query, replace=' '):
  33     return query_syntax_chars.sub(replace, query)
  34
  35
  36 def did_you_mean(query, tokens):
  37     return query
  38     # change = {}
  39     # for t in tokens:
  40     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  41     #     if len(authors) > 0:
  42     #         continue
  43
  44     #     if False:
  45     #         if not dictionary.check(t):
  46     #             try:
  47     #                 change_to = dictionary.suggest(t)[0].lower()
  48     #                 if change_to != t.lower():
  49     #                     change[t] = change_to
  50     #             except IndexError:
  51     #                 pass
  52
  53     # if change == {}:
  54     #     return None
  55
  56     # for frm, to in change.items():
  57     #     query = query.replace(frm, to)
  58
  59     # return query
  60
  61
  62 @cache.never_cache
  63 def hint(request):
  64     prefix = request.GET.get('term', '')
  65     if len(prefix) < 2:
  66         return JsonResponse([], safe=False)
  67
  68     prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
  69
  70     try:
  71         limit = int(request.GET.get('max', ''))
  72     except ValueError:
  73         limit = 20
  74     else:
  75         if limit < 1:
  76             limit = 20
  77
  78     authors = Tag.objects.filter(
  79         category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
  80     data = [
  81         {
  82             'label': author.name,
  83             'id': author.id,
  84             'url': author.get_absolute_url(),
  85         }
  86         for author in authors[:limit]
  87     ]
  88     if len(data) < limit:
  89         data += [
  90             {
  91                 'label': b.title,
  92                 'author': b.author_unicode(),
  93                 'id': b.id,
  94                 'url': b.get_absolute_url()
  95             }
  96             for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
  97         ]
  98     callback = request.GET.get('callback', None)
  99     if callback:
 100         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 101                             content_type="application/json; charset=utf-8")
 102     else:
 103         return JsonResponse(data, safe=False)
 104
 105
 106 @cache.never_cache
 107 def main(request):
 108     query = request.GET.get('q', '')
 109     query = ' '.join(query.split())
 110     # filter out private use characters
 111     import unicodedata
 112     query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 113
 114     if len(query) < 2:
 115         return render_to_response(
 116             'catalogue/search_too_short.html', {'prefix': query},
 117             context_instance=RequestContext(request))
 118     elif len(query) > 256:
 119         return render_to_response(
 120             'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 121
 122     query = remove_query_syntax_chars(query)
 123
 124     words = query.split()
 125     if len(words) > 10:
 126         query = ' '.join(words[:10])
 127
 128     search = Search()
 129
 130     pd_authors = Author.objects.filter(name__icontains=query)
 131     existing_slugs = Tag.objects.filter(
 132         category='author', slug__in=list(pd_authors.values_list('slug', flat=True)))\
 133         .values_list('slug', flat=True)
 134     pd_authors = pd_authors.exclude(slug__in=existing_slugs)
 135
 136     results_parts = []
 137
 138     search_fields = []
 139     fieldsets = (
 140         (['authors'], True),
 141         (['title'], True),
 142         (['metadata'], True),
 143         (['text', 'themes_pl'], False),
 144     )
 145     for fields, is_book in fieldsets:
 146         search_fields += fields
 147         results_parts.append(search.search_words(words, search_fields, book=is_book))
 148
 149     results = []
 150     ids_results = {}
 151     for results_part in results_parts:
 152         for result in sorted(SearchResult.aggregate(results_part), reverse=True):
 153             book_id = result.book_id
 154             if book_id in ids_results:
 155                 ids_results[book_id].merge(result)
 156             else:
 157                 results.append(result)
 158                 ids_results[book_id] = result
 159
 160     descendant_ids = set(
 161         Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
 162     results = [result for result in results if result.book_id not in descendant_ids]
 163
 164     for result in results:
 165         search.get_snippets(result, query, num=3)
 166
 167     suggestion = u''
 168
 169     def ensure_exists(r):
 170         try:
 171             return r.book
 172         except Book.DoesNotExist:
 173             return False
 174
 175     results = filter(ensure_exists, results)
 176
 177     if not results and not pd_authors:
 178         form = PublishingSuggestForm(initial={"books": query + ", "})
 179         return render_to_response(
 180             'catalogue/search_no_hits.html',
 181             {
 182                 'form': form,
 183                 'did_you_mean': suggestion
 184             },
 185             context_instance=RequestContext(request))
 186
 187     if not results and len(pd_authors) == 1:
 188         return HttpResponseRedirect(pd_authors[0].get_absolute_url())
 189
 190     return render_to_response(
 191         'catalogue/search_multiple_hits.html',
 192         {
 193             'pd_authors': pd_authors,
 194             'results': results,
 195             'did_you_mean': suggestion
 196         },
 197         context_instance=RequestContext(request))