apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response, get_object_or_404
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book, Tag, Fragment
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 #import enchant
  19 import json
  20
  21
  22 def match_word_re(word):
  23     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  24         return r"\b%s\b" % word
  25     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  26         return "[[:<:]]%s[[:>:]]" % word
  27
  28
  29 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  30
  31
  32 def remove_query_syntax_chars(query, replace=' '):
  33     return query_syntax_chars.sub(' ', query)
  34
  35
  36 def did_you_mean(query, tokens):
  37     return query
  38     # change = {}
  39     # for t in tokens:
  40     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  41     #     if len(authors) > 0:
  42     #         continue
  43
  44     #     if False:
  45     #         if not dictionary.check(t):
  46     #             try:
  47     #                 change_to = dictionary.suggest(t)[0].lower()
  48     #                 if change_to != t.lower():
  49     #                     change[t] = change_to
  50     #             except IndexError:
  51     #                 pass
  52
  53     # if change == {}:
  54     #     return None
  55
  56     # for frm, to in change.items():
  57     #     query = query.replace(frm, to)
  58
  59     # return query
  60
  61
  62 @cache.never_cache
  63 def hint(request):
  64     prefix = request.GET.get('term', '')
  65     if len(prefix) < 2:
  66         return JsonResponse([], safe=False)
  67
  68     prefix = remove_query_syntax_chars(prefix)
  69
  70     search = Search()
  71     # tagi beda ograniczac tutaj
  72     # ale tagi moga byc na ksiazce i na fragmentach
  73     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  74     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  75
  76     def is_dupe(tag):
  77         if isinstance(tag, PDCounterAuthor):
  78             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  79                 return True
  80         elif isinstance(tag, PDCounterBook):
  81             if filter(lambda b: b.slug == tag.slug, tags):
  82                 return True
  83         return False
  84
  85     def category_name(c):
  86         if c.startswith('pd_'):
  87             c = c[len('pd_'):]
  88         return _(c)
  89
  90     try:
  91         limit = int(request.GET.get('max', ''))
  92     except ValueError:
  93         limit = -1
  94     else:
  95         if limit < 1:
  96             limit = -1
  97
  98     data = []
  99
 100     tags = search.hint_tags(prefix, pdcounter=True)
 101     tags = filter(lambda t: not is_dupe(t), tags)
 102     for t in tags:
 103         if not limit:
 104             break
 105         limit -= 1
 106         data.append({
 107             'label': t.name,
 108             'category': category_name(t.category),
 109             'id': t.id,
 110             'url': t.get_absolute_url()
 111             })
 112     if limit:
 113         books = search.hint_books(prefix)
 114         for b in books:
 115             if not limit:
 116                 break
 117             limit -= 1
 118             data.append({
 119                 'label': b.title,
 120                 'category': _('book'),
 121                 'id': b.id,
 122                 'url': b.get_absolute_url()
 123                 })
 124
 125     callback = request.GET.get('callback', None)
 126     if callback:
 127         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 128                             content_type="application/json; charset=utf-8")
 129     else:
 130         return JsonResponse(data, safe=False)
 131
 132
 133 @cache.never_cache
 134 def main(request):
 135     results = {}
 136
 137     results = None
 138     query = None
 139
 140     query = request.GET.get('q', '')
 141
 142     if len(query) < 2:
 143         return render_to_response('catalogue/search_too_short.html',
 144                                   {'prefix': query},
 145             context_instance=RequestContext(request))
 146
 147     query = remove_query_syntax_chars(query)
 148
 149     search = Search()
 150
 151     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 152         + search.index.analyze(text=query, field="themes")
 153
 154             # change hints
 155     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 156     tags = split_tags(tags)
 157
 158     author_results = search.search_phrase(query, 'authors', book=True)
 159     translator_results = search.search_phrase(query, 'translators', book=True)
 160
 161     title_results = search.search_phrase(query, 'title', book=True)
 162
 163     # Boost main author/title results with mixed search, and save some of its results for end of list.
 164     # boost author, title results
 165     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 166     author_title_rest = []
 167
 168     for b in author_title_mixed:
 169         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 170         for b2 in also_in_mixed:
 171             b2.boost *= 1.1
 172         if also_in_mixed is []:
 173             author_title_rest.append(b)
 174
 175     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 176     # Because the query is using only one field.
 177     text_phrase = SearchResult.aggregate(
 178         search.search_phrase(query, 'text', snippets=True, book=False),
 179         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 180
 181     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 182
 183     def already_found(results):
 184         def f(e):
 185             for r in results:
 186                 if e.book_id == r.book_id:
 187                     e.boost = 0.9
 188                     results.append(e)
 189                     return True
 190             return False
 191         return f
 192     f = already_found(author_results + translator_results + title_results + text_phrase)
 193     everywhere = filter(lambda x: not f(x), everywhere)
 194
 195     author_results = SearchResult.aggregate(author_results)
 196     translator_results = SearchResult.aggregate(translator_results)
 197     title_results = SearchResult.aggregate(title_results)
 198
 199     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 200
 201     for field, res in [('authors', author_results),
 202                        ('translators', translator_results),
 203                        ('title', title_results),
 204                        ('text', text_phrase),
 205                        ('text', everywhere)]:
 206         res.sort(reverse=True)
 207         for r in res:
 208             search.get_snippets(r, query, field, 3)
 209
 210     suggestion = u''
 211
 212     def ensure_exists(r):
 213         try:
 214             return r.book
 215         except Book.DoesNotExist:
 216             return False
 217
 218     author_results = filter(ensure_exists, author_results)
 219     translator_results = filter(ensure_exists, translator_results)
 220     title_results = filter(ensure_exists, title_results)
 221     text_phrase = filter(ensure_exists, text_phrase)
 222     everywhere = filter(ensure_exists, everywhere)
 223
 224     results = author_results + translator_results + title_results + text_phrase + everywhere
 225     # ensure books do exists & sort them
 226     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 227         res.sort(reverse=True)
 228
 229     # We don't want to redirect to book text, but rather display result page even with one result.
 230     # if len(results) == 1:
 231     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 232     #     if len(fragment_hits) == 1:
 233     #         #anchor = fragment_hits[0]['fragment']
 234     #         #frag = Fragment.objects.get(anchor=anchor)
 235     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 236     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 237     if len(results) == 0:
 238         form = PublishingSuggestForm(initial={"books": query + ", "})
 239         return render_to_response('catalogue/search_no_hits.html',
 240                                   {'tags': tags,
 241                                    'prefix': query,
 242                                    "form": form,
 243                                    'did_you_mean': suggestion},
 244             context_instance=RequestContext(request))
 245
 246     return render_to_response('catalogue/search_multiple_hits.html',
 247                               {'tags': tags,
 248                                'prefix': query,
 249                                'results': {'author': author_results,
 250                                            'translator': translator_results,
 251                                            'title': title_results,
 252                                            'content': text_phrase,
 253                                            'other': everywhere},
 254                                'did_you_mean': suggestion},
 255         context_instance=RequestContext(request))