apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response, get_object_or_404
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book, Tag, Fragment
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 #import enchant
  19 import json
  20
  21
  22 def match_word_re(word):
  23     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  24         return r"\b%s\b" % word
  25     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  26         return "[[:<:]]%s[[:>:]]" % word
  27
  28
  29 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  30
  31
  32 def remove_query_syntax_chars(query, replace=' '):
  33     return query_syntax_chars.sub(' ', query)
  34
  35
  36 def did_you_mean(query, tokens):
  37     return query
  38     # change = {}
  39     # for t in tokens:
  40     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  41     #     if len(authors) > 0:
  42     #         continue
  43
  44     #     if False:
  45     #         if not dictionary.check(t):
  46     #             try:
  47     #                 change_to = dictionary.suggest(t)[0].lower()
  48     #                 if change_to != t.lower():
  49     #                     change[t] = change_to
  50     #             except IndexError:
  51     #                 pass
  52
  53     # if change == {}:
  54     #     return None
  55
  56     # for frm, to in change.items():
  57     #     query = query.replace(frm, to)
  58
  59     # return query
  60
  61
  62 @cache.never_cache
  63 def hint(request):
  64     prefix = request.GET.get('term', '')
  65     if len(prefix) < 2:
  66         return JsonResponse([], safe=False)
  67
  68     prefix = remove_query_syntax_chars(prefix)
  69
  70     search = Search()
  71     # tagi beda ograniczac tutaj
  72     # ale tagi moga byc na ksiazce i na fragmentach
  73     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  74     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  75
  76     tags = search.hint_tags(prefix, pdcounter=True)
  77     books = search.hint_books(prefix)
  78
  79     def is_dupe(tag):
  80         if isinstance(tag, PDCounterAuthor):
  81             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  82                 return True
  83         elif isinstance(tag, PDCounterBook):
  84             if filter(lambda b: b.slug == tag.slug, tags):
  85                 return True
  86         return False
  87
  88     tags = filter(lambda t: not is_dupe(t), tags)
  89
  90     def category_name(c):
  91         if c.startswith('pd_'):
  92             c = c[len('pd_'):]
  93         return _(c)
  94
  95     callback = request.GET.get('callback', None)
  96     data = [{'label': t.name,
  97               'category': category_name(t.category),
  98               'id': t.id,
  99               'url': t.get_absolute_url()}
 100               for t in tags] + \
 101               [{'label': b.title,
 102                 'category': _('book'),
 103                 'id': b.id,
 104                 'url': b.get_absolute_url()}
 105                 for b in books]
 106     if callback:
 107         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 108                             content_type="application/json; charset=utf-8")
 109     else:
 110         return JsonResponse(data, safe=False)
 111
 112
 113 @cache.never_cache
 114 def main(request):
 115     results = {}
 116
 117     results = None
 118     query = None
 119
 120     query = request.GET.get('q', '')
 121
 122     if len(query) < 2:
 123         return render_to_response('catalogue/search_too_short.html',
 124                                   {'prefix': query},
 125             context_instance=RequestContext(request))
 126
 127     query = remove_query_syntax_chars(query)
 128
 129     search = Search()
 130
 131     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 132         + search.index.analyze(text=query, field="themes")
 133
 134             # change hints
 135     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 136     tags = split_tags(tags)
 137
 138     author_results = search.search_phrase(query, 'authors', book=True)
 139     translator_results = search.search_phrase(query, 'translators', book=True)
 140
 141     title_results = search.search_phrase(query, 'title', book=True)
 142
 143     # Boost main author/title results with mixed search, and save some of its results for end of list.
 144     # boost author, title results
 145     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 146     author_title_rest = []
 147
 148     for b in author_title_mixed:
 149         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 150         for b2 in also_in_mixed:
 151             b2.boost *= 1.1
 152         if also_in_mixed is []:
 153             author_title_rest.append(b)
 154
 155     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 156     # Because the query is using only one field.
 157     text_phrase = SearchResult.aggregate(
 158         search.search_phrase(query, 'text', snippets=True, book=False),
 159         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 160
 161     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 162
 163     def already_found(results):
 164         def f(e):
 165             for r in results:
 166                 if e.book_id == r.book_id:
 167                     e.boost = 0.9
 168                     results.append(e)
 169                     return True
 170             return False
 171         return f
 172     f = already_found(author_results + translator_results + title_results + text_phrase)
 173     everywhere = filter(lambda x: not f(x), everywhere)
 174
 175     author_results = SearchResult.aggregate(author_results)
 176     translator_results = SearchResult.aggregate(translator_results)
 177     title_results = SearchResult.aggregate(title_results)
 178
 179     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 180
 181     for field, res in [('authors', author_results),
 182                        ('translators', translator_results),
 183                        ('title', title_results),
 184                        ('text', text_phrase),
 185                        ('text', everywhere)]:
 186         res.sort(reverse=True)
 187         for r in res:
 188             search.get_snippets(r, query, field, 3)
 189
 190     suggestion = u''
 191
 192     def ensure_exists(r):
 193         try:
 194             return r.book
 195         except Book.DoesNotExist:
 196             return False
 197
 198     author_results = filter(ensure_exists, author_results)
 199     translator_results = filter(ensure_exists, translator_results)
 200     title_results = filter(ensure_exists, title_results)
 201     text_phrase = filter(ensure_exists, text_phrase)
 202     everywhere = filter(ensure_exists, everywhere)
 203
 204     results = author_results + translator_results + title_results + text_phrase + everywhere
 205     # ensure books do exists & sort them
 206     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 207         res.sort(reverse=True)
 208
 209     # We don't want to redirect to book text, but rather display result page even with one result.
 210     # if len(results) == 1:
 211     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 212     #     if len(fragment_hits) == 1:
 213     #         #anchor = fragment_hits[0]['fragment']
 214     #         #frag = Fragment.objects.get(anchor=anchor)
 215     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 216     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 217     if len(results) == 0:
 218         form = PublishingSuggestForm(initial={"books": query + ", "})
 219         return render_to_response('catalogue/search_no_hits.html',
 220                                   {'tags': tags,
 221                                    'prefix': query,
 222                                    "form": form,
 223                                    'did_you_mean': suggestion},
 224             context_instance=RequestContext(request))
 225
 226     return render_to_response('catalogue/search_multiple_hits.html',
 227                               {'tags': tags,
 228                                'prefix': query,
 229                                'results': {'author': author_results,
 230                                            'translator': translator_results,
 231                                            'title': title_results,
 232                                            'content': text_phrase,
 233                                            'other': everywhere},
 234                                'did_you_mean': suggestion},
 235         context_instance=RequestContext(request))