apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response, get_object_or_404
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book, Tag, Fragment
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from search.index import Search, SearchResult
  16 from suggest.forms import PublishingSuggestForm
  17 import re
  18 #import enchant
  19 import json
  20
  21
  22 def match_word_re(word):
  23     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  24         return r"\b%s\b" % word
  25     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  26         return "[[:<:]]%s[[:>:]]" % word
  27
  28
  29 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  30
  31
  32 def remove_query_syntax_chars(query, replace=' '):
  33     return query_syntax_chars.sub(' ', query)
  34
  35
  36 def did_you_mean(query, tokens):
  37     return query
  38     # change = {}
  39     # for t in tokens:
  40     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  41     #     if len(authors) > 0:
  42     #         continue
  43
  44     #     if False:
  45     #         if not dictionary.check(t):
  46     #             try:
  47     #                 change_to = dictionary.suggest(t)[0].lower()
  48     #                 if change_to != t.lower():
  49     #                     change[t] = change_to
  50     #             except IndexError:
  51     #                 pass
  52
  53     # if change == {}:
  54     #     return None
  55
  56     # for frm, to in change.items():
  57     #     query = query.replace(frm, to)
  58
  59     # return query
  60
  61
  62 def hint(request):
  63     prefix = request.GET.get('term', '')
  64     if len(prefix) < 2:
  65         return JsonResponse([])
  66
  67     prefix = remove_query_syntax_chars(prefix)
  68
  69     search = Search()
  70     # tagi beda ograniczac tutaj
  71     # ale tagi moga byc na ksiazce i na fragmentach
  72     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  73     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  74
  75     tags = search.hint_tags(prefix, pdcounter=True)
  76     books = search.hint_books(prefix)
  77
  78     def is_dupe(tag):
  79         if isinstance(tag, PDCounterAuthor):
  80             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  81                 return True
  82         elif isinstance(tag, PDCounterBook):
  83             if filter(lambda b: b.slug == tag.slug, tags):
  84                 return True
  85         return False
  86
  87     tags = filter(lambda t: not is_dupe(t), tags)
  88
  89     def category_name(c):
  90         if c.startswith('pd_'):
  91             c = c[len('pd_'):]
  92         return _(c)
  93
  94     callback = request.GET.get('callback', None)
  95     data = [{'label': t.name,
  96               'category': category_name(t.category),
  97               'id': t.id,
  98               'url': t.get_absolute_url()}
  99               for t in tags] + \
 100               [{'label': b.title,
 101                 'category': _('book'),
 102                 'id': b.id,
 103                 'url': b.get_absolute_url()}
 104                 for b in books]
 105     if callback:
 106         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 107                             content_type="application/json; charset=utf-8")
 108     else:
 109         return JsonResponse(data)
 110
 111
 112 def main(request):
 113     results = {}
 114
 115     results = None
 116     query = None
 117
 118     query = request.GET.get('q', '')
 119
 120     if len(query) < 2:
 121         return render_to_response('catalogue/search_too_short.html',
 122                                   {'prefix': query},
 123             context_instance=RequestContext(request))
 124
 125     query = remove_query_syntax_chars(query)
 126
 127     search = Search()
 128
 129     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 130         + search.index.analyze(text=query, field="themes")
 131
 132             # change hints
 133     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 134     tags = split_tags(tags)
 135
 136     author_results = search.search_phrase(query, 'authors', book=True)
 137     translator_results = search.search_phrase(query, 'translators', book=True)
 138
 139     title_results = search.search_phrase(query, 'title', book=True)
 140
 141     # Boost main author/title results with mixed search, and save some of its results for end of list.
 142     # boost author, title results
 143     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 144     author_title_rest = []
 145
 146     for b in author_title_mixed:
 147         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 148         for b2 in also_in_mixed:
 149             b2.boost *= 1.1
 150         if also_in_mixed is []:
 151             author_title_rest.append(b)
 152
 153     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 154     # Because the query is using only one field.
 155     text_phrase = SearchResult.aggregate(
 156         search.search_phrase(query, 'text', snippets=True, book=False),
 157         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 158
 159     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 160
 161     def already_found(results):
 162         def f(e):
 163             for r in results:
 164                 if e.book_id == r.book_id:
 165                     e.boost = 0.9
 166                     results.append(e)
 167                     return True
 168             return False
 169         return f
 170     f = already_found(author_results + translator_results + title_results + text_phrase)
 171     everywhere = filter(lambda x: not f(x), everywhere)
 172
 173     author_results = SearchResult.aggregate(author_results)
 174     translator_results = SearchResult.aggregate(translator_results)
 175     title_results = SearchResult.aggregate(title_results)
 176
 177     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 178
 179     for field, res in [('authors', author_results),
 180                        ('translators', translator_results),
 181                        ('title', title_results),
 182                        ('text', text_phrase),
 183                        ('text', everywhere)]:
 184         res.sort(reverse=True)
 185         for r in res:
 186             search.get_snippets(r, query, field, 3)
 187
 188     suggestion = u''
 189
 190     def ensure_exists(r):
 191         try:
 192             return r.book
 193         except Book.DoesNotExist:
 194             return False
 195
 196     author_results = filter(ensure_exists, author_results)
 197     translator_results = filter(ensure_exists, translator_results)
 198     title_results = filter(ensure_exists, title_results)
 199     text_phrase = filter(ensure_exists, text_phrase)
 200     everywhere = filter(ensure_exists, everywhere)
 201
 202     results = author_results + translator_results + title_results + text_phrase + everywhere
 203     # ensure books do exists & sort them
 204     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 205         res.sort(reverse=True)
 206
 207     # We don't want to redirect to book text, but rather display result page even with one result.
 208     # if len(results) == 1:
 209     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 210     #     if len(fragment_hits) == 1:
 211     #         #anchor = fragment_hits[0]['fragment']
 212     #         #frag = Fragment.objects.get(anchor=anchor)
 213     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 214     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 215     if len(results) == 0:
 216         form = PublishingSuggestForm(initial={"books": query + ", "})
 217         return render_to_response('catalogue/search_no_hits.html',
 218                                   {'tags': tags,
 219                                    'prefix': query,
 220                                    "form": form,
 221                                    'did_you_mean': suggestion},
 222             context_instance=RequestContext(request))
 223
 224     return render_to_response('catalogue/search_multiple_hits.html',
 225                               {'tags': tags,
 226                                'prefix': query,
 227                                'results': {'author': author_results,
 228                                            'translator': translator_results,
 229                                            'title': title_results,
 230                                            'content': text_phrase,
 231                                            'other': everywhere},
 232                                'did_you_mean': suggestion},
 233         context_instance=RequestContext(request))