apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.shortcuts import render_to_response, get_object_or_404
   7 from django.template import RequestContext
   8 from django.views.decorators import cache
   9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
  10 from django.utils.translation import ugettext as _
  11
  12 from catalogue.utils import split_tags
  13 from catalogue.models import Book, Tag, Fragment
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  15 from catalogue.views import JSONResponse
  16 from search import Search, SearchResult
  17 from suggest.forms import PublishingSuggestForm
  18 import re
  19 #import enchant
  20 import json
  21
  22
  23 def match_word_re(word):
  24     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  25         return r"\b%s\b" % word
  26     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  27         return "[[:<:]]%s[[:>:]]" % word
  28
  29
  30 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  31
  32
  33 def remove_query_syntax_chars(query, replace=' '):
  34     return query_syntax_chars.sub(' ', query)
  35
  36
  37 def did_you_mean(query, tokens):
  38     return query
  39     # change = {}
  40     # for t in tokens:
  41     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  42     #     if len(authors) > 0:
  43     #         continue
  44
  45     #     if False:
  46     #         if not dictionary.check(t):
  47     #             try:
  48     #                 change_to = dictionary.suggest(t)[0].lower()
  49     #                 if change_to != t.lower():
  50     #                     change[t] = change_to
  51     #             except IndexError:
  52     #                 pass
  53
  54     # if change == {}:
  55     #     return None
  56
  57     # for frm, to in change.items():
  58     #     query = query.replace(frm, to)
  59
  60     # return query
  61
  62
  63 def hint(request):
  64     prefix = request.GET.get('term', '')
  65     if len(prefix) < 2:
  66         return JSONResponse([])
  67
  68     prefix = remove_query_syntax_chars(prefix)
  69
  70     search = Search()
  71     # tagi beda ograniczac tutaj
  72     # ale tagi moga byc na ksiazce i na fragmentach
  73     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  74     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  75
  76     tags = search.hint_tags(prefix, pdcounter=True)
  77     books = search.hint_books(prefix)
  78
  79     def is_dupe(tag):
  80         if isinstance(tag, PDCounterAuthor):
  81             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  82                 return True
  83         elif isinstance(tag, PDCounterBook):
  84             if filter(lambda b: b.slug == tag.slug, tags):
  85                 return True
  86         return False
  87
  88     tags = filter(lambda t: not is_dupe(t), tags)
  89
  90     def category_name(c):
  91         if c.startswith('pd_'):
  92             c = c[len('pd_'):]
  93         return _(c)
  94
  95     callback = request.GET.get('callback', None)
  96     data = [{'label': t.name,
  97               'category': category_name(t.category),
  98               'id': t.id,
  99               'url': t.get_absolute_url()}
 100               for t in tags] + \
 101               [{'label': b.title,
 102                 'category': _('book'),
 103                 'id': b.id,
 104                 'url': b.get_absolute_url()}
 105                 for b in books]
 106     if callback:
 107         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 108                             content_type="application/json; charset=utf-8")
 109     else:
 110         return JSONResponse(data)
 111
 112
 113 def main(request):
 114     results = {}
 115
 116     results = None
 117     query = None
 118
 119     query = request.GET.get('q', '')
 120
 121     if len(query) < 2:
 122         return render_to_response('catalogue/search_too_short.html',
 123                                   {'prefix': query},
 124             context_instance=RequestContext(request))
 125
 126     query = remove_query_syntax_chars(query)
 127
 128     search = Search()
 129
 130     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 131         + search.index.analyze(text=query, field="themes")
 132
 133             # change hints
 134     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 135     tags = split_tags(tags)
 136
 137     author_results = search.search_phrase(query, 'authors', book=True)
 138     translator_results = search.search_phrase(query, 'translators', book=True)
 139
 140     title_results = search.search_phrase(query, 'title', book=True)
 141
 142     # Boost main author/title results with mixed search, and save some of its results for end of list.
 143     # boost author, title results
 144     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 145     author_title_rest = []
 146
 147     for b in author_title_mixed:
 148         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 149         for b2 in also_in_mixed:
 150             b2.boost *= 1.1
 151         if also_in_mixed is []:
 152             author_title_rest.append(b)
 153
 154     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 155     # Because the query is using only one field.
 156     text_phrase = SearchResult.aggregate(
 157         search.search_phrase(query, 'text', snippets=True, book=False),
 158         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 159
 160     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 161
 162     def already_found(results):
 163         def f(e):
 164             for r in results:
 165                 if e.book_id == r.book_id:
 166                     e.boost = 0.9
 167                     results.append(e)
 168                     return True
 169             return False
 170         return f
 171     f = already_found(author_results + translator_results + title_results + text_phrase)
 172     everywhere = filter(lambda x: not f(x), everywhere)
 173
 174     author_results = SearchResult.aggregate(author_results)
 175     translator_results = SearchResult.aggregate(translator_results)
 176     title_results = SearchResult.aggregate(title_results)
 177
 178     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 179
 180     for field, res in [('authors', author_results),
 181                        ('translators', translator_results),
 182                        ('title', title_results),
 183                        ('text', text_phrase),
 184                        ('text', everywhere)]:
 185         res.sort(reverse=True)
 186         for r in res:
 187             search.get_snippets(r, query, field, 3)
 188
 189     suggestion = u''
 190
 191     def ensure_exists(r):
 192         try:
 193             return r.book
 194         except Book.DoesNotExist:
 195             return False
 196
 197     author_results = filter(ensure_exists, author_results)
 198     translator_results = filter(ensure_exists, translator_results)
 199     title_results = filter(ensure_exists, title_results)
 200     text_phrase = filter(ensure_exists, text_phrase)
 201     everywhere = filter(ensure_exists, everywhere)
 202
 203     results = author_results + translator_results + title_results + text_phrase + everywhere
 204     # ensure books do exists & sort them
 205     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 206         res.sort(reverse=True)
 207
 208     # We don't want to redirect to book text, but rather display result page even with one result.
 209     # if len(results) == 1:
 210     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 211     #     if len(fragment_hits) == 1:
 212     #         #anchor = fragment_hits[0]['fragment']
 213     #         #frag = Fragment.objects.get(anchor=anchor)
 214     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 215     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 216     if len(results) == 0:
 217         form = PublishingSuggestForm(initial={"books": query + ", "})
 218         return render_to_response('catalogue/search_no_hits.html',
 219                                   {'tags': tags,
 220                                    'prefix': query,
 221                                    "form": form,
 222                                    'did_you_mean': suggestion},
 223             context_instance=RequestContext(request))
 224
 225     return render_to_response('catalogue/search_multiple_hits.html',
 226                               {'tags': tags,
 227                                'prefix': query,
 228                                'results': {'author': author_results,
 229                                            'translator': translator_results,
 230                                            'title': title_results,
 231                                            'content': text_phrase,
 232                                            'other': everywhere},
 233                                'did_you_mean': suggestion},
 234         context_instance=RequestContext(request))