apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.views.decorators import cache
   7 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   8 from django.utils.translation import ugettext as _
   9
  10 from catalogue.utils import split_tags
  11 from catalogue.models import Book, Tag, Fragment
  12 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  13 from catalogue.views import JSONResponse
  14 from search import Search, SearchResult
  15 from suggest.forms import PublishingSuggestForm
  16 import re
  17 #import enchant
  18 import json
  19
  20
  21 def match_word_re(word):
  22     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  23         return r"\b%s\b" % word
  24     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  25         return "[[:<:]]%s[[:>:]]" % word
  26
  27
  28 query_syntax_chars = re.compile(r"[\\/*:(){}]")
  29
  30
  31 def remove_query_syntax_chars(query, replace=' '):
  32     return query_syntax_chars.sub(' ', query)
  33
  34
  35 def did_you_mean(query, tokens):
  36     return query
  37     # change = {}
  38     # for t in tokens:
  39     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
  40     #     if len(authors) > 0:
  41     #         continue
  42
  43     #     if False:
  44     #         if not dictionary.check(t):
  45     #             try:
  46     #                 change_to = dictionary.suggest(t)[0].lower()
  47     #                 if change_to != t.lower():
  48     #                     change[t] = change_to
  49     #             except IndexError:
  50     #                 pass
  51
  52     # if change == {}:
  53     #     return None
  54
  55     # for frm, to in change.items():
  56     #     query = query.replace(frm, to)
  57
  58     # return query
  59
  60
  61 def hint(request):
  62     prefix = request.GET.get('term', '')
  63     if len(prefix) < 2:
  64         return JSONResponse([])
  65
  66     prefix = remove_query_syntax_chars(prefix)
  67
  68     search = Search()
  69     # tagi beda ograniczac tutaj
  70     # ale tagi moga byc na ksiazce i na fragmentach
  71     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  72     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  73
  74     tags = search.hint_tags(prefix, pdcounter=True)
  75     books = search.hint_books(prefix)
  76
  77     def is_dupe(tag):
  78         if isinstance(tag, PDCounterAuthor):
  79             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  80                 return True
  81         elif isinstance(tag, PDCounterBook):
  82             if filter(lambda b: b.slug == tag.slug, tags):
  83                 return True
  84         return False
  85
  86     tags = filter(lambda t: not is_dupe(t), tags)
  87
  88     def category_name(c):
  89         if c.startswith('pd_'):
  90             c = c[len('pd_'):]
  91         return _(c)
  92
  93     callback = request.GET.get('callback', None)
  94     data = [{'label': t.name,
  95               'category': category_name(t.category),
  96               'id': t.id,
  97               'url': t.get_absolute_url()}
  98               for t in tags] + \
  99               [{'label': b.title,
 100                 'category': _('book'),
 101                 'id': b.id,
 102                 'url': b.get_absolute_url()}
 103                 for b in books]
 104     if callback:
 105         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 106                             content_type="application/json; charset=utf-8")
 107     else:
 108         return JSONResponse(data)
 109
 110
 111 def main(request):
 112     results = {}
 113
 114     results = None
 115     query = None
 116
 117     query = request.GET.get('q', '')
 118
 119     if len(query) < 2:
 120         return render_to_response('catalogue/search_too_short.html',
 121                                   {'prefix': query},
 122             context_instance=RequestContext(request))
 123
 124     query = remove_query_syntax_chars(query)
 125
 126     search = Search()
 127
 128     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 129         + search.index.analyze(text=query, field="themes")
 130
 131             # change hints
 132     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 133     tags = split_tags(tags)
 134
 135     author_results = search.search_phrase(query, 'authors', book=True)
 136     translator_results = search.search_phrase(query, 'translators', book=True)
 137
 138     title_results = search.search_phrase(query, 'title', book=True)
 139
 140     # Boost main author/title results with mixed search, and save some of its results for end of list.
 141     # boost author, title results
 142     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 143     author_title_rest = []
 144
 145     for b in author_title_mixed:
 146         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 147         for b2 in also_in_mixed:
 148             b2.boost *= 1.1
 149         if also_in_mixed is []:
 150             author_title_rest.append(b)
 151
 152     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 153     # Because the query is using only one field.
 154     text_phrase = SearchResult.aggregate(
 155         search.search_phrase(query, 'text', snippets=True, book=False),
 156         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 157
 158     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 159
 160     def already_found(results):
 161         def f(e):
 162             for r in results:
 163                 if e.book_id == r.book_id:
 164                     e.boost = 0.9
 165                     results.append(e)
 166                     return True
 167             return False
 168         return f
 169     f = already_found(author_results + translator_results + title_results + text_phrase)
 170     everywhere = filter(lambda x: not f(x), everywhere)
 171
 172     author_results = SearchResult.aggregate(author_results)
 173     translator_results = SearchResult.aggregate(translator_results)
 174     title_results = SearchResult.aggregate(title_results)
 175
 176     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 177
 178     for field, res in [('authors', author_results),
 179                        ('translators', translator_results),
 180                        ('title', title_results),
 181                        ('text', text_phrase),
 182                        ('text', everywhere)]:
 183         res.sort(reverse=True)
 184         for r in res:
 185             search.get_snippets(r, query, field, 3)
 186
 187     suggestion = u''
 188
 189     def ensure_exists(r):
 190         try:
 191             return r.book
 192         except Book.DoesNotExist:
 193             return False
 194
 195     author_results = filter(ensure_exists, author_results)
 196     translator_results = filter(ensure_exists, translator_results)
 197     title_results = filter(ensure_exists, title_results)
 198     text_phrase = filter(ensure_exists, text_phrase)
 199     everywhere = filter(ensure_exists, everywhere)
 200
 201     results = author_results + translator_results + title_results + text_phrase + everywhere
 202     # ensure books do exists & sort them
 203     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 204         res.sort(reverse=True)
 205
 206     # We don't want to redirect to book text, but rather display result page even with one result.
 207     # if len(results) == 1:
 208     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 209     #     if len(fragment_hits) == 1:
 210     #         #anchor = fragment_hits[0]['fragment']
 211     #         #frag = Fragment.objects.get(anchor=anchor)
 212     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 213     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 214     if len(results) == 0:
 215         form = PublishingSuggestForm(initial={"books": query + ", "})
 216         return render_to_response('catalogue/search_no_hits.html',
 217                                   {'tags': tags,
 218                                    'prefix': query,
 219                                    "form": form,
 220                                    'did_you_mean': suggestion},
 221             context_instance=RequestContext(request))
 222
 223     return render_to_response('catalogue/search_multiple_hits.html',
 224                               {'tags': tags,
 225                                'prefix': query,
 226                                'results': {'author': author_results,
 227                                            'translator': translator_results,
 228                                            'title': title_results,
 229                                            'content': text_phrase,
 230                                            'other': everywhere},
 231                                'did_you_mean': suggestion},
 232         context_instance=RequestContext(request))