apps/search/views.py

   1 # -*- coding: utf-8 -*-
   2
   3 from django.conf import settings
   4 from django.shortcuts import render_to_response, get_object_or_404
   5 from django.template import RequestContext
   6 from django.contrib.auth.decorators import login_required
   7 from django.views.decorators import cache
   8 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
   9 from django.utils.translation import ugettext as _
  10
  11 from catalogue.utils import split_tags
  12 from catalogue.models import Book, Tag, Fragment
  13 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  14 from catalogue.views import JSONResponse
  15 from search import Search, SearchResult
  16 from lucene import StringReader
  17 from suggest.forms import PublishingSuggestForm
  18 from time import sleep
  19 import re
  20 #import enchant
  21 import json
  22
  23
  24 def match_word_re(word):
  25     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
  26         return r"\b%s\b" % word
  27     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
  28         return "[[:<:]]%s[[:>:]]" % word
  29
  30
def did_you_mean(query, tokens):
    """Return a corrected version of ``query``; currently a no-op.

    The function returns ``query`` unchanged and never reads ``tokens``.
    The commented-out draft below is disabled: it refers to a ``dictionary``
    object that is not defined in the visible part of this module (the
    ``enchant`` import at the top of the file is commented out as well).
    """
    return query
    # Disabled draft: skip tokens matching an author tag, ask the spelling
    # dictionary for a replacement of the others, and return None when
    # nothing would change.
    # change = {}
    # for t in tokens:
    #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
    #     if len(authors) > 0:
    #         continue

    #     if False:
    #         if not dictionary.check(t):
    #             try:
    #                 change_to = dictionary.suggest(t)[0].lower()
    #                 if change_to != t.lower():
    #                     change[t] = change_to
    #             except IndexError:
    #                 pass

    # if change == {}:
    #     return None

    # for frm, to in change.items():
    #     query = query.replace(frm, to)

    # return query
  55
  56
  57 def hint(request):
  58     prefix = request.GET.get('term', '')
  59     if len(prefix) < 2:
  60         return JSONResponse([])
  61
  62     search = Search()
  63     # tagi beda ograniczac tutaj
  64     # ale tagi moga byc na ksiazce i na fragmentach
  65     # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
  66     # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
  67
  68     tags = search.hint_tags(prefix, pdcounter=True)
  69     books = search.hint_books(prefix)
  70
  71     def is_dupe(tag):
  72         if isinstance(tag, PDCounterAuthor):
  73             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
  74                 return True
  75         elif isinstance(tag, PDCounterBook):
  76             if filter(lambda b: b.slug == tag.slug, tags):
  77                 return True
  78         return False
  79
  80     tags = filter(lambda t: not is_dupe(t), tags)
  81
  82     def category_name(c):
  83         if c.startswith('pd_'):
  84             c = c[len('pd_'):]
  85         return _(c)
  86
  87     callback = request.GET.get('callback', None)
  88     data = [{'label': t.name,
  89               'category': category_name(t.category),
  90               'id': t.id,
  91               'url': t.get_absolute_url()}
  92               for t in tags] + \
  93               [{'label': b.title,
  94                 'category': _('book'),
  95                 'id': b.id,
  96                 'url': b.get_absolute_url()}
  97                 for b in books]
  98     if callback:
  99         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 100                             content_type="application/json; charset=utf-8")
 101     else:
 102         return JSONResponse(data)
 103
 104
 105 def main(request):
 106     results = {}
 107
 108     results = None
 109     query = None
 110
 111     query = request.GET.get('q', '')
 112
 113     if len(query) < 2:
 114         return render_to_response('catalogue/search_too_short.html',
 115                                   {'prefix': query},
 116             context_instance=RequestContext(request))
 117     search = Search()
 118
 119     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 120         + search.index.analyze(text=query, field="themes")
 121
 122             # change hints
 123     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 124     tags = split_tags(tags)
 125
 126     author_results = search.search_phrase(query, 'authors', book=True)
 127     title_results = search.search_phrase(query, 'title', book=True)
 128
 129     # Boost main author/title results with mixed search, and save some of its results for end of list.
 130     # boost author, title results
 131     author_title_mixed = search.search_some(query, ['authors', 'title', 'tags'], query_terms=theme_terms)
 132     author_title_rest = []
 133
 134     for b in author_title_mixed:
 135         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
 136         for b2 in also_in_mixed:
 137             b2.boost *= 1.1
 138         if also_in_mixed is []:
 139             author_title_rest.append(b)
 140
 141     # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
 142     # Because the query is using only one field.
 143     text_phrase = SearchResult.aggregate(
 144         search.search_phrase(query, 'text', snippets=True, book=False),
 145         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 146
 147     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 148
 149     def already_found(results):
 150         def f(e):
 151             for r in results:
 152                 if e.book_id == r.book_id:
 153                     e.boost = 0.9
 154                     results.append(e)
 155                     return True
 156             return False
 157         return f
 158     f = already_found(author_results + title_results + text_phrase)
 159     everywhere = filter(lambda x: not f(x), everywhere)
 160
 161     author_results = SearchResult.aggregate(author_results)
 162     title_results = SearchResult.aggregate(title_results)
 163
 164     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 165
 166     for field, res in [('authors', author_results),
 167                        ('title', title_results),
 168                        ('text', text_phrase),
 169                        ('text', everywhere)]:
 170         res.sort(reverse=True)
 171         print "get snips %s, res size %d" % (field, len(res))
 172         for r in res:
 173             print "Get snippets for %s" % r
 174             search.get_snippets(r, query, field, 3)
 175         # for r in res:
 176         #     for h in r.hits:
 177         #         h['snippets'] = map(lambda s:
 178         #                             re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
 179         #                                     re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
 180
 181     # suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
 182     suggestion = u''
 183
 184     def ensure_exists(r):
 185         try:
 186             return r.book
 187         except Book.DoesNotExist:
 188             return False
 189
 190     author_results = filter(ensure_exists, author_results)
 191     title_results = filter(ensure_exists, title_results)
 192     text_phrase = filter(ensure_exists, text_phrase)
 193     everywhere = filter(ensure_exists, everywhere)
 194
 195     results = author_results + title_results + text_phrase + everywhere
 196     # ensure books do exists & sort them
 197     results.sort(reverse=True)
 198
 199     if len(results) == 1:
 200         fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 201         if len(fragment_hits) == 1:
 202             #anchor = fragment_hits[0]['fragment']
 203             #frag = Fragment.objects.get(anchor=anchor)
 204             return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 205         return HttpResponseRedirect(results[0].book.get_absolute_url())
 206     elif len(results) == 0:
 207         form = PublishingSuggestForm(initial={"books": query + ", "})
 208         return render_to_response('catalogue/search_no_hits.html',
 209                                   {'tags': tags,
 210                                    'prefix': query,
 211                                    "form": form,
 212                                    'did_you_mean': suggestion},
 213             context_instance=RequestContext(request))
 214
 215     return render_to_response('catalogue/search_multiple_hits.html',
 216                               {'tags': tags,
 217                                'prefix': query,
 218                                'results': {'author': author_results,
 219                                            'title': title_results,
 220                                            'content': text_phrase,
 221                                            'other': everywhere},
 222                                'did_you_mean': suggestion},
 223         context_instance=RequestContext(request))