1 # -*- coding: utf-8 -*-
 
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 
   5 from django.conf import settings
 
   6 from django.shortcuts import render_to_response, get_object_or_404
 
   7 from django.template import RequestContext
 
   8 from django.views.decorators import cache
 
   9 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
 
  10 from django.utils.translation import ugettext as _
 
  12 from catalogue.utils import split_tags
 
  13 from catalogue.models import Book, Tag, Fragment
 
  14 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
 
  15 from catalogue.views import JSONResponse
 
  16 from search import Search, SearchResult
 
  17 from suggest.forms import PublishingSuggestForm
 
  23 def match_word_re(word):
 
  24     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
 
  25         return r"\b%s\b" % word
 
  26     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
 
  27         return "[[:<:]]%s[[:>:]]" % word
 
  30 query_syntax_chars = re.compile(r"[\\/*:(){}]")
 
  33 def remove_query_syntax_chars(query, replace=' '):
 
  34     return query_syntax_chars.sub(' ', query)
 
  37 def did_you_mean(query, tokens):
 
  41     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
 
  42     #     if len(authors) > 0:
 
  46     #         if not dictionary.check(t):
 
  48     #                 change_to = dictionary.suggest(t)[0].lower()
 
  49     #                 if change_to != t.lower():
 
  50     #                     change[t] = change_to
 
  57     # for frm, to in change.items():
 
  58     #     query = query.replace(frm, to)
 
  64     prefix = request.GET.get('term', '')
 
  66         return JSONResponse([])
 
  68     prefix = remove_query_syntax_chars(prefix)
 
  71     # tags will narrow the results here
 
  72     # but tags can be attached to a book and to fragments
 
  73     # if the tags concern only books, the new ones must belong to the same book
 
  74     # if instead they concern themes, they must belong to the same fragment.
 
  76     tags = search.hint_tags(prefix, pdcounter=True)
 
  77     books = search.hint_books(prefix)
 
  80         if isinstance(tag, PDCounterAuthor):
 
  81             if filter(lambda t: t.slug == tag.slug and t != tag, tags):
 
  83         elif isinstance(tag, PDCounterBook):
 
  84             if filter(lambda b: b.slug == tag.slug, tags):
 
  88     tags = filter(lambda t: not is_dupe(t), tags)
 
  91         if c.startswith('pd_'):
 
  95     callback = request.GET.get('callback', None)
 
  96     data = [{'label': t.name,
 
  97               'category': category_name(t.category),
 
  99               'url': t.get_absolute_url()}
 
 102                 'category': _('book'),
 
 104                 'url': b.get_absolute_url()}
 
 107         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 
 108                             content_type="application/json; charset=utf-8")
 
 110         return JSONResponse(data)
 
 119     query = request.GET.get('q', '')
 
 122         return render_to_response('catalogue/search_too_short.html',
 
 124             context_instance=RequestContext(request))
 
 126     query = remove_query_syntax_chars(query)
 
 130     theme_terms = search.index.analyze(text=query, field="themes_pl") \
 
 131         + search.index.analyze(text=query, field="themes")
 
 134     tags = search.hint_tags(query, pdcounter=True, prefix=False)
 
 135     tags = split_tags(tags)
 
 137     author_results = search.search_phrase(query, 'authors', book=True)
 
 138     translator_results = search.search_phrase(query, 'translators', book=True)
 
 140     title_results = search.search_phrase(query, 'title', book=True)
 
 142     # Boost main author/title results with mixed search, and save some of its results for end of list.
 
 143     # boost author, title results
 
 144     author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
 
 145     author_title_rest = []
 
 147     for b in author_title_mixed:
 
 148         also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
 
 149         for b2 in also_in_mixed:
 
 151         if also_in_mixed is []:
 
 152             author_title_rest.append(b)
 
 154     # Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
 
 155     # Because the query is using only one field.
 
 156     text_phrase = SearchResult.aggregate(
 
 157         search.search_phrase(query, 'text', snippets=True, book=False),
 
 158         search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
 
 160     everywhere = search.search_everywhere(query, query_terms=theme_terms)
 
 162     def already_found(results):
 
 165                 if e.book_id == r.book_id:
 
 171     f = already_found(author_results + translator_results + title_results + text_phrase)
 
 172     everywhere = filter(lambda x: not f(x), everywhere)
 
 174     author_results = SearchResult.aggregate(author_results)
 
 175     translator_results = SearchResult.aggregate(translator_results)
 
 176     title_results = SearchResult.aggregate(title_results)
 
 178     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 
 180     for field, res in [('authors', author_results),
 
 181                        ('translators', translator_results),
 
 182                        ('title', title_results),
 
 183                        ('text', text_phrase),
 
 184                        ('text', everywhere)]:
 
 185         res.sort(reverse=True)
 
 187             search.get_snippets(r, query, field, 3)
 
 191     def ensure_exists(r):
 
 194         except Book.DoesNotExist:
 
 197     author_results = filter(ensure_exists, author_results)
 
 198     translator_results = filter(ensure_exists, translator_results)
 
 199     title_results = filter(ensure_exists, title_results)
 
 200     text_phrase = filter(ensure_exists, text_phrase)
 
 201     everywhere = filter(ensure_exists, everywhere)
 
 203     results = author_results + translator_results + title_results + text_phrase + everywhere
 
 204     # ensure books exist & sort them
 
 205     for res in (author_results, translator_results, title_results, text_phrase, everywhere):
 
 206         res.sort(reverse=True)
 
 208     # We don't want to redirect to book text, but rather display result page even with one result.
 
 209     # if len(results) == 1:
 
 210     #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
 
 211     #     if len(fragment_hits) == 1:
 
 212     #         #anchor = fragment_hits[0]['fragment']
 
 213     #         #frag = Fragment.objects.get(anchor=anchor)
 
 214     #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
 
 215     #     return HttpResponseRedirect(results[0].book.get_absolute_url())
 
 216     if len(results) == 0:
 
 217         form = PublishingSuggestForm(initial={"books": query + ", "})
 
 218         return render_to_response('catalogue/search_no_hits.html',
 
 222                                    'did_you_mean': suggestion},
 
 223             context_instance=RequestContext(request))
 
 225     return render_to_response('catalogue/search_multiple_hits.html',
 
 228                                'results': {'author': author_results,
 
 229                                            'translator': translator_results,
 
 230                                            'title': title_results,
 
 231                                            'content': text_phrase,
 
 232                                            'other': everywhere},
 
 233                                'did_you_mean': suggestion},
 
 234         context_instance=RequestContext(request))