1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 
   4 from django.conf import settings
 
   5 from django.http.response import HttpResponseRedirect
 
   6 from django.shortcuts import render
 
   7 from django.views.decorators import cache
 
   8 from django.http import HttpResponse, JsonResponse
 
  10 from catalogue.models import Book, Tag
 
  11 from pdcounter.models import Author
 
  12 from picture.models import Picture
 
  13 from search.index import Search, SearchResult, PictureResult
 
  14 from suggest.forms import PublishingSuggestForm
 
  18 from wolnelektury.utils import re_escape
 
  21 def match_word_re(word):
 
  22     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
 
  23         return r"\b%s\b" % word
 
  24     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
 
  25         return "[[:<:]]%s[[:>:]]" % word
 
  28 query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")
 
  31 def remove_query_syntax_chars(query, replace=' '):
 
  32     return query_syntax_chars.sub(replace, query)
 
  35 def did_you_mean(query, tokens):
 
  39     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
 
  40     #     if len(authors) > 0:
 
  44     #         if not dictionary.check(t):
 
  46     #                 change_to = dictionary.suggest(t)[0].lower()
 
  47     #                 if change_to != t.lower():
 
  48     #                     change[t] = change_to
 
  55     # for frm, to in change.items():
 
  56     #     query = query.replace(frm, to)
 
  63     prefix = request.GET.get('term', '')
 
  65         return JsonResponse([], safe=False)
 
  67     prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
 
  70         limit = int(request.GET.get('max', ''))
 
  77     authors = Tag.objects.filter(
 
  78         category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
 
  83             'url': author.get_absolute_url(),
 
  85         for author in authors[:limit]
 
  91                 'author': b.author_unicode(),
 
  93                 'url': b.get_absolute_url()
 
  95             for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]
 
  97     callback = request.GET.get('callback', None)
 
  99         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 
 100                             content_type="application/json; charset=utf-8")
 
 102         return JsonResponse(data, safe=False)
 
 107     query = request.GET.get('q', '')
 
 110             request, 'catalogue/search_too_short.html',
 
 112     elif len(query) > 256:
 
 114             request, 'catalogue/search_too_long.html',
 
 117     query = prepare_query(query)
 
 118     pd_authors = search_pd_authors(query)
 
 119     books = search_books(query)
 
 120     pictures = search_pictures(query)
 
 123     if not (books or pictures or pd_authors):
 
 124         form = PublishingSuggestForm(initial={"books": query + ", "})
 
 127             'catalogue/search_no_hits.html',
 
 130                 'did_you_mean': suggestion
 
 133     if not (books or pictures) and len(pd_authors) == 1:
 
 134         return HttpResponseRedirect(pd_authors[0].get_absolute_url())
 
 138         'catalogue/search_multiple_hits.html',
 
 140             'pd_authors': pd_authors,
 
 142             'pictures': pictures,
 
 143             'did_you_mean': suggestion
 
 146 def search_books(query):
 
 150     words = query.split()
 
 152         (['authors', 'authors_nonstem'], True),
 
 153         (['title', 'title_nonstem'], True),
 
 154         (['metadata', 'metadata_nonstem'], True),
 
 155         (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
 
 157     for fields, is_book in fieldsets:
 
 158         search_fields += fields
 
 159         results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
 
 162     for results_part in results_parts:
 
 163         for result in sorted(SearchResult.aggregate(results_part), reverse=True):
 
 164             book_id = result.book_id
 
 165             if book_id in ids_results:
 
 166                 ids_results[book_id].merge(result)
 
 168                 results.append(result)
 
 169                 ids_results[book_id] = result
 
 170     descendant_ids = set(
 
 171         Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
 
 172     results = [result for result in results if result.book_id not in descendant_ids]
 
 173     for result in results:
 
 174         search.get_snippets(result, query, num=3)
 
 176     def ensure_exists(r):
 
 179         except Book.DoesNotExist:
 
 182     results = [r for r in results if ensure_exists(r)]
 
 186 def search_pictures(query):
 
 190     words = query.split()
 
 192         (['authors', 'authors_nonstem'], True),
 
 193         (['title', 'title_nonstem'], True),
 
 194         (['metadata', 'metadata_nonstem'], True),
 
 195         (['themes_pl', 'themes_pl_nonstem'], False),
 
 197     for fields, is_book in fieldsets:
 
 198         search_fields += fields
 
 199         results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
 
 202     for results_part in results_parts:
 
 203         for result in sorted(PictureResult.aggregate(results_part), reverse=True):
 
 204             picture_id = result.picture_id
 
 205             if picture_id in ids_results:
 
 206                 ids_results[picture_id].merge(result)
 
 208                 results.append(result)
 
 209                 ids_results[picture_id] = result
 
 211     def ensure_exists(r):
 
 214         except Picture.DoesNotExist:
 
 217     results = [r for r in results if ensure_exists(r)]
 
 221 def search_pd_authors(query):
 
 222     pd_authors = Author.objects.filter(name__icontains=query)
 
 223     existing_slugs = Tag.objects.filter(
 
 224         category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
 
 225         .values_list('slug', flat=True)
 
 226     pd_authors = pd_authors.exclude(slug__in=existing_slugs)
 
 230 def prepare_query(query):
 
 231     query = ' '.join(query.split())
 
 232     # filter out private use characters
 
 234     query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 
 235     query = remove_query_syntax_chars(query)
 
 237     words = query.split()
 
 239         query = ' '.join(words[:10])