1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 
   4 from django.conf import settings
 
   5 from django.http.response import HttpResponseRedirect
 
   6 from django.shortcuts import render
 
   7 from django.views.decorators import cache
 
   8 from django.http import HttpResponse, JsonResponse
 
  10 from catalogue.models import Book, Tag
 
  11 from pdcounter.models import Author
 
  12 from picture.models import Picture
 
  13 from search.index import Search, SearchResult, PictureResult
 
  14 from .forms import SearchFilters
 
  15 from suggest.forms import PublishingSuggestForm
 
  19 from wolnelektury.utils import re_escape
 
  22 def match_word_re(word):
 
  23     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
 
  24         return r"\b%s\b" % word
 
  25     elif 'mysql' in settings.DATABASES['default']['ENGINE']:
 
  26         return "[[:<:]]%s[[:>:]]" % word
 
  29 query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")
 
  32 def remove_query_syntax_chars(query, replace=' '):
 
  33     return query_syntax_chars.sub(replace, query)
 
  36 def did_you_mean(query, tokens):
 
  40     #     authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
 
  41     #     if len(authors) > 0:
 
  45     #         if not dictionary.check(t):
 
  47     #                 change_to = dictionary.suggest(t)[0].lower()
 
  48     #                 if change_to != t.lower():
 
  49     #                     change[t] = change_to
 
  56     # for frm, to in change.items():
 
  57     #     query = query.replace(frm, to)
 
  63 def hint(request, mozhint=False, param='term'):
 
  64     prefix = request.GET.get(param, '')
 
  66         return JsonResponse([], safe=False)
 
  68     prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
 
  71         limit = int(request.GET.get('max', ''))
 
  78     authors = Tag.objects.filter(
 
  79         category='author', name_pl__iregex='\m' + prefix).only('name', 'id', 'slug', 'category')
 
  84             'url': author.get_absolute_url(),
 
  86         for author in authors[:limit]
 
  89         for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]:
 
  90             author_str = b.author_unicode()
 
  91             translator = b.translator()
 
  93                 author_str += ' (tłum. ' + translator + ')'
 
  99                     'url': b.get_absolute_url()
 
 112     callback = request.GET.get('callback', None)
 
 114         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
 
 115                             content_type="application/json; charset=utf-8")
 
 117         return JsonResponse(data, safe=False)
 
 123     filters = SearchFilters(request.GET)
 
 125         'title': 'Wynik wyszukiwania',
 
 126         'query': request.GET.get('q', ''),
 
 129     if filters.is_valid():
 
 130         ctx['results'] = filters.results()
 
 131         for k, v in ctx['results'].items():
 
 133                 ctx['hasresults'] = True
 
 135     return render(request, 'search/results.html', ctx)
 
 140     if request.EXPERIMENTS['layout'].value:
 
 141         return search(request)
 
 143     query = request.GET.get('q', '')
 
 145     format = request.GET.get('format')
 
 146     lang = request.GET.get('lang')
 
 147     epoch = request.GET.get('epoch')
 
 148     kind = request.GET.get('kind')
 
 149     genre = request.GET.get('genre')
 
 153             request, 'catalogue/search_too_short.html',
 
 155     elif len(query) > 256:
 
 157             request, 'catalogue/search_too_long.html',
 
 160     query = prepare_query(query)
 
 161     if not (format or lang or epoch or kind or genre):
 
 162         pd_authors = search_pd_authors(query)
 
 165     if not format or format != 'obraz':
 
 166         books = search_books(
 
 169             only_audio=format=='audio',
 
 170             only_synchro=format=='synchro',
 
 177     if (not format or format == 'obraz') and not lang:
 
 178         pictures = search_pictures(
 
 189     if not (books or pictures or pd_authors):
 
 190         form = PublishingSuggestForm(initial={"books": query + ", "})
 
 193             'catalogue/search_no_hits.html',
 
 196                 'did_you_mean': suggestion
 
 199     if not (books or pictures) and len(pd_authors) == 1:
 
 200         return HttpResponseRedirect(pd_authors[0].get_absolute_url())
 
 204         'catalogue/search_multiple_hits.html',
 
 206             'pd_authors': pd_authors,
 
 208             'pictures': pictures,
 
 209             'did_you_mean': suggestion,
 
 218                 'epoch': Tag.objects.filter(category='epoch', for_books=True),
 
 219                 'genre': Tag.objects.filter(category='genre', for_books=True),
 
 220                 'kind': Tag.objects.filter(category='kind', for_books=True),
 
 224 def search_books(query, lang=None, only_audio=False, only_synchro=False, epoch=None, kind=None, genre=None):
 
 228     words = query.split()
 
 230         (['authors', 'authors_nonstem'], True),
 
 231         (['title', 'title_nonstem'], True),
 
 232         (['metadata', 'metadata_nonstem'], True),
 
 233         (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
 
 235     for fields, is_book in fieldsets:
 
 236         search_fields += fields
 
 237         results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
 
 240     for results_part in results_parts:
 
 241         for result in sorted(SearchResult.aggregate(results_part), reverse=True):
 
 242             book_id = result.book_id
 
 243             if book_id in ids_results:
 
 244                 ids_results[book_id].merge(result)
 
 246                 results.append(result)
 
 247                 ids_results[book_id] = result
 
 248     descendant_ids = set(
 
 249         Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
 
 250     results = [result for result in results if result.book_id not in descendant_ids]
 
 251     for result in results:
 
 252         search.get_snippets(result, query, num=3)
 
 254     def ensure_exists(r):
 
 258         except Book.DoesNotExist:
 
 261         if lang and r.book.language != lang:
 
 263         if only_audio and not r.book.has_mp3_file():
 
 265         if only_synchro and not r.book.has_daisy_file():
 
 267         if epoch and not r.book.tags.filter(category='epoch', slug=epoch).exists():
 
 269         if kind and not r.book.tags.filter(category='kind', slug=kind).exists():
 
 271         if genre and not r.book.tags.filter(category='genre', slug=genre).exists():
 
 276     results = [r for r in results if ensure_exists(r)]
 
 280 def search_pictures(query, epoch=None, kind=None, genre=None):
 
 284     words = query.split()
 
 286         (['authors', 'authors_nonstem'], True),
 
 287         (['title', 'title_nonstem'], True),
 
 288         (['metadata', 'metadata_nonstem'], True),
 
 289         (['themes_pl', 'themes_pl_nonstem'], False),
 
 291     for fields, is_book in fieldsets:
 
 292         search_fields += fields
 
 293         results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
 
 296     for results_part in results_parts:
 
 297         for result in sorted(PictureResult.aggregate(results_part), reverse=True):
 
 298             picture_id = result.picture_id
 
 299             if picture_id in ids_results:
 
 300                 ids_results[picture_id].merge(result)
 
 302                 results.append(result)
 
 303                 ids_results[picture_id] = result
 
 305     def ensure_exists(r):
 
 309         except Picture.DoesNotExist:
 
 312         if epoch and not r.picture.tags.filter(category='epoch', slug=epoch).exists():
 
 314         if kind and not r.picture.tags.filter(category='kind', slug=kind).exists():
 
 316         if genre and not r.picture.tags.filter(category='genre', slug=genre).exists():
 
 321     results = [r for r in results if ensure_exists(r)]
 
 325 def search_pd_authors(query):
 
 326     pd_authors = Author.objects.filter(name__icontains=query)
 
 327     existing_slugs = Tag.objects.filter(
 
 328         category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
 
 329         .values_list('slug', flat=True)
 
 330     pd_authors = pd_authors.exclude(slug__in=existing_slugs)
 
 334 def prepare_query(query):
 
 335     query = ' '.join(query.split())
 
 336     # filter out private use characters
 
 338     query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 
 339     query = remove_query_syntax_chars(query)
 
 341     words = query.split()
 
 343         query = ' '.join(words[:10])