Fixes and experiments.
[wolnelektury.git] / src / search / views.py
index f7f6040..5acbffa 100644 (file)
@@ -1,17 +1,16 @@
-# -*- coding: utf-8 -*-
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.conf import settings
 from django.http.response import HttpResponseRedirect
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.conf import settings
 from django.http.response import HttpResponseRedirect
-from django.shortcuts import render_to_response
-from django.template import RequestContext
+from django.shortcuts import render
 from django.views.decorators import cache
 from django.http import HttpResponse, JsonResponse
 
 from catalogue.models import Book, Tag
 from pdcounter.models import Author
 from django.views.decorators import cache
 from django.http import HttpResponse, JsonResponse
 
 from catalogue.models import Book, Tag
 from pdcounter.models import Author
-from search.index import Search, SearchResult
+from picture.models import Picture
+from search.index import Search, SearchResult, PictureResult
 from suggest.forms import PublishingSuggestForm
 import re
 import json
 from suggest.forms import PublishingSuggestForm
 import re
 import json
@@ -26,7 +25,7 @@ def match_word_re(word):
         return "[[:<:]]%s[[:>:]]" % word
 
 
         return "[[:<:]]%s[[:>:]]" % word
 
 
-query_syntax_chars = re.compile(r"[\\/*:(){}]")
+query_syntax_chars = re.compile(r"[\\/*:(){}?.[\]+]")  # single-character class; this revision adds ? . [ ] + to the earlier \ / * : ( ) { } set
 
 
 def remove_query_syntax_chars(query, replace=' '):
 
 
 def remove_query_syntax_chars(query, replace=' '):
@@ -60,8 +59,8 @@ def did_you_mean(query, tokens):
 
 
 @cache.never_cache
 
 
 @cache.never_cache
-def hint(request):
-    prefix = request.GET.get('term', '')
+def hint(request, mozhint=False, param='term'):
+    prefix = request.GET.get(param, '')
     if len(prefix) < 2:
         return JsonResponse([], safe=False)
 
     if len(prefix) < 2:
         return JsonResponse([], safe=False)
 
@@ -86,15 +85,29 @@ def hint(request):
         for author in authors[:limit]
     ]
     if len(data) < limit:
         for author in authors[:limit]
     ]
     if len(data) < limit:
-        data += [
-            {
-                'label': b.title,
-                'author': b.author_unicode(),
-                'id': b.id,
-                'url': b.get_absolute_url()
-            }
-            for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
+        for b in Book.objects.filter(findable=True, title__iregex='\m' + prefix)[:limit-len(data)]:
+            author_str = b.author_unicode()
+            translator = b.translator()
+            if translator:
+                author_str += ' (tłum. ' + translator + ')'
+            data.append(
+                {
+                    'label': b.title,
+                    'author': author_str,
+                    'id': b.id,
+                    'url': b.get_absolute_url()
+                }
+            )
+
+    if mozhint:
+        data = [
+            prefix,
+            [
+                item['label']
+                for item in data
+            ]
         ]
         ]
+
     callback = request.GET.get('callback', None)
     if callback:
         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
     callback = request.GET.get('callback', None)
     if callback:
         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
@@ -106,46 +119,100 @@ def hint(request):
 @cache.never_cache
 def main(request):
     query = request.GET.get('q', '')
 @cache.never_cache
 def main(request):
     query = request.GET.get('q', '')
-    query = ' '.join(query.split())
-    # filter out private use characters
-    import unicodedata
-    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
+    # Search view: reads 'q' plus optional filters, searches pd authors, books and pictures, then renders or redirects.
+    format = request.GET.get('format')  # each filter is None when its GET parameter is absent; NOTE(review): shadows the builtin format()
+    lang = request.GET.get('lang')
+    epoch = request.GET.get('epoch')
+    kind = request.GET.get('kind')
+    genre = request.GET.get('genre')
 
     if len(query) < 2:
 
     if len(query) < 2:
-        return render_to_response(
-            'catalogue/search_too_short.html', {'prefix': query},
-            context_instance=RequestContext(request))
+        return render(  # length limits are checked on the raw query, before prepare_query() normalizes it
+            request, 'catalogue/search_too_short.html',
+            {'prefix': query})
     elif len(query) > 256:
     elif len(query) > 256:
-        return render_to_response(
-            'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
+        return render(
+            request, 'catalogue/search_too_long.html',
+            {'prefix': query})
 
 
-    query = remove_query_syntax_chars(query)
+    query = prepare_query(query)  # collapse whitespace, drop private-use and query-syntax characters, cap at 10 words
+    if not (format or lang or epoch or kind or genre):  # pd authors are only looked up when no filter is active
+        pd_authors = search_pd_authors(query)
+    else:
+        pd_authors = []
+    if not format or format != 'obraz':  # NOTE(review): same truth table as plain `format != 'obraz'`
+        books = search_books(
+            query,
+            lang=lang,
+            only_audio=format=='audio',
+            only_synchro=format=='synchro',
+            epoch=epoch,
+            kind=kind,
+            genre=genre
+        )
+    else:
+        books = []
+    if (not format or format == 'obraz') and not lang:  # pictures: only with no format or format 'obraz', and never with a language filter
+        pictures = search_pictures(
+            query,
+            epoch=epoch,
+            kind=kind,
+            genre=genre
+        )
+    else:
+        pictures = []
+    
+    suggestion = ''  # never reassigned in this function; reaches the templates as an empty 'did_you_mean'
 
 
-    words = query.split()
-    if len(words) > 10:
-        query = ' '.join(words[:10])
+    if not (books or pictures or pd_authors):  # nothing found in any source: show the no-hits page with a suggestion form
+        form = PublishingSuggestForm(initial={"books": query + ", "})
+        return render(
+            request,
+            'catalogue/search_no_hits.html',
+            {
+                'form': form,
+                'did_you_mean': suggestion
+            })
 
 
-    search = Search()
+    if not (books or pictures) and len(pd_authors) == 1:  # the only hit is one pd author: redirect straight to that author's page
+        return HttpResponseRedirect(pd_authors[0].get_absolute_url())
 
 
-    pd_authors = Author.objects.filter(name__icontains=query)
-    existing_slugs = Tag.objects.filter(
-        category='author', slug__in=list(pd_authors.values_list('slug', flat=True)))\
-        .values_list('slug', flat=True)
-    pd_authors = pd_authors.exclude(slug__in=existing_slugs)
+    return render(
+        request,
+        'catalogue/search_multiple_hits.html',
+        {
+            'pd_authors': pd_authors,
+            'books': books,
+            'pictures': pictures,
+            'did_you_mean': suggestion,
+            'set': {  # the filter values taken from this request, passed through unchanged
+                'lang': lang,
+                'format': format,
+                'epoch': epoch,
+                'kind': kind,
+                'genre': genre,
+            },
+            'tags': {  # Tag querysets per category, limited to for_books=True; presumably the filter choices shown by the template
+                'epoch': Tag.objects.filter(category='epoch', for_books=True),
+                'genre': Tag.objects.filter(category='genre', for_books=True),
+                'kind': Tag.objects.filter(category='kind', for_books=True),
+            },
+        })
 
 
+def search_books(query, lang=None, only_audio=False, only_synchro=False, epoch=None, kind=None, genre=None):
+    search = Search()
     results_parts = []
     results_parts = []
-
     search_fields = []
     search_fields = []
+    words = query.split()
     fieldsets = (
     fieldsets = (
-        (['authors'], True),
-        (['title'], True),
-        (['metadata'], True),
-        (['text', 'themes_pl'], False),
+        (['authors', 'authors_nonstem'], True),
+        (['title', 'title_nonstem'], True),
+        (['metadata', 'metadata_nonstem'], True),
+        (['text', 'text_nonstem', 'themes_pl', 'themes_pl_nonstem'], False),
     )
     for fields, is_book in fieldsets:
         search_fields += fields
     )
     for fields, is_book in fieldsets:
         search_fields += fields
-        results_parts.append(search.search_words(words, search_fields, book=is_book))
-
+        results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
     results = []
     ids_results = {}
     for results_part in results_parts:
     results = []
     ids_results = {}
     for results_part in results_parts:
@@ -156,42 +223,100 @@ def main(request):
             else:
                 results.append(result)
                 ids_results[book_id] = result
             else:
                 results.append(result)
                 ids_results[book_id] = result
-
     descendant_ids = set(
         Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
     results = [result for result in results if result.book_id not in descendant_ids]
     descendant_ids = set(
         Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
     results = [result for result in results if result.book_id not in descendant_ids]
-
     for result in results:
         search.get_snippets(result, query, num=3)
 
     for result in results:
         search.get_snippets(result, query, num=3)
 
-    suggestion = u''
-
     def ensure_exists(r):
         try:
     def ensure_exists(r):
         try:
-            return r.book
+            if not r.book:
+                return False
         except Book.DoesNotExist:
             return False
 
         except Book.DoesNotExist:
             return False
 
-    results = filter(ensure_exists, results)
+        if lang and r.book.language != lang:
+            return False
+        if only_audio and not r.book.has_mp3_file():
+            return False
+        if only_synchro and not r.book.has_daisy_file():
+            return False
+        if epoch and not r.book.tags.filter(category='epoch', slug=epoch).exists():
+            return False
+        if kind and not r.book.tags.filter(category='kind', slug=kind).exists():
+            return False
+        if genre and not r.book.tags.filter(category='genre', slug=genre).exists():
+            return False
 
 
-    if not results and not pd_authors:
-        form = PublishingSuggestForm(initial={"books": query + ", "})
-        return render_to_response(
-            'catalogue/search_no_hits.html',
-            {
-                'form': form,
-                'did_you_mean': suggestion
-            },
-            context_instance=RequestContext(request))
+        return True
 
 
-    if not results and len(pd_authors) == 1:
-        return HttpResponseRedirect(pd_authors[0].get_absolute_url())
+    results = [r for r in results if ensure_exists(r)]
+    return results
 
 
-    return render_to_response(
-        'catalogue/search_multiple_hits.html',
-        {
-            'pd_authors': pd_authors,
-            'results': results,
-            'did_you_mean': suggestion
-        },
-        context_instance=RequestContext(request))
+
+def search_pictures(query, epoch=None, kind=None, genre=None):  # Search the index for pictures matching `query`, optionally keeping only those tagged with the given epoch/kind/genre slugs; returns a list of results.
+    search = Search()
+    results_parts = []
+    search_fields = []
+    words = query.split()
+    fieldsets = (  # (field group, value passed as book=) pairs, tried in this order
+        (['authors', 'authors_nonstem'], True),
+        (['title', 'title_nonstem'], True),
+        (['metadata', 'metadata_nonstem'], True),
+        (['themes_pl', 'themes_pl_nonstem'], False),
+    )
+    for fields, is_book in fieldsets:
+        search_fields += fields  # cumulative: each pass searches every group added so far and passes the newest group as `required`
+        results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
+    results = []
+    ids_results = {}  # picture_id -> the first result object seen for that picture
+    for results_part in results_parts:
+        for result in sorted(PictureResult.aggregate(results_part), reverse=True):
+            picture_id = result.picture_id
+            if picture_id in ids_results:
+                ids_results[picture_id].merge(result)  # later hits for the same picture are merged into the first one
+            else:
+                results.append(result)
+                ids_results[picture_id] = result
+
+    def ensure_exists(r):  # True only if r.picture resolves to a truthy object that carries every requested tag
+        try:
+            if not r.picture:
+                return False
+        except Picture.DoesNotExist:  # presumably an index entry whose Picture row no longer exists
+            return False
+
+        if epoch and not r.picture.tags.filter(category='epoch', slug=epoch).exists():
+            return False
+        if kind and not r.picture.tags.filter(category='kind', slug=kind).exists():
+            return False
+        if genre and not r.picture.tags.filter(category='genre', slug=genre).exists():
+            return False
+
+        return True
+
+    results = [r for r in results if ensure_exists(r)]  # filtering keeps the first-seen order built above
+    return results
+
+
+def search_pd_authors(query):  # Return pdcounter Author rows whose name contains `query` and whose slug is not used by any 'author' Tag.
+    pd_authors = Author.objects.filter(name__icontains=query)  # case-insensitive substring match on the name
+    existing_slugs = Tag.objects.filter(
+        category='author', slug__in=list(pd_authors.values_list('slug', flat=True))) \
+        .values_list('slug', flat=True)
+    pd_authors = pd_authors.exclude(slug__in=existing_slugs)  # drop authors whose slug already exists as an 'author' tag
+    return pd_authors  # a queryset, not a list
+
+
+def prepare_query(query):  # Normalize a raw search string: collapse whitespace, drop private-use characters, apply remove_query_syntax_chars, keep at most 10 words.
+    query = ' '.join(query.split())  # collapse each whitespace run to one space and trim both ends
+    # filter out private use characters
+    import unicodedata
+    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
+    query = remove_query_syntax_chars(query)  # defined earlier in the file (default replace=' '); its body is not visible in this diff
+
+    words = query.split()
+    if len(words) > 10:  # keep only the first ten whitespace-separated words
+        query = ' '.join(words[:10])
+    return query  # with 10 words or fewer, the output of remove_query_syntax_chars is returned without re-collapsing whitespace