Searching, filtering fixes.
[wolnelektury.git] / src / search / forms.py
index 7efd747..176c73e 100644 (file)
@@ -2,12 +2,16 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.apps import apps
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.apps import apps
-from django.contrib.postgres.search import SearchHeadline, SearchRank, SearchQuery
+from django.conf import settings
+from django.contrib.postgres.search import SearchHeadline, SearchQuery
 from django import forms
 from django.utils.translation import gettext_lazy as _
 from django import forms
 from django.utils.translation import gettext_lazy as _
-
+from catalogue.constants import LANGUAGES_3TO2
+import catalogue.models
+import pdcounter.models
+import picture.models
 from .fields import JQueryAutoCompleteSearchField, InlineRadioWidget
 from .fields import JQueryAutoCompleteSearchField, InlineRadioWidget
-from .utils import build_search_query
+from .utils import UnaccentSearchQuery, UnaccentSearchVector
 
 
 class SearchForm(forms.Form):
 
 
 class SearchForm(forms.Form):
@@ -25,7 +29,10 @@ class SearchForm(forms.Form):
 
 
 class SearchFilters(forms.Form):
 
 
 class SearchFilters(forms.Form):
-    q = forms.CharField(required=False, widget=forms.HiddenInput())
+    q = forms.CharField(
+        required=False, widget=forms.HiddenInput(),
+        min_length=2, max_length=256,
+    )
     format = forms.ChoiceField(required=False, choices=[
         ('', 'wszystkie'),
         ('text', 'tekst'),
     format = forms.ChoiceField(required=False, choices=[
         ('', 'wszystkie'),
         ('text', 'tekst'),
@@ -51,89 +58,98 @@ class SearchFilters(forms.Form):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        from catalogue.models import Book, Tag
 
 
+        langs = dict(settings.LANGUAGES)
         self.fields['lang'].choices = [('', 'wszystkie')] + [
         self.fields['lang'].choices = [('', 'wszystkie')] + [
-            (b, b)
-            for b in Book.objects.values_list(
+            (
+                b,
+                langs.get(LANGUAGES_3TO2.get(b, b), b)
+            )
+            for b in catalogue.models.Book.objects.values_list(
                     'language', flat=True
             ).distinct().order_by()
         ]
         self.fields['epoch'].choices = [('', 'wszystkie')] + [
             (b.slug, b.name)
                     'language', flat=True
             ).distinct().order_by()
         ]
         self.fields['epoch'].choices = [('', 'wszystkie')] + [
             (b.slug, b.name)
-            for b in Tag.objects.filter(category='epoch')
+            for b in catalogue.models.Tag.objects.filter(category='epoch')
         ]
         self.fields['genre'].choices = [('', 'wszystkie')] + [
             (b.slug, b.name)
         ]
         self.fields['genre'].choices = [('', 'wszystkie')] + [
             (b.slug, b.name)
-            for b in Tag.objects.filter(category='genre')
+            for b in catalogue.models.Tag.objects.filter(category='genre')
         ]
 
     def get_querysets(self):
         ]
 
     def get_querysets(self):
-        Tag = apps.get_model('catalogue', 'Tag')
-        Book = apps.get_model('catalogue', 'Book')
-        Picture = apps.get_model('picture', 'Picture')
-        Snippet = apps.get_model('catalogue', 'Snippet')
-        Collection = apps.get_model('catalogue', 'Collection')
         qs = {
         qs = {
-            'author': Tag.objects.filter(category='author'),
-            'theme': Tag.objects.filter(category='theme'),
-            'genre': Tag.objects.filter(category='genre'),
-            'collection': Collection.objects.all(),
-            'book': Book.objects.all(), #findable
-            'snippet': Snippet.objects.all(),
-            'art': Picture.objects.all(),
+            'author': catalogue.models.Tag.objects.filter(category='author'),
+            'pdauthor': pdcounter.models.Author.objects.all(),
+            'theme': catalogue.models.Tag.objects.filter(category='theme'),
+            'genre': catalogue.models.Tag.objects.filter(category='genre'),
+            'collection': catalogue.models.Collection.objects.all(),
+            'book': catalogue.models.Book.objects.filter(findable=True),
+            'pdbook': pdcounter.models.BookStub.objects.all(),
+            'snippet': catalogue.models.Snippet.objects.filter(book__findable=True),
+            'art': picture.models.Picture.objects.all(),
             # art pieces
             # art pieces
-            # pdbooks
-            # pdauthors
         }
         if self.cleaned_data['category']:
             c = self.cleaned_data['category']
         }
         if self.cleaned_data['category']:
             c = self.cleaned_data['category']
-            if c != 'author': qs['author'] = Tag.objects.none()
-            if c != 'theme': qs['theme'] = Tag.objects.none()
-            if c != 'genre': qs['genre'] = Tag.objects.none()
-            if c != 'collection': qs['collection'] = Collection.objects.none()
-            if c != 'book': qs['book'] = Book.objects.none()
-            if c != 'quote': qs['snippet'] = Snippet.objects.none()
-            if c != 'art': qs['art'] = Picture.objects.none()
-            qs['art'] = Picture.objects.none()
+            if c != 'author':
+                qs['author'] = qs['author'].none()
+                qs['pdauthor'] = qs['pdauthor'].none()
+            if c != 'theme': qs['theme'] = qs['theme'].none()
+            if c != 'genre': qs['genre'] = qs['genre'].none()
+            if c != 'collection': qs['collection'] = qs['collection'].none()
+            if c != 'book':
+                qs['book'] = qs['book'].none()
+                qs['pdbook'] = qs['pdbook'].none()
+            if c != 'quote': qs['snippet'] = qs['snippet'].none()
+            if c != 'art': qs['art'] = qs['art'].none()
+            qs['art'] = picture.models.Picture.objects.none()
 
         if self.cleaned_data['format']:
             c = self.cleaned_data['format']
 
         if self.cleaned_data['format']:
             c = self.cleaned_data['format']
-            qs['author'] = Tag.objects.none()
-            qs['theme'] = Tag.objects.none()
-            qs['genre'] = Tag.objects.none()
-            qs['collection'] = Collection.objects.none()
+            qs['author'] = qs['author'].none()
+            qs['pdauthor'] = qs['pdauthor'].none()
+            qs['theme'] = qs['theme'].none()
+            qs['genre'] = qs['genre'].none()
+            qs['collection'] = qs['collection'].none()
             if c == 'art':
             if c == 'art':
-                qs['book'] = Book.objects.none()
-                qs['snippet'] = Snippet.objects.none()
+                qs['book'] = qs['book'].none()
+                qs['pdbook'] = qs['pdbook'].none()
+                qs['snippet'] = qs['snippet'].none()
             if c in ('text', 'audio', 'daisy'):
             if c in ('text', 'audio', 'daisy'):
-                qs['art'] = Picture.objects.none()
+                qs['art'] = qs['art'].none()
                 if c == 'audio':
                     qs['book'] = qs['book'].filter(media__type='mp3')
                 if c == 'audio':
                     qs['book'] = qs['book'].filter(media__type='mp3')
+                    qs['pdbook'] = qs['book'].none()
                     qs['snippet'] = qs['snippet'].filter(book__media__type='mp3')
                 elif c == 'daisy':
                     qs['book'] = qs['book'].filter(media__type='daisy')
                     qs['snippet'] = qs['snippet'].filter(book__media__type='daisy')
 
         if self.cleaned_data['lang']:
                     qs['snippet'] = qs['snippet'].filter(book__media__type='mp3')
                 elif c == 'daisy':
                     qs['book'] = qs['book'].filter(media__type='daisy')
                     qs['snippet'] = qs['snippet'].filter(book__media__type='daisy')
 
         if self.cleaned_data['lang']:
-            qs['author'] = Tag.objects.none()
-            qs['theme'] = Tag.objects.none()
-            qs['genre'] = Tag.objects.none()
-            qs['art'] = Picture.objects.none()
-            qs['collection'] = Collection.objects.none()
+            qs['author'] = qs['author'].none()
+            qs['pdauthor'] = qs['pdauthor'].none()
+            qs['theme'] = qs['theme'].none()
+            qs['genre'] = qs['genre'].none()
+            qs['art'] = qs['art'].none()
+            qs['collection'] = qs['collection'].none()
             qs['book'] = qs['book'].filter(language=self.cleaned_data['lang'])
             qs['book'] = qs['book'].filter(language=self.cleaned_data['lang'])
+            qs['pdbook'] = qs['pdbook'].none()
             qs['snippet'] = qs['snippet'].filter(book__language=self.cleaned_data['lang'])
 
         for tag_cat in ('epoch', 'genre'):
             c = self.cleaned_data[tag_cat]
             if c:
                 # FIXME nonexistent
             qs['snippet'] = qs['snippet'].filter(book__language=self.cleaned_data['lang'])
 
         for tag_cat in ('epoch', 'genre'):
             c = self.cleaned_data[tag_cat]
             if c:
                 # FIXME nonexistent
-                t = Tag.objects.get(category=tag_cat, slug=c)
-                qs['author'] = Tag.objects.none()
-                qs['theme'] = Tag.objects.none()
-                qs['genre'] = Tag.objects.none()
-                qs['collection'] = Collection.objects.none()
+                t = catalogue.models.Tag.objects.get(category=tag_cat, slug=c)
+                qs['author'] = qs['author'].none()
+                qs['pdauthor'] = qs['pdauthor'].none()
+                qs['theme'] = qs['theme'].none()
+                qs['genre'] = qs['genre'].none()
+                qs['collection'] = qs['collection'].none()
                 qs['book'] = qs['book'].filter(tag_relations__tag=t)
                 qs['book'] = qs['book'].filter(tag_relations__tag=t)
+                qs['pdbook'] = qs['pdbook'].none()
                 qs['snippet'] = qs['snippet'].filter(book__tag_relations__tag=t)
                 qs['art'] = qs['art'].filter(tag_relations__tag=t)
             
                 qs['snippet'] = qs['snippet'].filter(book__tag_relations__tag=t)
                 qs['art'] = qs['art'].filter(tag_relations__tag=t)
             
@@ -142,28 +158,47 @@ class SearchFilters(forms.Form):
     def results(self):
         qs = self.get_querysets()
         query = self.cleaned_data['q']
     def results(self):
         qs = self.get_querysets()
         query = self.cleaned_data['q']
-        squery = build_search_query(query, config='polish')
+        squery = UnaccentSearchQuery(query, config='polish')
         query = SearchQuery(query, config='polish')
         query = SearchQuery(query, config='polish')
-        books = qs['book'].filter(title__search=query)
-        books = books.exclude(ancestor__in=books)
-        return {
-            'author': qs['author'].filter(slug__search=query),
-            'theme': qs['theme'].filter(slug__search=query),
-            'genre': qs['genre'].filter(slug__search=query),
-            'collection': qs['collection'].filter(title__search=query),
-            'book': books[:100],
-            'snippet': qs['snippet'].annotate(
-                    rank=SearchRank('search_vector', squery)
-                ).filter(rank__gt=0).order_by('-rank').annotate(
+        books = qs['book'].annotate(
+            search_vector=UnaccentSearchVector('title')
+        ).filter(search_vector=squery)
+        books = books.exclude(ancestor__in=books).order_by('-popularity__count')
+
+        snippets = qs['snippet'].filter(search_vector=squery).annotate(
                     headline=SearchHeadline(
                         'text',
                         query,
                         config='polish',
                         start_sel='<strong>',
                         stop_sel='</strong>',
                     headline=SearchHeadline(
                         'text',
                         query,
                         config='polish',
                         start_sel='<strong>',
                         stop_sel='</strong>',
-                        highlight_all=True
                     )
                     )
-                )[:100],
-            'art': qs['art'].filter(title__search=query)[:100],
+                ).order_by('-book__popularity__count', 'sec')[:100]
+        snippets_by_book = {}
+        for snippet in snippets:
+            snippet_list = snippets_by_book.setdefault(snippet.book, [])
+            if len(snippet_list) < 3:
+                snippet_list.append(snippet)
+
+        return {
+            'author': qs['author'].annotate(
+                search_vector=UnaccentSearchVector('name_pl')
+            ).filter(search_vector=squery),
+            'theme': qs['theme'].annotate(
+                search_vector=UnaccentSearchVector('name_pl')
+            ).filter(search_vector=squery),
+            'genre': qs['genre'].annotate(
+                search_vector=UnaccentSearchVector('name_pl')
+            ).filter(search_vector=squery),
+            'collection': qs['collection'].annotate(
+                search_vector=UnaccentSearchVector('title')
+            ).filter(search_vector=squery),
+            'book': books[:100],
+            'art': qs['art'].annotate(
+                search_vector=UnaccentSearchVector('title')
+            ).filter(search_vector=squery)[:100],
+            'snippet': snippets_by_book,
+            'pdauthor': pdcounter.models.Author.search(squery, qs=qs['pdauthor']),
+            'pdbook': pdcounter.models.BookStub.search(squery, qs=qs['pdbook']),
         }
 
         }