From 0a7090f11131631647db366ff87976407e788412 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Thu, 1 Jun 2023 15:09:21 +0200 Subject: [PATCH 1/1] Search. --- src/catalogue/admin.py | 5 +- src/catalogue/migrations/0044_snippet.py | 25 +++ src/catalogue/models/__init__.py | 1 + src/catalogue/models/collection.py | 3 + src/catalogue/models/snippet.py | 25 +++ .../catalogue/2022/collection_box.html | 10 ++ src/search/fields.py | 6 + src/search/forms.py | 150 ++++++++++++++++- src/search/index.py | 39 +++-- .../search/inline_radio_widget_option.html | 1 + src/search/templates/search/results.html | 156 ++++++++++++++++++ src/wolnelektury/settings/apps.py | 1 + src/wolnelektury/static/2021/scripts/main.js | 12 ++ .../styles/components/_collectionbox.scss | 22 +++ .../static/2022/styles/components/_form.scss | 54 ++++++ .../2022/styles/components/_header.scss | 12 ++ .../2022/styles/components/_module.scss | 4 + .../2022/styles/components/_search.scss | 136 +++++++++++++++ 18 files changed, 649 insertions(+), 13 deletions(-) create mode 100644 src/catalogue/migrations/0044_snippet.py create mode 100644 src/catalogue/models/snippet.py create mode 100644 src/catalogue/templates/catalogue/2022/collection_box.html create mode 100644 src/search/templates/search/inline_radio_widget_option.html create mode 100644 src/search/templates/search/results.html create mode 100644 src/wolnelektury/static/2022/styles/components/_collectionbox.scss create mode 100644 src/wolnelektury/static/2022/styles/components/_form.scss create mode 100644 src/wolnelektury/static/2022/styles/components/_header.scss create mode 100644 src/wolnelektury/static/2022/styles/components/_search.scss diff --git a/src/catalogue/admin.py b/src/catalogue/admin.py index 420cd3f49..ebd14b9cb 100644 --- a/src/catalogue/admin.py +++ b/src/catalogue/admin.py @@ -2,7 +2,7 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # from django.contrib import admin -from catalogue.models import Tag, Book, Fragment, BookMedia, Collection, Source +from catalogue.models import Tag, Book, Fragment, BookMedia, Collection, Source, Snippet from pz.admin import EmptyFieldListFilter @@ -67,3 +67,6 @@ admin.site.register(Book, BookAdmin) admin.site.register(Fragment, FragmentAdmin) admin.site.register(Collection, CollectionAdmin) admin.site.register(Source, SourceAdmin) + + +admin.site.register(Snippet) diff --git a/src/catalogue/migrations/0044_snippet.py b/src/catalogue/migrations/0044_snippet.py new file mode 100644 index 000000000..2f9fdd60e --- /dev/null +++ b/src/catalogue/migrations/0044_snippet.py @@ -0,0 +1,25 @@ +# Generated by Django 4.0.8 on 2023-05-29 09:06 + +import django.contrib.postgres.search +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('catalogue', '0043_alter_bookmedia_duration_alter_bookmedia_type'), + ] + + operations = [ + migrations.CreateModel( + name='Snippet', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('sec', models.IntegerField()), + ('text', models.TextField()), + ('search_vector', django.contrib.postgres.search.SearchVectorField()), + ('book', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='catalogue.book')), + ], + ), + ] diff --git a/src/catalogue/models/__init__.py b/src/catalogue/models/__init__.py index 0881cb793..81f36d1ea 100644 --- a/src/catalogue/models/__init__.py +++ b/src/catalogue/models/__init__.py @@ -7,3 +7,4 @@ from catalogue.models.fragment import Fragment from catalogue.models.book import Book, BookPopularity from catalogue.models.collection import Collection from catalogue.models.source import Source +from .snippet import Snippet diff --git a/src/catalogue/models/collection.py b/src/catalogue/models/collection.py index f1a582cbe..8510b2cb1 100644 --- a/src/catalogue/models/collection.py +++ b/src/catalogue/models/collection.py @@ -59,6 +59,9 @@ class Collection(models.Model): def get_5_books(self): return self.get_books()[:5] + def example3(self): + return self.get_books()[:3] + @cached_render('catalogue/collection_box.html') def box(self): return { diff --git a/src/catalogue/models/snippet.py b/src/catalogue/models/snippet.py new file mode 100644 index 000000000..aa5ed6822 --- /dev/null +++ b/src/catalogue/models/snippet.py @@ -0,0 +1,25 @@ +from django.db import models +from django.contrib.postgres.search import SearchVector, SearchVectorField +from search.utils import build_search_vector + + +class Snippet(models.Model): + book = models.ForeignKey('Book', models.CASCADE) + sec = models.IntegerField() + # header_type ? + # header_span ? + text = models.TextField() + search_vector = SearchVectorField() + + def save(self, *args, **kwargs): + super().save(*args, **kwargs) + if not self.search_vector: + self.update() + + def update(self): + self.search_vector = build_search_vector('text', config='polish') # config=polish + self.save() + + @classmethod + def update_all(cls): + cls.objects.all().update(search_vector = build_search_vector('text')) diff --git a/src/catalogue/templates/catalogue/2022/collection_box.html b/src/catalogue/templates/catalogue/2022/collection_box.html new file mode 100644 index 000000000..495fe231e --- /dev/null +++ b/src/catalogue/templates/catalogue/2022/collection_box.html @@ -0,0 +1,10 @@ +
+ +
+ {% for c in collection.example3 %} + + {% endfor %} +
+ {{ collection.title }} +
+
diff --git a/src/search/fields.py b/src/search/fields.py index 07e50d7ba..18a5c1564 100644 --- a/src/search/fields.py +++ b/src/search/fields.py @@ -3,6 +3,7 @@ # from django import forms from django.forms.utils import flatatt +from django.forms.widgets import RadioSelect from django.utils.encoding import smart_str from django.utils.safestring import mark_safe from json import dumps @@ -63,3 +64,8 @@ class JQueryAutoCompleteSearchField(forms.CharField): kwargs['widget'] = JQueryAutoCompleteSearchWidget(options) super(JQueryAutoCompleteSearchField, self).__init__(*args, **kwargs) + + + +class InlineRadioWidget(RadioSelect): + option_template_name = 'search/inline_radio_widget_option.html' diff --git a/src/search/forms.py b/src/search/forms.py index 4fa051d7a..7efd7479d 100644 --- a/src/search/forms.py +++ b/src/search/forms.py @@ -1,10 +1,13 @@ # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from django.apps import apps +from django.contrib.postgres.search import SearchHeadline, SearchRank, SearchQuery from django import forms from django.utils.translation import gettext_lazy as _ -from search.fields import JQueryAutoCompleteSearchField +from .fields import JQueryAutoCompleteSearchField, InlineRadioWidget +from .utils import build_search_query class SearchForm(forms.Form): @@ -19,3 +22,148 @@ class SearchForm(forms.Form): self.fields['q'].widget.attrs['data-source'] = source if 'q' not in self.data: self.fields['q'].widget.attrs['placeholder'] = _('title, author, epoch, kind, genre, phrase') + + +class SearchFilters(forms.Form): + q = forms.CharField(required=False, widget=forms.HiddenInput()) + format = forms.ChoiceField(required=False, choices=[ + ('', 'wszystkie'), + ('text', 'tekst'), + ('audio', 'audiobook'), + ('daisy', 'Daisy'), + ('art', 'obraz'), + #('theme', 'motywy'), + ], widget=InlineRadioWidget()) + lang = forms.ChoiceField(required=False) + epoch = forms.ChoiceField(required=False) + genre = forms.ChoiceField(required=False) + category = forms.ChoiceField(required=False, choices=[ + ('', 'wszystkie'), + ('author', 'autor'), + #('translator', 'tłumacz'), + ('theme', 'motyw'), + ('genre', 'gatunek'), + ('book', 'tytuł'), + ('art', 'obraz'), + ('collection', 'kolekcja'), + ('quote', 'cytat'), + ], widget=InlineRadioWidget()) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + from catalogue.models import Book, Tag + + self.fields['lang'].choices = [('', 'wszystkie')] + [ + (b, b) + for b in Book.objects.values_list( + 'language', flat=True + ).distinct().order_by() + ] + self.fields['epoch'].choices = [('', 'wszystkie')] + [ + (b.slug, b.name) + for b in Tag.objects.filter(category='epoch') + ] + self.fields['genre'].choices = [('', 'wszystkie')] + [ + (b.slug, b.name) + for b in Tag.objects.filter(category='genre') + ] + + def get_querysets(self): + Tag = apps.get_model('catalogue', 'Tag') + Book = apps.get_model('catalogue', 'Book') + Picture = apps.get_model('picture', 'Picture') + Snippet = apps.get_model('catalogue', 'Snippet') + Collection = apps.get_model('catalogue', 'Collection') + qs = { + 'author': Tag.objects.filter(category='author'), + 'theme': Tag.objects.filter(category='theme'), + 'genre': Tag.objects.filter(category='genre'), + 'collection': Collection.objects.all(), + 'book': Book.objects.all(), #findable + 'snippet': Snippet.objects.all(), + 'art': Picture.objects.all(), + # art pieces + # pdbooks + # pdauthors + } + if self.cleaned_data['category']: + c = self.cleaned_data['category'] + if c != 'author': qs['author'] = Tag.objects.none() + if c != 'theme': qs['theme'] = Tag.objects.none() + if c != 'genre': qs['genre'] = Tag.objects.none() + if c != 'collection': qs['collection'] = Collection.objects.none() + if c != 'book': qs['book'] = Book.objects.none() + if c != 'quote': qs['snippet'] = Snippet.objects.none() + if c != 'art': qs['art'] = Picture.objects.none() + qs['art'] = Picture.objects.none() + + if self.cleaned_data['format']: + c = self.cleaned_data['format'] + qs['author'] = Tag.objects.none() + qs['theme'] = Tag.objects.none() + qs['genre'] = Tag.objects.none() + qs['collection'] = Collection.objects.none() + if c == 'art': + qs['book'] = Book.objects.none() + qs['snippet'] = Snippet.objects.none() + if c in ('text', 'audio', 'daisy'): + qs['art'] = Picture.objects.none() + if c == 'audio': + qs['book'] = qs['book'].filter(media__type='mp3') + qs['snippet'] = qs['snippet'].filter(book__media__type='mp3') + elif c == 'daisy': + qs['book'] = qs['book'].filter(media__type='daisy') + qs['snippet'] = qs['snippet'].filter(book__media__type='daisy') + + if self.cleaned_data['lang']: + qs['author'] = Tag.objects.none() + qs['theme'] = Tag.objects.none() + qs['genre'] = Tag.objects.none() + qs['art'] = Picture.objects.none() + qs['collection'] = Collection.objects.none() + qs['book'] = qs['book'].filter(language=self.cleaned_data['lang']) + qs['snippet'] = qs['snippet'].filter(book__language=self.cleaned_data['lang']) + + for tag_cat in ('epoch', 'genre'): + c = self.cleaned_data[tag_cat] + if c: + # FIXME nonexistent + t = Tag.objects.get(category=tag_cat, slug=c) + qs['author'] = Tag.objects.none() + qs['theme'] = Tag.objects.none() + qs['genre'] = Tag.objects.none() + qs['collection'] = Collection.objects.none() + qs['book'] = qs['book'].filter(tag_relations__tag=t) + qs['snippet'] = qs['snippet'].filter(book__tag_relations__tag=t) + qs['art'] = qs['art'].filter(tag_relations__tag=t) + + return qs + + def results(self): + qs = self.get_querysets() + query = self.cleaned_data['q'] + squery = build_search_query(query, config='polish') + query = SearchQuery(query, config='polish') + books = qs['book'].filter(title__search=query) + books = books.exclude(ancestor__in=books) + return { + 'author': qs['author'].filter(slug__search=query), + 'theme': qs['theme'].filter(slug__search=query), + 'genre': qs['genre'].filter(slug__search=query), + 'collection': qs['collection'].filter(title__search=query), + 'book': books[:100], + 'snippet': qs['snippet'].annotate( + rank=SearchRank('search_vector', squery) + ).filter(rank__gt=0).order_by('-rank').annotate( + headline=SearchHeadline( + 'text', + query, + config='polish', + start_sel='', + stop_sel='', + highlight_all=True + ) + )[:100], + 'art': qs['art'].filter(title__search=query)[:100], + } + diff --git a/src/search/index.py b/src/search/index.py index 68a2b3b18..9784d49a7 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -128,6 +128,22 @@ class Index(SolrIndex): def __init__(self): super(Index, self).__init__(mode='rw') + def remove_snippets(self, book): + book.snippet_set.all().delete() + + def add_snippet(self, book, doc): + assert book.id == doc.pop('book_id') + # Fragments already exist and can be indexed where they live. + if 'fragment_anchor' in doc: + return + + text = doc.pop('text') + header_index = doc.pop('header_index') + book.snippet_set.create( + sec=header_index, + text=text, + ) + def delete_query(self, *queries): """ index.delete(queries=...) doesn't work, so let's reimplement it @@ -229,19 +245,15 @@ class Index(SolrIndex): doc['parent_id'] = int(book.parent.id) return doc - def remove_book(self, book_or_id, remove_snippets=True): + def remove_book(self, book, remove_snippets=True): """Removes a book from search index. book - Book instance.""" - if isinstance(book_or_id, catalogue.models.Book): - book_id = book_or_id.id - else: - book_id = book_or_id - - self.delete_query(self.index.Q(book_id=book_id)) + self.delete_query(self.index.Q(book_id=book.id)) if remove_snippets: - snippets = Snippets(book_id) + snippets = Snippets(book.id) snippets.remove() + self.remove_snippets(book) def index_book(self, book, book_info=None, overwrite=True): """ @@ -249,6 +261,8 @@ class Index(SolrIndex): Creates a lucene document for extracted metadata and calls self.index_content() to index the contents of the book. """ + if not book.xml_file: return + if overwrite: # we don't remove snippets, since they might be still needed by # threads using not reopened index @@ -309,7 +323,7 @@ class Index(SolrIndex): fields = {} if book_info is None: - book_info = dcparser.parse(open(book.xml_file.path)) + book_info = dcparser.parse(open(book.xml_file.path, 'rb')) fields['slug'] = book.slug fields['is_book'] = True @@ -468,8 +482,8 @@ class Index(SolrIndex): elif end is not None and footnote is not [] and end.tag in self.footnote_tags: handle_text.pop() doc = add_part(snippets, header_index=position, header_type=header.tag, - text=''.join(footnote), - is_footnote=True) + text=''.join(footnote)) + self.add_snippet(book, doc) self.index.add(doc) footnote = [] @@ -504,6 +518,8 @@ class Index(SolrIndex): fragment_anchor=fid, text=fix_format(frag['text']), themes=frag['themes']) + # Add searchable fragment + self.add_snippet(book, doc) self.index.add(doc) # Collect content. @@ -516,6 +532,7 @@ class Index(SolrIndex): doc = add_part(snippets, header_index=position, header_type=header.tag, text=fix_format(content)) + self.add_snippet(book, doc) self.index.add(doc) finally: diff --git a/src/search/templates/search/inline_radio_widget_option.html b/src/search/templates/search/inline_radio_widget_option.html new file mode 100644 index 000000000..28aba2f78 --- /dev/null +++ b/src/search/templates/search/inline_radio_widget_option.html @@ -0,0 +1 @@ +{% if widget.wrap_label %}{% endif %}{% include "django/forms/widgets/input.html" %}{% if widget.wrap_label %} {{ widget.label }}{% endif %} diff --git a/src/search/templates/search/results.html b/src/search/templates/search/results.html new file mode 100644 index 000000000..c6ea3836d --- /dev/null +++ b/src/search/templates/search/results.html @@ -0,0 +1,156 @@ +{% extends "2022/base.html" %} + + +{% block main %} +
+
+
+

Wynik wyszukiwania dla: {{ query }}

+
+
+ +
+
+ format: {{ filters.format }} +
+
+ + + +
+
+ kategoria: + {{ filters.category }} +
+ {{ filters.q }} + +
+ + {% if results.author %} +
+

Autorzy

+ +
+ {% endif %} + + {% if results.theme %} +
+

Motywy

+ +
+ {% endif %} + + {% if results.book %} +
+

Książki

+
+
+
+ {% for book in results.book %} + {% include 'catalogue/2022/book_box.html' %} + {% endfor %} +
+
+ {% endif %} + + {% if results.art %} +
+

Obrazy

+
+
+
+ {% for book in results.art %} + {% include 'catalogue/2022/book_box.html' %} + {% endfor %} +
+
+ {% endif %} + + {% if results.fragment or results.snippet %} +
+

W treści

+ {% for f in results.snippet %} +
+ {% for author in f.book.authors %} + {{ author }} + {% endfor %} + + {{ f.book.title }} + + + {{ f.headline|safe }} + +
+ {% endfor %} +
+ {% endif %} + + {% if results.collection %} +
+

Kolekcje

+
+ {% for collection in results.collection %} + {% include 'catalogue/2022/collection_box.html' %} + {% include 'catalogue/2022/collection_box.html' %} + {% endfor %} +
+
+ {% endif %} + + {% if pd_authors %} +
+
+

Domena publiczna?

+

+ Dzieła tych autorów przejdą do zasobów domeny publicznej i będą mogły + być publikowane bez żadnych ograniczeń. + Dowiedz się, dlaczego biblioteki internetowe nie mogą udostępniać dzieł tego autora. +

+ +
+
+ {% endif %} +
+{% endblock %} diff --git a/src/wolnelektury/settings/apps.py b/src/wolnelektury/settings/apps.py index fe42e04c1..cac86d8ff 100644 --- a/src/wolnelektury/settings/apps.py +++ b/src/wolnelektury/settings/apps.py @@ -55,6 +55,7 @@ INSTALLED_APPS_CONTRIB = [ 'django.contrib.admin', 'django.contrib.admindocs', 'django.contrib.staticfiles', + 'django.contrib.postgres', 'admin_ordering', 'rest_framework', 'fnp_django_pagination', diff --git a/src/wolnelektury/static/2021/scripts/main.js b/src/wolnelektury/static/2021/scripts/main.js index 31bb927d2..843cddbbf 100644 --- a/src/wolnelektury/static/2021/scripts/main.js +++ b/src/wolnelektury/static/2021/scripts/main.js @@ -536,3 +536,15 @@ clearTimeout(timer); }); })(); + + + +// Update search form filters. +(function() { + $('.j-form-auto').each(function() { + let $form = $(this); + $('input', $form).change(function() {$form.submit()}); + $('select', $form).change(function() {$form.submit()}); + $('textarea', $form).change(function() {$form.submit()}); + }); +})(); diff --git a/src/wolnelektury/static/2022/styles/components/_collectionbox.scss b/src/wolnelektury/static/2022/styles/components/_collectionbox.scss new file mode 100644 index 000000000..5cf7dcb8a --- /dev/null +++ b/src/wolnelektury/static/2022/styles/components/_collectionbox.scss @@ -0,0 +1,22 @@ +.c-collectionbox { + border: 1px solid #D9D9D9; + border-radius: 10px; + padding: 21px; + width: 3*172px + 2*21px + 2px; + font-size: 18px; + line-height: 24px; + + a { + display: block; + } + .c-collectionbox-covers { + display: flex; + margin-bottom: 15px; + img { + width: 172px; + @media screen and (max-width: 3*172px + 2*21px + 2px + 2*16px) { + width: calc((100vw - 2*16px - 2px - 2*21px) / 3); + } + } + } +} diff --git a/src/wolnelektury/static/2022/styles/components/_form.scss b/src/wolnelektury/static/2022/styles/components/_form.scss new file mode 100644 index 000000000..5185d9661 --- /dev/null +++ b/src/wolnelektury/static/2022/styles/components/_form.scss @@ -0,0 +1,54 @@ +.c-form { + padding: 0 16px; + + .c-form__hidden-submit { + font-size: 0; + border: 0; + opacity: 0; + } + + div.c-form__inline-radio, div.c-form__inline-radio > div { + display: flex; + gap: 8px; + flex-wrap: wrap; + align-items: center; + padding: 10px 0; + label { + display: flex; + span { + display: block; + padding: 11px 14px; + } + input { + width: 0px; + opacity: 0; + + &:checked + span { + background: #083F4D; + border-radius: 4px; + color: white; + font-weight: bold; + } + } + } + } + + .c-form__controls-row { + display: flex; + gap: 20px; + flex-wrap: wrap; + } + + .c-form__control { + display: flex; + flex-direction: column; + gap: 3px; + + select { + padding: 8px 10px; + background: white; + border: 1px solid #ddd; + border-radius: 4px; + } + } +} diff --git a/src/wolnelektury/static/2022/styles/components/_header.scss b/src/wolnelektury/static/2022/styles/components/_header.scss new file mode 100644 index 000000000..392ab9c6b --- /dev/null +++ b/src/wolnelektury/static/2022/styles/components/_header.scss @@ -0,0 +1,12 @@ +h2.header { + margin: 0; + font-weight: 600; + font-size: 21.5px; + line-height: 140%; + border-bottom: 1px solid #D9D9D9; + padding-bottom: 15px; + padding-top: 5px; + letter-spacing: -0.01em; + color: #007880; + margin-top: 23px; +} diff --git a/src/wolnelektury/static/2022/styles/components/_module.scss b/src/wolnelektury/static/2022/styles/components/_module.scss index 55c5a0d35..4350f82bf 100644 --- a/src/wolnelektury/static/2022/styles/components/_module.scss +++ b/src/wolnelektury/static/2022/styles/components/_module.scss @@ -13,3 +13,7 @@ @import "lang"; @import "avatar"; @import "read_more"; +@import "form"; +@import "search"; +@import "header"; +@import "collectionbox"; diff --git a/src/wolnelektury/static/2022/styles/components/_search.scss b/src/wolnelektury/static/2022/styles/components/_search.scss new file mode 100644 index 000000000..1063d6b24 --- /dev/null +++ b/src/wolnelektury/static/2022/styles/components/_search.scss @@ -0,0 +1,136 @@ +.c-search-result-fragment { + display: block; + padding: 21px; + margin-top: 20px; + border: 1px solid #D9D9D9; + border-radius: 10px; + + .c-search-result-fragment-title { + display: block; + font-size: 21.5px; + line-height: 1.4em; + color: #474747; + } + + .c-search-result-fragment-author { + display: block; + font-size: 15px; + line-height: 1.2em; + color: #808080; + } + + .c-search-result-fragment-text { + margin-top: 16px; + padding: 6px 12px; + display: block; + color: #474747; + background: #F2F2F2; + border-radius: 4px; + font-size: 18px; + line-height: 1.5em; + + strong { + font-weight: normal; + background: #FFEA00; + } + } +} + + +.c-search-result { + margin: 20px 0; + padding: 0; + list-style: none; + font-size: 18px; + line-height: 27px; + + &.c-search-result-author { + li { + padding-left: 52px; + figure { + font-size: 0; + display: flex; + align-items: center; + justify-content: center; + width: 40px; + height: 40px; + margin-left: -52px; + margin-right: 12px; + overflow: hidden; + border-radius: 50%; + img { + width: 100%; + } + } + } + } + + li { + margin-bottom: 5px; + a { + display: flex; + align-items: center; + } + } +} + + +.c-search-result-collection { + display: flex; + margin-top: 20px; + gap: 20px; + flex-wrap: wrap; +} + + + +.c-search-result-pd { + margin-top: 64px; + padding: 34px; + font-size: 18px; + line-height: 24px; + background: #E1F1F2; + border-radius: 10px; + + h2 { + color: #007880; + font-size: 21px; + line-height: 30px; + font-weight: bold; + margin: 0; + } + + p { + font-size: 18px; + line-height: 27px; + } + + > div { + display: flex; + gap: 20px; + margin-top: 26px; + > div { + background: white; + padding: 21px; + border-radius: 10px; + width: 343px; + a { + color: #474747; + line-height: 28px; + } + + strong { + display: block; + margin-bottom: 10px; + color: #083F4D; + font-size: 25px; + line-height: 30px; + } + + em { + font-style: normal; + font-weight: bold; + } + } + } +} -- 2.20.1