# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from django.apps import apps
+from django.conf import settings
from django.contrib.postgres.search import SearchHeadline, SearchRank, SearchQuery
from django import forms
from django.utils.translation import gettext_lazy as _
-
+from catalogue.constants import LANGUAGES_3TO2
+import catalogue.models
+import pdcounter.models
+import picture.models
from .fields import JQueryAutoCompleteSearchField, InlineRadioWidget
-from .utils import build_search_query
+from .utils import UnaccentSearchQuery, UnaccentSearchVector
class SearchForm(forms.Form):
class SearchFilters(forms.Form):
- q = forms.CharField(required=False, widget=forms.HiddenInput())
+ q = forms.CharField(
+ required=False, widget=forms.HiddenInput(),
+ min_length=2, max_length=256,
+ )
format = forms.ChoiceField(required=False, choices=[
('', 'wszystkie'),
('text', 'tekst'),
def __init__(self, *args, **kwargs):
    """Set up the form and fill the dynamic filter choices from the database.

    The 'lang', 'epoch' and 'genre' fields each get an "all" entry
    (empty value) followed by the values currently present in the catalogue.
    """
    super().__init__(*args, **kwargs)
    any_choice = ('', 'wszystkie')

    # Human-readable language names, keyed by the codes used in settings.
    language_names = dict(settings.LANGUAGES)
    # Empty order_by() drops any default ordering so that distinct()
    # works on the 'language' column alone.
    book_languages = catalogue.models.Book.objects.values_list(
        'language', flat=True
    ).distinct().order_by()
    lang_choices = [any_choice]
    for code in book_languages:
        # Presumably LANGUAGES_3TO2 translates 3-letter codes to the
        # 2-letter ones used in settings; unknown codes are shown as-is.
        label = language_names.get(LANGUAGES_3TO2.get(code, code), code)
        lang_choices.append((code, label))
    self.fields['lang'].choices = lang_choices

    # Epoch and genre filters list every tag of the matching category.
    for tag_category in ('epoch', 'genre'):
        tags = catalogue.models.Tag.objects.filter(category=tag_category)
        self.fields[tag_category].choices = [any_choice] + [
            (tag.slug, tag.name) for tag in tags
        ]
def get_querysets(self):
    """Build one base queryset per result kind, narrowed by the chosen filters.

    Reads 'category', 'format', 'lang', 'epoch' and 'genre' from
    ``self.cleaned_data``, so the form has to be validated before calling.

    Returns:
        A dict keyed by result kind ('author', 'pdauthor', 'theme',
        'genre', 'collection', 'book', 'pdbook', 'snippet', 'art').
        Kinds ruled out by a filter are mapped to an empty queryset of
        their own model.

    Raises:
        catalogue.models.Tag.DoesNotExist: when the selected epoch/genre
            slug does not match any tag.
    """
    Tag = catalogue.models.Tag
    qs = {
        'author': Tag.objects.filter(category='author'),
        'pdauthor': pdcounter.models.Author.objects.all(),
        'theme': Tag.objects.filter(category='theme'),
        'genre': Tag.objects.filter(category='genre'),
        'collection': catalogue.models.Collection.objects.all(),
        # NOTE(review): the original carried a bare "findable" marker here;
        # presumably books should be limited to findable ones -- confirm.
        'book': catalogue.models.Book.objects.all(),
        'pdbook': pdcounter.models.BookStub.objects.all(),
        'snippet': catalogue.models.Snippet.objects.all(),
        'art': picture.models.Picture.objects.all(),
    }

    def clear(*kinds):
        # Empty each queryset via its own .none() so the model stays the same.
        for kind in kinds:
            qs[kind] = qs[kind].none()

    category = self.cleaned_data['category']
    if category:
        # Keep only the selected category; pd* kinds follow their main kind.
        if category != 'author':
            clear('author', 'pdauthor')
        for kind in ('theme', 'genre', 'collection'):
            if category != kind:
                clear(kind)
        if category != 'book':
            clear('book', 'pdbook')
        if category != 'quote':
            clear('snippet')
        # NOTE(review): art results are emptied for every selected category,
        # including 'art' itself -- confirm this is intended, and whether it
        # should apply only when a category is chosen.
        clear('art')

    fmt = self.cleaned_data['format']
    if fmt:
        # A format filter only makes sense for books, snippets and art.
        clear('author', 'pdauthor', 'theme', 'genre', 'collection')
        if fmt == 'art':
            clear('book', 'pdbook', 'snippet')
        if fmt in ('text', 'audio', 'daisy'):
            clear('art')
        if fmt == 'audio':
            qs['book'] = qs['book'].filter(media__type='mp3')
            clear('pdbook')
            qs['snippet'] = qs['snippet'].filter(book__media__type='mp3')
        elif fmt == 'daisy':
            # NOTE(review): unlike 'audio', 'pdbook' is left untouched here
            # -- confirm whether that is intended.
            qs['book'] = qs['book'].filter(media__type='daisy')
            qs['snippet'] = qs['snippet'].filter(book__media__type='daisy')

    lang = self.cleaned_data['lang']
    if lang:
        # Only books and their snippets carry a language.
        clear('author', 'pdauthor', 'theme', 'genre', 'art', 'collection',
              'pdbook')
        qs['book'] = qs['book'].filter(language=lang)
        qs['snippet'] = qs['snippet'].filter(book__language=lang)

    for tag_cat in ('epoch', 'genre'):
        slug = self.cleaned_data[tag_cat]
        if slug:
            # FIXME: a slug with no matching tag raises Tag.DoesNotExist.
            tag = Tag.objects.get(category=tag_cat, slug=slug)
            clear('author', 'pdauthor', 'theme', 'genre', 'collection',
                  'pdbook')
            qs['book'] = qs['book'].filter(tag_relations__tag=tag)
            qs['snippet'] = qs['snippet'].filter(book__tag_relations__tag=tag)
            qs['art'] = qs['art'].filter(tag_relations__tag=tag)

    return qs
def results(self):
qs = self.get_querysets()
query = self.cleaned_data['q']
- squery = build_search_query(query, config='polish')
+ squery = UnaccentSearchQuery(query, config='polish')
query = SearchQuery(query, config='polish')
- books = qs['book'].filter(title__search=query)
+ books = qs['book'].annotate(
+ search_vector=UnaccentSearchVector('title')
+ ).filter(search_vector=squery)
books = books.exclude(ancestor__in=books)
- return {
- 'author': qs['author'].filter(slug__search=query),
- 'theme': qs['theme'].filter(slug__search=query),
- 'genre': qs['genre'].filter(slug__search=query),
- 'collection': qs['collection'].filter(title__search=query),
- 'book': books[:100],
- 'snippet': qs['snippet'].annotate(
+
+ snippets = qs['snippet'].annotate(
rank=SearchRank('search_vector', squery)
).filter(rank__gt=0).order_by('-rank').annotate(
headline=SearchHeadline(
stop_sel='</strong>',
highlight_all=True
)
- )[:100],
- 'art': qs['art'].filter(title__search=query)[:100],
+ )[:100]
+ snippets_by_book = {}
+ for snippet in snippets:
+ snippet_list = snippets_by_book.setdefault(snippet.book, [])
+ if len(snippet_list) < 3:
+ snippet_list.append(snippet)
+
+ return {
+ 'author': qs['author'].annotate(
+ search_vector=UnaccentSearchVector('name_pl')
+ ).filter(search_vector=squery),
+ 'theme': qs['theme'].annotate(
+ search_vector=UnaccentSearchVector('name_pl')
+ ).filter(search_vector=squery),
+ 'genre': qs['genre'].annotate(
+ search_vector=UnaccentSearchVector('name_pl')
+ ).filter(search_vector=squery),
+ 'collection': qs['collection'].annotate(
+ search_vector=UnaccentSearchVector('title')
+ ).filter(search_vector=squery),
+ 'book': books[:100],
+ 'art': qs['art'].annotate(
+ search_vector=UnaccentSearchVector('title')
+ ).filter(search_vector=squery)[:100],
+ 'snippet': snippets_by_book,
+ 'pdauthor': pdcounter.models.Author.search(squery, qs=qs['pdauthor']),
+ 'pdbook': pdcounter.models.BookStub.search(squery, qs=qs['pdbook']),
}
<button type="submit" class="c-form__hidden-submit">wyślij</button>
</form>
+ {% if not hasresults %}
+ <p class="l-change-pop show">
+ Brak wyników.
+ </p>
+ {% endif %}
+
{% if results.author %}
<div class="l-container">
<h2 class="header">Autorzy</h2>
</div>
{% endif %}
- {% if results.fragment or results.snippet %}
+ {% if results.snippet %}
<div class="l-container">
<h2 class="header">W treści</h2>
- {% for f in results.snippet %}
+ {% for book, snippets in results.snippet.items %}
<div class="c-search-result-fragment">
- {% for author in f.book.authors %}
+ {% for author in book.authors %}
<a class="c-search-result-fragment-author" href="{{ author.get_absolute_url }}">{{ author }}</a>
{% endfor %}
- <a class="c-search-result-fragment-title" href="{{ f.book.get_absolute_url }}">
- {{ f.book.title }}
- </a>
- <a class="c-search-result-fragment-text" href='{% url 'book_text' f.book.slug %}#sec{{ f.sec }}'>
- {{ f.headline|safe }}
+ <a class="c-search-result-fragment-title" href="{{ book.get_absolute_url }}">
+ {{ book.title }}
</a>
+ {% for f in snippets %}
+ <a class="c-search-result-fragment-text" href='{% url 'book_text' f.book.slug %}#sec{{ f.sec }}'>
+ {{ f.headline|safe }}
+ </a>
+ {% endfor %}
</div>
{% endfor %}
</div>
</div>
{% endif %}
- {% if pd_authors %}
+ {% if results.pdauthor or results.pdbook %}
<div class="l-container">
<div class="c-search-result-pd">
<h2>Domena publiczna?</h2>
Dowiedz się, dlaczego biblioteki internetowe nie mogą udostępniać dzieł tego autora.
</p>
<div>
- {% for tag in pd_authors %}
+ {% for tag in results.pdauthor %}
<div><a href="{{ tag.get_absolute_url }}">
<strong>{{ tag }}</strong>
- Dzieła tego autora będą mogły być publikowane bez ograniczeń w roku <em>{{ tag.goes_to_pd }}</em>.
+ {% if tag.death %}
+ {% if tag.in_pd %}
+ Dzieła tego autora są w domenie publicznej i czekają na publikację.
+ {% else %}
+ Dzieła tego autora będą mogły być publikowane bez ograniczeń w roku <em>{{ tag.goes_to_pd }}</em>.
+ {% endif %}
+ {% else %}
+ Dzieła tego autora są objęte prawem autorskim.
+ {% endif %}
+ </a></div>
+ {% endfor %}
+ {% for book in results.pdbook %}
+ <div><a href="{{ book.get_absolute_url }}">
+ <strong>{{ book }}</strong>
+ {% if book.pd %}
+ {% if book.in_pd %}
+ Ten utwór jest w domenie publicznej i czeka na publikację.
+ {% else %}
+ Ten utwór będzie mógł być publikowany bez ograniczeń w roku <em>{{ book.pd }}</em>.
+ {% endif %}
+ {% else %}
+ Ten utwór nie jest jeszcze w domenie publicznej.
+ {% endif %}
</a></div>
{% endfor %}
</div>
from django.db.models import Func
-from django.contrib.postgres.search import SearchVector, SearchQuery, SearchQueryField, SearchHeadline as SH
+from django.contrib.postgres.search import SearchQuery, SearchVectorField
class UnaccentSearchQuery(SearchQuery):
    '''
    Search query whose generated SQL is wrapped in unaccent().

    The idea is to strip diacritics only *after* the query text has
    already been passed through the language dictionary.
    '''

    def as_sql(self, *args, **kwargs):
        # Let SearchQuery build the tsquery SQL, then round-trip it through
        # text so unaccent() can be applied before casting back to tsquery.
        inner_sql, params = super().as_sql(*args, **kwargs)
        return 'unaccent({}::text)::tsquery'.format(inner_sql), params
-class UnaccentTSVector(Func):
- function = 'UNACCENT'
- template = '%(function)s(%(expressions)s::text)::tsvector'
-
-class Unaccent(Func):
- function = 'UNACCENT'
-
-
-class ConcatTSVector(Func):
- function = 'CONCAT'
- template = '%(function)s(%(expressions)s)::tsvector'
-
-
-class UnaccentTSQuery(Func):
- function = 'UNACCENT'
- template = '%(function)s(%(expressions)s::text)::tsquery'
- output_field = SearchQueryField()
-
-
-class TSV(Func):
class UnaccentSearchVector(Func):
    '''
    Search vector that matches text regardless of diacritics.

    We do the indexing twice, to account for non-diacritic versions.
    For example: user enters 'róże' -> stem to 'róża' -> unaccent to 'roza'.
    But user enters 'roze' -> stem leaves it as is, so we need original form in the vector.
    '''
    function = 'to_tsvector'
    # Two tsvectors are concatenated with ||:
    #   1. the 'polish' (stemming) vector, unaccented after stemming,
    #   2. a 'polish_simple' vector built from the already unaccented text.
    # The second operand must be a complete to_tsvector(...) call; without
    # its opening the parentheses are unbalanced and the SQL is invalid.
    # NOTE(review): %(expressions)s is interpolated twice, so this is
    # presumably only safe for parameter-free expressions such as plain
    # column references -- confirm before passing anything else.
    template = '''unaccent(
        %(function)s('polish', %(expressions)s)::text)::tsvector ||
        to_tsvector(
            'polish_simple',
            unaccent(%(expressions)s)
        )'''
    output_field = SearchVectorField()