# Reconstructed post-patch contents of src/search/utils.py
# (wolnelektury.git, blob 77ff1ae11), recovered from a line-collapsed diff.
from django.conf import settings
from django.db.models import Func
from django.contrib.postgres.search import SearchQuery, SearchVectorField


class UnaccentSearchQuery(SearchQuery):
    """Search query that can strip diacritics from the compiled tsquery.

    The idea is to run ``unaccent`` *after* the query has already been
    passed through the language dictionary, so stemming sees the accented
    input and only the resulting lexemes are unaccented.
    """

    def as_sql(self, *args, **kwargs):
        """Compile the query, optionally wrapping it in ``unaccent``.

        All arguments are forwarded unchanged to ``SearchQuery.as_sql``.

        Returns:
            A ``(sql, params)`` pair. When ``settings.SEARCH_USE_UNACCENT``
            is truthy, the SQL produced by the parent class is cast to text,
            run through ``unaccent`` and cast back to ``tsquery``; otherwise
            the parent's SQL is returned untouched. ``params`` is never
            modified.
        """
        sql, params = super().as_sql(*args, **kwargs)
        # The setting is read on every call, not cached at import time.
        if settings.SEARCH_USE_UNACCENT:
            sql = f'unaccent({sql}::text)::tsquery'
        return sql, params


class UnaccentSearchVector(Func):
    """``to_tsvector`` expression that also indexes unaccented forms.

    We do the indexing twice, to account for non-diacritic versions.
    For example: the user enters 'róże' -> it is stemmed to 'róża' ->
    unaccented to 'roza'. But if the user enters 'roze', stemming leaves it
    as is, so we also need the original (unstemmed, unaccented) form in the
    vector.

    With ``settings.SEARCH_USE_UNACCENT`` enabled, the generated SQL is the
    concatenation (``||``) of:

    * the ``settings.SEARCH_CONFIG`` vector of the expression, cast to text,
      unaccented and cast back to ``tsvector``;
    * the ``settings.SEARCH_CONFIG_SIMPLE`` vector of the unaccented
      expression.
    """

    function = 'to_tsvector'

    # Evaluated once, when the class body runs (i.e. at import time); the
    # configuration names are interpolated into the template right here.
    if settings.SEARCH_USE_UNACCENT:
        template = f'''unaccent(
            %(function)s('{settings.SEARCH_CONFIG}', %(expressions)s)::text)::tsvector ||
            to_tsvector(
                '{settings.SEARCH_CONFIG_SIMPLE}',
                unaccent(%(expressions)s)
            )'''
    # NOTE(review): when the setting is off, no template is assigned here, so
    # the one inherited from Func is used and SEARCH_CONFIG is not referenced
    # on that path -- confirm that relying on the database's default text
    # search configuration is intended.

    output_field = SearchVectorField()