X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/4cbcf8cac5e8cad592d9d84d76408249ccca8112..801a05d2ff33bb8a3c1a46ea0c657825b2787fa7:/src/search/utils.py?ds=sidebyside diff --git a/src/search/utils.py b/src/search/utils.py index b2cbe8d94..6c0acf594 100644 --- a/src/search/utils.py +++ b/src/search/utils.py @@ -1,29 +1,23 @@ from django.db.models import Func -from django.contrib.postgres.search import SearchVector, SearchQuery, SearchQueryField, SearchHeadline as SH +from django.contrib.postgres.search import SearchQuery, SearchVectorField +class UnaccentSearchQuery(SearchQuery): + ''' + The idea is to run unaccent *after* the query is already passed through the language dictionary. + ''' + def as_sql(self, *args, **kwargs): + sql, params = super().as_sql(*args, **kwargs) + sql = f'unaccent({sql}::text)::tsquery' + return sql, params -class UnaccentTSVector(Func): - function = 'UNACCENT' - template = '%(function)s(%(expressions)s::text)::tsvector' - -class Unaccent(Func): - function = 'UNACCENT' - - -class ConcatTSVector(Func): - function = 'CONCAT' - template = '%(function)s(%(expressions)s)::tsvector' - - -class UnaccentTSQuery(Func): - function = 'UNACCENT' - template = '%(function)s(%(expressions)s::text)::tsquery' - output_field = SearchQueryField() - - -class TSV(Func): +class UnaccentSearchVector(Func): + ''' + We do the indexing twice, to account for non-diacritic versions. + For example: user enters 'róże' -> stem to 'róża' -> unaccent to 'roza'. + But user enters 'roze' -> stem leaves it as is, so we need original form in the vector. + ''' function='to_tsvector' template = '''unaccent( %(function)s('polish', %(expressions)s)::text)::tsvector || @@ -31,49 +25,4 @@ class TSV(Func): 'polish_simple', unaccent(%(expressions)s) )''' - - -def build_search_vector(*fields): - return TSV(*fields) - - -def build_search_query(*fields, **kwargs): - return UnaccentTSQuery(SearchQuery(*fields, **kwargs)) - - - -class SearchHeadline(SH): - - def __init__( - self, - expression, - query, - *, - config=None, - start_sel=None, - stop_sel=None, - max_words=None, - min_words=None, - short_word=None, - highlight_all=None, - max_fragments=None, - fragment_delimiter=None, - ): - options = { - "StartSel": start_sel, - "StopSel": stop_sel, - "MaxWords": max_words, - "MinWords": min_words, - "ShortWord": short_word, - "HighlightAll": highlight_all, - "MaxFragments": max_fragments, - "FragmentDelimiter": fragment_delimiter, - } - self.options = { - option: value for option, value in options.items() if value is not None - } - expressions = (expression, query) - if config is not None: - config = SearchConfig.from_parameter(config) - expressions = (config,) + expressions - Func.__init__(self, *expressions) + output_field = SearchVectorField()