From 9977ccb7fb21737b6f0e91f2eac264db9a59eea2 Mon Sep 17 00:00:00 2001
From: Jan Szejko
Date: Mon, 19 Feb 2018 12:39:11 +0100
Subject: [PATCH] ignore stopwords in query

---
 src/search/index.py                | 26 ++++++++++++++++++--------
 src/wolnelektury/settings/basic.py |  1 +
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/search/index.py b/src/search/index.py
index 4c278eab5..5cae3e3e6 100644
--- a/src/search/index.py
+++ b/src/search/index.py
@@ -20,6 +20,13 @@ from wolnelektury.utils import makedirs
 
 log = logging.getLogger('search')
 
+if os.path.isfile(settings.SOLR_STOPWORDS):
+    stopwords = set(
+        line.decode('utf-8').strip()
+        for line in open(settings.SOLR_STOPWORDS) if not line.startswith('#'))
+else:
+    stopwords = set()
+
 
 class SolrIndex(object):
     def __init__(self, mode=None):
@@ -731,14 +738,17 @@ class Search(SolrIndex):
     def search_words(self, words, fields, book=True):
         filters = []
         for word in words:
-            word_filter = None
-            for field in fields:
-                q = self.index.Q(**{field: word})
-                if word_filter is None:
-                    word_filter = q
-                else:
-                    word_filter |= q
-            filters.append(word_filter)
+            if word not in stopwords:
+                word_filter = None
+                for field in fields:
+                    q = self.index.Q(**{field: word})
+                    if word_filter is None:
+                        word_filter = q
+                    else:
+                        word_filter |= q
+                filters.append(word_filter)
+        if not filters:
+            return []
         if book:
             query = self.index.query(is_book=True)
         else:
diff --git a/src/wolnelektury/settings/basic.py b/src/wolnelektury/settings/basic.py
index c9939b52c..32b48f299 100644
--- a/src/wolnelektury/settings/basic.py
+++ b/src/wolnelektury/settings/basic.py
@@ -26,6 +26,7 @@ DATABASES = {
 
 SOLR = "http://localhost:8983/solr/wl/"
 SOLR_TEST = "http://localhost:8983/solr/wl_test/"
+SOLR_STOPWORDS = "/path/to/solr/data/conf/lang/stopwords_pl.txt"
 
 # Local time zone for this installation. Choices can be found here:
 # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
-- 
2.20.1