From 42ddf46506573c753210e81f962ea14917df7cc8 Mon Sep 17 00:00:00 2001 From: Jan Szejko Date: Mon, 26 Feb 2018 17:02:22 +0100 Subject: [PATCH] single search query with boosts - stub --- src/search/index.py | 59 ++++++++++++++++++++++---------------- src/search/views.py | 70 +++++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 59 deletions(-) diff --git a/src/search/index.py b/src/search/index.py index a1c2716a8..3bb778152 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -897,36 +897,45 @@ class Search(SolrIndex): books = books.filter(cached_author__iregex='\m%s\M' % word).select_related('popularity__count') return [SearchResult.from_book(book, how_found='search_by_author', query_terms=words) for book in books[:30]] - def search_words(self, words, fields, required=None, book=True, picture=False): - if book and not picture and fields == ['authors']: - return self.search_by_author(words) + def search_words(self, words, fieldsets, picture=False): + # if book and not picture and fields == ['authors']: + # return self.search_by_author(words) filters = [] for word in words: - if book or picture or (word not in stopwords): - word_filter = None - for field in fields: - q = self.index.Q(**{field: word}) - if word_filter is None: - word_filter = q - else: - word_filter |= q - filters.append(word_filter) - if required: - required_filter = None - for field in required: - for word in words: - if book or picture or (word not in stopwords): + word_filter = None + for fields, book, boost in fieldsets: + if book or picture or (word not in stopwords): + fieldset_filter = None + for field in fields: q = self.index.Q(**{field: word}) - if required_filter is None: - required_filter = q + if fieldset_filter is None: + fieldset_filter = q else: - required_filter |= q - filters.append(required_filter) - if not filters: - return [] + fieldset_filter |= q + if book: + fieldset_filter &= self.index.Q(is_book=True) + fieldset_filter = fieldset_filter**boost + if word_filter is None: + word_filter = fieldset_filter + else: + word_filter |= fieldset_filter + filters.append(word_filter) + # if required: + # required_filter = None + # for field in required: + # for word in words: + # if book or picture or (word not in stopwords): + # q = self.index.Q(**{field: word}) + # if required_filter is None: + # required_filter = q + # else: + # required_filter |= q + # filters.append(required_filter) + # if not filters: + # return [] params = {} - if book: - params['is_book'] = True + # if book: + # params['is_book'] = True if picture: params['picture_id__gt'] = 0 else: diff --git a/src/search/views.py b/src/search/views.py index 6cb6bc53e..8c02131bc 100644 --- a/src/search/views.py +++ b/src/search/views.py @@ -147,28 +147,29 @@ def main(request): def search_books(query): search = Search() - results_parts = [] - search_fields = [] + # results_parts = [] + # search_fields = [] words = query.split() fieldsets = ( - (['authors'], True), - (['title'], True), - (['metadata'], True), - (['text', 'themes_pl'], False), + (['authors'], True, 8), + (['title'], True, 4), + (['metadata'], True, 2), + (['text', 'themes_pl'], False, 1), ) - for fields, is_book in fieldsets: - search_fields += fields - results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book)) + # for fields, is_book in fieldsets: + # search_fields += fields + # results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book)) + query_results = search.search_words(words, fieldsets) results = [] ids_results = {} - for results_part in results_parts: - for result in sorted(SearchResult.aggregate(results_part), reverse=True): - book_id = result.book_id - if book_id in ids_results: - ids_results[book_id].merge(result) - else: - results.append(result) - ids_results[book_id] = result + # for results_part in results_parts: + for result in sorted(SearchResult.aggregate(query_results), reverse=True): + book_id = result.book_id + if book_id in ids_results: + ids_results[book_id].merge(result) + else: + results.append(result) + ids_results[book_id] = result descendant_ids = set( Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True)) results = [result for result in results if result.book_id not in descendant_ids] @@ -187,28 +188,29 @@ def search_books(query): def search_pictures(query): search = Search() - results_parts = [] - search_fields = [] + # results_parts = [] + # search_fields = [] words = query.split() fieldsets = ( - (['authors'], True), - (['title'], True), - (['metadata'], True), - (['themes_pl'], False), + (['authors'], True, 8), + (['title'], True, 4), + (['metadata'], True, 2), + (['themes_pl'], False, 1), ) - for fields, is_book in fieldsets: - search_fields += fields - results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True)) + # for fields, is_book in fieldsets: + # search_fields += fields + # results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True)) + query_results = search.search_words(words, fieldsets, picture=True) results = [] ids_results = {} - for results_part in results_parts: - for result in sorted(PictureResult.aggregate(results_part), reverse=True): - picture_id = result.picture_id - if picture_id in ids_results: - ids_results[picture_id].merge(result) - else: - results.append(result) - ids_results[picture_id] = result + # for results_part in results_parts: + for result in sorted(PictureResult.aggregate(query_results), reverse=True): + picture_id = result.picture_id + if picture_id in ids_results: + ids_results[picture_id].merge(result) + else: + results.append(result) + ids_results[picture_id] = result def ensure_exists(r): try: -- 2.20.1