From e4381ce9a2094f3075b3cf81fe124081767c28a4 Mon Sep 17 00:00:00 2001 From: Jan Szejko Date: Tue, 20 Feb 2018 15:56:09 +0100 Subject: [PATCH] limited stopwords + better search by author + remove descendant results --- doc/schema.xml | 22 ++++++++++++++++------ src/search/index.py | 24 ++++++++++++++++++++++-- src/search/views.py | 8 ++++++-- 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/doc/schema.xml b/doc/schema.xml index 56172d3ec..ccf9bd5e6 100644 --- a/doc/schema.xml +++ b/doc/schema.xml @@ -134,7 +134,7 @@ - + @@ -143,10 +143,10 @@ - + - + @@ -159,7 +159,7 @@ - + @@ -1073,13 +1073,23 @@ - + + + + + + + + + + + + - diff --git a/src/search/index.py b/src/search/index.py index ab3286aee..102601463 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -558,6 +558,17 @@ class SearchResult(object): self._hits.append(hit) + @classmethod + def from_book(cls, book, how_found=None, query_terms=None): + doc = { + 'score': book.popularity.count, + 'book_id': book.id, + 'published_date': 0, + } + result = cls(doc, how_found=how_found, query_terms=query_terms) + result._book = book + return result + def __unicode__(self): return u"" % \ (self.book_id, len(self._hits), @@ -575,7 +586,7 @@ class SearchResult(object): if self.book_id != other.book_id: raise ValueError("this search result is for book %d; tried to merge with %d" % (self.book_id, other.book_id)) self._hits += other._hits - self._score += max(other._score, 0) + 0.5 + self._score += max(other._score, 0) return self def get_book(self): @@ -734,10 +745,19 @@ class Search(SolrIndex): return q + def search_by_author(self, words): + from catalogue.models import Book + books = Book.objects.filter(parent=None).order_by('-popularity__count') + for word in words: + books = books.filter(cached_author__iregex='\m%s\M' % word).select_related('popularity__count') + return [SearchResult.from_book(book, how_found='search_by_author', query_terms=words) for book in books[:30]] + def search_words(self, words, fields, book=True): + if book and fields == ['authors']: + return self.search_by_author(words) filters = [] for word in words: - if word not in stopwords: + if book or (word not in stopwords): word_filter = None for field in fields: q = self.index.Q(**{field: word}) diff --git a/src/search/views.py b/src/search/views.py index bda24b0dd..f7f60407d 100644 --- a/src/search/views.py +++ b/src/search/views.py @@ -142,8 +142,8 @@ def main(request): (['metadata'], True), (['text', 'themes_pl'], False), ) - for fieldset, is_book in fieldsets: - search_fields += fieldset + for fields, is_book in fieldsets: + search_fields += fields results_parts.append(search.search_words(words, search_fields, book=is_book)) results = [] @@ -157,6 +157,10 @@ def main(request): results.append(result) ids_results[book_id] = result + descendant_ids = set( + Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True)) + results = [result for result in results if result.book_id not in descendant_ids] + for result in results: search.get_snippets(result, query, num=3) -- 2.20.1