single search query with boosts - stub search-experimental
author: Jan Szejko <janek37@gmail.com>
Mon, 26 Feb 2018 16:02:22 +0000 (17:02 +0100)
committer: Jan Szejko <janek37@gmail.com>
Mon, 26 Feb 2018 16:02:22 +0000 (17:02 +0100)
src/search/index.py
src/search/views.py

index a1c2716..3bb7781 100644 (file)
@@ -897,36 +897,45 @@ class Search(SolrIndex):
             books = books.filter(cached_author__iregex='\m%s\M' % word).select_related('popularity__count')
         return [SearchResult.from_book(book, how_found='search_by_author', query_terms=words) for book in books[:30]]
 
-    def search_words(self, words, fields, required=None, book=True, picture=False):
-        if book and not picture and fields == ['authors']:
-            return self.search_by_author(words)
+    def search_words(self, words, fieldsets, picture=False):
+        if book and not picture and fields == ['authors']:
+            return self.search_by_author(words)
         filters = []
         for word in words:
-            if book or picture or (word not in stopwords):
-                word_filter = None
-                for field in fields:
-                    q = self.index.Q(**{field: word})
-                    if word_filter is None:
-                        word_filter = q
-                    else:
-                        word_filter |= q
-                filters.append(word_filter)
-        if required:
-            required_filter = None
-            for field in required:
-                for word in words:
-                    if book or picture or (word not in stopwords):
+            word_filter = None
+            for fields, book, boost in fieldsets:
+                if book or picture or (word not in stopwords):
+                    fieldset_filter = None
+                    for field in fields:
                         q = self.index.Q(**{field: word})
-                        if required_filter is None:
-                            required_filter = q
+                        if fieldset_filter is None:
+                            fieldset_filter = q
                         else:
-                            required_filter |= q
-            filters.append(required_filter)
-        if not filters:
-            return []
+                            fieldset_filter |= q
+                    if book:
+                        fieldset_filter &= self.index.Q(is_book=True)
+                    fieldset_filter = fieldset_filter**boost
+                    if word_filter is None:
+                        word_filter = fieldset_filter
+                    else:
+                        word_filter |= fieldset_filter
+            filters.append(word_filter)
+        # if required:
+        #     required_filter = None
+        #     for field in required:
+        #         for word in words:
+        #             if book or picture or (word not in stopwords):
+        #                 q = self.index.Q(**{field: word})
+        #                 if required_filter is None:
+        #                     required_filter = q
+        #                 else:
+        #                     required_filter |= q
+        #     filters.append(required_filter)
+        # if not filters:
+        #     return []
         params = {}
-        if book:
-            params['is_book'] = True
+        if book:
+            params['is_book'] = True
         if picture:
             params['picture_id__gt'] = 0
         else:
index 6cb6bc5..8c02131 100644 (file)
@@ -147,28 +147,29 @@ def main(request):
 
 def search_books(query):
     search = Search()
-    results_parts = []
-    search_fields = []
+    results_parts = []
+    search_fields = []
     words = query.split()
     fieldsets = (
-        (['authors'], True),
-        (['title'], True),
-        (['metadata'], True),
-        (['text', 'themes_pl'], False),
+        (['authors'], True, 8),
+        (['title'], True, 4),
+        (['metadata'], True, 2),
+        (['text', 'themes_pl'], False, 1),
     )
-    for fields, is_book in fieldsets:
-        search_fields += fields
-        results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
+    # for fields, is_book in fieldsets:
+    #     search_fields += fields
+    #     results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book))
+    query_results = search.search_words(words, fieldsets)
     results = []
     ids_results = {}
-    for results_part in results_parts:
-        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
-            book_id = result.book_id
-            if book_id in ids_results:
-                ids_results[book_id].merge(result)
-            else:
-                results.append(result)
-                ids_results[book_id] = result
+    for results_part in results_parts:
+    for result in sorted(SearchResult.aggregate(query_results), reverse=True):
+        book_id = result.book_id
+        if book_id in ids_results:
+            ids_results[book_id].merge(result)
+        else:
+            results.append(result)
+            ids_results[book_id] = result
     descendant_ids = set(
         Book.objects.filter(id__in=ids_results, ancestor__in=ids_results).values_list('id', flat=True))
     results = [result for result in results if result.book_id not in descendant_ids]
@@ -187,28 +188,29 @@ def search_books(query):
 
 def search_pictures(query):
     search = Search()
-    results_parts = []
-    search_fields = []
+    results_parts = []
+    search_fields = []
     words = query.split()
     fieldsets = (
-        (['authors'], True),
-        (['title'], True),
-        (['metadata'], True),
-        (['themes_pl'], False),
+        (['authors'], True, 8),
+        (['title'], True, 4),
+        (['metadata'], True, 2),
+        (['themes_pl'], False, 1),
     )
-    for fields, is_book in fieldsets:
-        search_fields += fields
-        results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
+    # for fields, is_book in fieldsets:
+    #     search_fields += fields
+    #     results_parts.append(search.search_words(words, search_fields, required=fields, book=is_book, picture=True))
+    query_results = search.search_words(words, fieldsets, picture=True)
     results = []
     ids_results = {}
-    for results_part in results_parts:
-        for result in sorted(PictureResult.aggregate(results_part), reverse=True):
-            picture_id = result.picture_id
-            if picture_id in ids_results:
-                ids_results[picture_id].merge(result)
-            else:
-                results.append(result)
-                ids_results[picture_id] = result
+    for results_part in results_parts:
+    for result in sorted(PictureResult.aggregate(query_results), reverse=True):
+        picture_id = result.picture_id
+        if picture_id in ids_results:
+            ids_results[picture_id].merge(result)
+        else:
+            results.append(result)
+            ids_results[picture_id] = result
 
     def ensure_exists(r):
         try: