search fixes
authorMarcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Fri, 27 Jan 2012 11:58:04 +0000 (12:58 +0100)
committerMarcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Fri, 27 Jan 2012 11:58:37 +0000 (12:58 +0100)
apps/search/index.py
apps/search/templatetags/search_tags.py
apps/search/views.py
wolnelektury/templates/catalogue/search_multiple_hits.html

index 33836ad..9617077 100644 (file)
@@ -642,7 +642,7 @@ class SearchResult(object):
             raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
         self._hits += other._hits
         if other.score > self.score:
-            self.score = other.score
+            self._score = other._score
         return self
 
     def get_book(self):
@@ -714,7 +714,6 @@ class SearchResult(object):
                 tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
                 for theme in themes:
                     name_tokens = self.search.get_tokens(theme.name, 'POLISH')
-                    print "THEME HIT: %s in %s" % (tokens, name_tokens)
                     for t in tokens:
                         if t in name_tokens:
                             if not theme in themes_hit:
@@ -967,13 +966,13 @@ class Search(IndexStore):
         return q
 
     def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
-                      filters=None, tokens_cache=None, boost=None, snippets=False):
+                      filters=None, tokens_cache=None, boost=None, snippets=False, slop=2):
         if filters is None: filters = []
         if tokens_cache is None: tokens_cache = {}
 
         tokens = self.get_tokens(searched, field, cached=tokens_cache)
 
-        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy)
+        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy, slop=slop)
         if book:
             filters.append(self.term_filter(Term('is_book', 'true')))
         top = self.searcher.search(query, self.chain_filters(filters), max_results)
@@ -981,7 +980,7 @@ class Search(IndexStore):
         return [SearchResult(self, found, snippets=(snippets and self.get_snippets(found, query) or None), searched=searched) for found in top.scoreDocs]
 
     def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
-                    filters=None, tokens_cache=None, boost=None):
+                    filters=None, tokens_cache=None, boost=None, snippets=True):
         if filters is None: filters = []
         if tokens_cache is None: tokens_cache = {}
 
@@ -999,7 +998,7 @@ class Search(IndexStore):
         top = self.searcher.search(query, self.chain_filters(filters), max_results)
 
         return [SearchResult(self, found, searched=searched, tokens_cache=tokens_cache,
-                             snippets=self.get_snippets(found, query)) for found in top.scoreDocs]
+                             snippets=(snippets and self.get_snippets(found, query) or None)) for found in top.scoreDocs]
 
     def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
         """
index 0e20913..97deb9d 100644 (file)
@@ -36,7 +36,7 @@ def book_searched(context, result):
     # We don't need hits which lead to sections but do not have
     # snippets.
     hits = filter(lambda h: 'fragment' in h or
-                  h['snippets'], result.hits)
+                  h['snippets'], result.hits)[0:5]
 
     for hit in hits:
         hit['snippets'] = map(lambda s: s.replace("\n", "<br />").replace('---', '&mdash;'), hit['snippets'])
@@ -49,3 +49,4 @@ def book_searched(context, result):
         'hits': hits,
         'main_link': book.get_absolute_url(),
     }
+
index 052b2f1..cf00870 100644 (file)
@@ -137,20 +137,25 @@ def main(request):
                 b2.boost *= 1.1
             if bks is []:
                 author_title_rest.append(b)
-        
-        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
-        
+
        # Do a phrase search but also a term search - this can give us better snippets than search_everywhere,
        # because the query is using only one field.
+        text_phrase = SearchResult.aggregate(
+            srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+            srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
         everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 
         def already_found(results):
             def f(e):
                 for r in results:
                     if e.book_id == r.book_id:
+                        e.boost = 0.9
                         results.append(e)
                         return True
                 return False
             return f
-        f = already_found(author_results + title_results)
+        f = already_found(author_results + title_results + text_phrase)
         everywhere = filter(lambda x: not f(x), everywhere)
 
         author_results = SearchResult.aggregate(author_results)
index ed06c94..efe6d79 100644 (file)
@@ -12,7 +12,6 @@
     {% endif %}
     <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
 
-    {% spaceless %}
     {% if results.author %}
     <div class="book-list-header">
       <div class="book-box-inner">
@@ -80,7 +79,6 @@
       </ol>
     </div>
     {% endif %}
-    {% endspaceless %}