From: Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Date: Fri, 27 Jan 2012 11:58:04 +0000 (+0100)
Subject: search fixes
X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/386957d4f29d34d557a62065575a40c83d7d3ede?ds=sidebyside;hp=--cc

search fixes
---

386957d4f29d34d557a62065575a40c83d7d3ede
diff --git a/apps/search/index.py b/apps/search/index.py
index 33836ad37..9617077d9 100644
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -642,7 +642,7 @@ class SearchResult(object):
             raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
         self._hits += other._hits
         if other.score > self.score:
-            self.score = other.score
+            self._score = other._score
         return self
 
     def get_book(self):
@@ -714,7 +714,6 @@ class SearchResult(object):
                 tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
                 for theme in themes:
                     name_tokens = self.search.get_tokens(theme.name, 'POLISH')
-                    print "THEME HIT: %s in %s" % (tokens, name_tokens)
                     for t in tokens:
                         if t in name_tokens:
                             if not theme in themes_hit:
@@ -967,13 +966,13 @@ class Search(IndexStore):
         return q
 
     def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
-                      filters=None, tokens_cache=None, boost=None, snippets=False):
+                      filters=None, tokens_cache=None, boost=None, snippets=False, slop=2):
         if filters is None: filters = []
         if tokens_cache is None: tokens_cache = {}
 
         tokens = self.get_tokens(searched, field, cached=tokens_cache)
 
-        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy)
+        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy, slop=slop)
         if book:
             filters.append(self.term_filter(Term('is_book', 'true')))
         top = self.searcher.search(query, self.chain_filters(filters), max_results)
@@ -981,7 +980,7 @@ class Search(IndexStore):
         return [SearchResult(self, found, snippets=(snippets and self.get_snippets(found, query) or None), searched=searched) for found in top.scoreDocs]
 
     def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
-                    filters=None, tokens_cache=None, boost=None):
+                    filters=None, tokens_cache=None, boost=None, snippets=True):
         if filters is None: filters = []
         if tokens_cache is None: tokens_cache = {}
 
@@ -999,7 +998,7 @@ class Search(IndexStore):
         top = self.searcher.search(query, self.chain_filters(filters), max_results)
 
         return [SearchResult(self, found, searched=searched, tokens_cache=tokens_cache,
-                             snippets=self.get_snippets(found, query)) for found in top.scoreDocs]
+                             snippets=(snippets and self.get_snippets(found, query) or None)) for found in top.scoreDocs]
 
     def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
         """
diff --git a/apps/search/templatetags/search_tags.py b/apps/search/templatetags/search_tags.py
index 0e20913d5..97deb9d13 100644
--- a/apps/search/templatetags/search_tags.py
+++ b/apps/search/templatetags/search_tags.py
@@ -36,7 +36,7 @@ def book_searched(context, result):
     # We don't need hits which lead to sections but do not have
     # snippets.
     hits = filter(lambda h: 'fragment' in h or
-                  h['snippets'], result.hits)
+                  h['snippets'], result.hits)[0:5]
 
     for hit in hits:
         hit['snippets'] = map(lambda s: s.replace("\n", "<br />").replace('---', '&mdash;'), hit['snippets'])
@@ -49,3 +49,4 @@ def book_searched(context, result):
         'hits': hits,
         'main_link': book.get_absolute_url(),
     }
+
diff --git a/apps/search/views.py b/apps/search/views.py
index 052b2f1ce..cf008705d 100644
--- a/apps/search/views.py
+++ b/apps/search/views.py
@@ -137,20 +137,25 @@ def main(request):
                 b2.boost *= 1.1
             if bks is []:
                 author_title_rest.append(b)
-        
-        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
-        
+
+        # Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
+        # because the query is using only one field.
+        text_phrase = SearchResult.aggregate(
+            srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+            srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
         everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
 
         def already_found(results):
             def f(e):
                 for r in results:
                     if e.book_id == r.book_id:
+                        e.boost = 0.9
                         results.append(e)
                         return True
                 return False
             return f
-        f = already_found(author_results + title_results)
+        f = already_found(author_results + title_results + text_phrase)
         everywhere = filter(lambda x: not f(x), everywhere)
 
         author_results = SearchResult.aggregate(author_results)
diff --git a/wolnelektury/templates/catalogue/search_multiple_hits.html b/wolnelektury/templates/catalogue/search_multiple_hits.html
index ed06c94a8..efe6d7915 100644
--- a/wolnelektury/templates/catalogue/search_multiple_hits.html
+++ b/wolnelektury/templates/catalogue/search_multiple_hits.html
@@ -12,7 +12,6 @@
     {% endif %}
     <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
 
-    {% spaceless %}
     {% if results.author %}
     <div class="book-list-header">
       <div class="book-box-inner">
@@ -80,7 +79,6 @@
       </ol>
     </div>
     {% endif %}
-    {% endspaceless %}