search fixes

author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>

Fri, 27 Jan 2012 11:58:04 +0000 (12:58 +0100)

committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>

Fri, 27 Jan 2012 11:58:37 +0000 (12:58 +0100)
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Fri, 27 Jan 2012 11:58:04 +0000 (12:58 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Fri, 27 Jan 2012 11:58:37 +0000 (12:58 +0100)
diff --git a/apps/search/index.py b/apps/search/index.py

index 33836ad..9617077 100644 (file)
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -642,7 +642,7 @@ class SearchResult(object):
              raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
          self._hits += other._hits
          if other.score > self.score:
-            self.score = other.score
+            self._score = other._score
          return self
  
      def get_book(self):
@@ -714,7 +714,6 @@ class SearchResult(object):
                  tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
                  for theme in themes:
                      name_tokens = self.search.get_tokens(theme.name, 'POLISH')
-                    print "THEME HIT: %s in %s" % (tokens, name_tokens)
                      for t in tokens:
                          if t in name_tokens:
                              if not theme in themes_hit:
@@ -967,13 +966,13 @@ class Search(IndexStore):
          return q
  
      def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
-                      filters=None, tokens_cache=None, boost=None, snippets=False):
+                      filters=None, tokens_cache=None, boost=None, snippets=False, slop=2):
          if filters is None: filters = []
          if tokens_cache is None: tokens_cache = {}
  
          tokens = self.get_tokens(searched, field, cached=tokens_cache)
  
-        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy)
+        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy, slop=slop)
          if book:
              filters.append(self.term_filter(Term('is_book', 'true')))
          top = self.searcher.search(query, self.chain_filters(filters), max_results)
@@ -981,7 +980,7 @@ class Search(IndexStore):
          return [SearchResult(self, found, snippets=(snippets and self.get_snippets(found, query) or None), searched=searched) for found in top.scoreDocs]
  
      def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
-                    filters=None, tokens_cache=None, boost=None):
+                    filters=None, tokens_cache=None, boost=None, snippets=True):
          if filters is None: filters = []
          if tokens_cache is None: tokens_cache = {}
  
@@ -999,7 +998,7 @@ class Search(IndexStore):
          top = self.searcher.search(query, self.chain_filters(filters), max_results)
  
          return [SearchResult(self, found, searched=searched, tokens_cache=tokens_cache,
-                             snippets=self.get_snippets(found, query)) for found in top.scoreDocs]
+                             snippets=(snippets and self.get_snippets(found, query) or None)) for found in top.scoreDocs]
  
      def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
          """
diff --git a/apps/search/templatetags/search_tags.py b/apps/search/templatetags/search_tags.py

index 0e20913..97deb9d 100644 (file)
--- a/apps/search/templatetags/search_tags.py
+++ b/apps/search/templatetags/search_tags.py
@@ -36,7 +36,7 @@ def book_searched(context, result):
      # We don't need hits which lead to sections but do not have
      # snippets.
      hits = filter(lambda h: 'fragment' in h or
-                  h['snippets'], result.hits)
+                  h['snippets'], result.hits)[0:5]
  
      for hit in hits:
          hit['snippets'] = map(lambda s: s.replace("\n", "<br />").replace('---', '&mdash;'), hit['snippets'])
@@ -49,3 +49,4 @@ def book_searched(context, result):
          'hits': hits,
          'main_link': book.get_absolute_url(),
      }
+
diff --git a/apps/search/views.py b/apps/search/views.py

index 052b2f1..cf00870 100644 (file)
--- a/apps/search/views.py
+++ b/apps/search/views.py
@@ -137,20 +137,25 @@ def main(request):
                  b2.boost *= 1.1
              if bks is []:
                  author_title_rest.append(b)
-        
-        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
-        
+
+        # Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
+        # because the query is using only one field.
+        text_phrase = SearchResult.aggregate(
+            srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+            srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
          everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
  
          def already_found(results):
              def f(e):
                  for r in results:
                      if e.book_id == r.book_id:
+                        e.boost = 0.9
                          results.append(e)
                          return True
                  return False
              return f
-        f = already_found(author_results + title_results)
+        f = already_found(author_results + title_results + text_phrase)
          everywhere = filter(lambda x: not f(x), everywhere)
  
          author_results = SearchResult.aggregate(author_results)
diff --git a/wolnelektury/templates/catalogue/search_multiple_hits.html b/wolnelektury/templates/catalogue/search_multiple_hits.html

index ed06c94..efe6d79 100644 (file)
--- a/wolnelektury/templates/catalogue/search_multiple_hits.html
+++ b/wolnelektury/templates/catalogue/search_multiple_hits.html
@@ -12,7 +12,6 @@
      {% endif %}
      <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
  
-    {% spaceless %}
      {% if results.author %}
      <div class="book-list-header">
        <div class="book-box-inner">
@@ -80,7 +79,6 @@
        </ol>
      </div>
      {% endif %}
-    {% endspaceless %}
author	Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
	Fri, 27 Jan 2012 11:58:04 +0000 (12:58 +0100)
committer	Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
	Fri, 27 Jan 2012 11:58:37 +0000 (12:58 +0100)
apps/search/index.py		patch \| blob \| history
apps/search/templatetags/search_tags.py		patch \| blob \| history
apps/search/views.py		patch \| blob \| history
wolnelektury/templates/catalogue/search_multiple_hits.html		patch \| blob \| history