From: Marcin Koziej Date: Fri, 27 Jan 2012 11:58:04 +0000 (+0100) Subject: search fixes X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/386957d4f29d34d557a62065575a40c83d7d3ede search fixes --- diff --git a/apps/search/index.py b/apps/search/index.py index 33836ad37..9617077d9 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -642,7 +642,7 @@ class SearchResult(object): raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id)) self._hits += other._hits if other.score > self.score: - self.score = other.score + self._score = other._score return self def get_book(self): @@ -714,7 +714,6 @@ class SearchResult(object): tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache) for theme in themes: name_tokens = self.search.get_tokens(theme.name, 'POLISH') - print "THEME HIT: %s in %s" % (tokens, name_tokens) for t in tokens: if t in name_tokens: if not theme in themes_hit: @@ -967,13 +966,13 @@ class Search(IndexStore): return q def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False, - filters=None, tokens_cache=None, boost=None, snippets=False): + filters=None, tokens_cache=None, boost=None, snippets=False, slop=2): if filters is None: filters = [] if tokens_cache is None: tokens_cache = {} tokens = self.get_tokens(searched, field, cached=tokens_cache) - query = self.make_phrase(tokens, field=field, fuzzy=fuzzy) + query = self.make_phrase(tokens, field=field, fuzzy=fuzzy, slop=slop) if book: filters.append(self.term_filter(Term('is_book', 'true'))) top = self.searcher.search(query, self.chain_filters(filters), max_results) @@ -981,7 +980,7 @@ class Search(IndexStore): return [SearchResult(self, found, snippets=(snippets and self.get_snippets(found, query) or None), searched=searched) for found in top.scoreDocs] def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False, - filters=None, tokens_cache=None, boost=None): + filters=None, tokens_cache=None, boost=None, snippets=True): if filters is None: filters = [] if tokens_cache is None: tokens_cache = {} @@ -999,7 +998,7 @@ class Search(IndexStore): top = self.searcher.search(query, self.chain_filters(filters), max_results) return [SearchResult(self, found, searched=searched, tokens_cache=tokens_cache, - snippets=self.get_snippets(found, query)) for found in top.scoreDocs] + snippets=(snippets and self.get_snippets(found, query) or None)) for found in top.scoreDocs] def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None): """ diff --git a/apps/search/templatetags/search_tags.py b/apps/search/templatetags/search_tags.py index 0e20913d5..97deb9d13 100644 --- a/apps/search/templatetags/search_tags.py +++ b/apps/search/templatetags/search_tags.py @@ -36,7 +36,7 @@ def book_searched(context, result): # We don't need hits which lead to sections but do not have # snippets. hits = filter(lambda h: 'fragment' in h or - h['snippets'], result.hits) + h['snippets'], result.hits)[0:5] for hit in hits: hit['snippets'] = map(lambda s: s.replace("\n", "
").replace('---', '—'), hit['snippets']) @@ -49,3 +49,4 @@ def book_searched(context, result): 'hits': hits, 'main_link': book.get_absolute_url(), } + diff --git a/apps/search/views.py b/apps/search/views.py index 052b2f1ce..cf008705d 100644 --- a/apps/search/views.py +++ b/apps/search/views.py @@ -137,20 +137,25 @@ def main(request): b2.boost *= 1.1 if bks is []: author_title_rest.append(b) - - text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False)) - + + # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere, + # Because the query is using only one field. + text_phrase = SearchResult.aggregate( + srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4), + srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False)) + everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache) def already_found(results): def f(e): for r in results: if e.book_id == r.book_id: + e.boost = 0.9 results.append(e) return True return False return f - f = already_found(author_results + title_results) + f = already_found(author_results + title_results + text_phrase) everywhere = filter(lambda x: not f(x), everywhere) author_results = SearchResult.aggregate(author_results) diff --git a/wolnelektury/templates/catalogue/search_multiple_hits.html b/wolnelektury/templates/catalogue/search_multiple_hits.html index ed06c94a8..efe6d7915 100644 --- a/wolnelektury/templates/catalogue/search_multiple_hits.html +++ b/wolnelektury/templates/catalogue/search_multiple_hits.html @@ -12,7 +12,6 @@ {% endif %} - {% spaceless %} {% if results.author %}
@@ -80,7 +79,6 @@
{% endif %} - {% endspaceless %}