raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
self._hits += other._hits
if other.score > self.score:
- self.score = other.score
+ self._score = other._score
return self
def get_book(self):
tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
for theme in themes:
name_tokens = self.search.get_tokens(theme.name, 'POLISH')
- print "THEME HIT: %s in %s" % (tokens, name_tokens)
for t in tokens:
if t in name_tokens:
if not theme in themes_hit:
return q
def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
- filters=None, tokens_cache=None, boost=None, snippets=False):
+ filters=None, tokens_cache=None, boost=None, snippets=False, slop=2):
if filters is None: filters = []
if tokens_cache is None: tokens_cache = {}
tokens = self.get_tokens(searched, field, cached=tokens_cache)
- query = self.make_phrase(tokens, field=field, fuzzy=fuzzy)
+ query = self.make_phrase(tokens, field=field, fuzzy=fuzzy, slop=slop)
if book:
filters.append(self.term_filter(Term('is_book', 'true')))
top = self.searcher.search(query, self.chain_filters(filters), max_results)
return [SearchResult(self, found, snippets=(snippets and self.get_snippets(found, query) or None), searched=searched) for found in top.scoreDocs]
def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
- filters=None, tokens_cache=None, boost=None):
+ filters=None, tokens_cache=None, boost=None, snippets=True):
if filters is None: filters = []
if tokens_cache is None: tokens_cache = {}
top = self.searcher.search(query, self.chain_filters(filters), max_results)
return [SearchResult(self, found, searched=searched, tokens_cache=tokens_cache,
- snippets=self.get_snippets(found, query)) for found in top.scoreDocs]
+ snippets=(snippets and self.get_snippets(found, query) or None)) for found in top.scoreDocs]
def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
"""
b2.boost *= 1.1
if bks is []:
author_title_rest.append(b)
-
- text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
-
+
+ # Do a phrase search and a term search as well - this can give us better snippets than search_everywhere,
+ # because the query is using only one field.
+ text_phrase = SearchResult.aggregate(
+ srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+ srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
def already_found(results):
def f(e):
for r in results:
if e.book_id == r.book_id:
+ e.boost = 0.9
results.append(e)
return True
return False
return f
- f = already_found(author_results + title_results)
+ f = already_found(author_results + title_results + text_phrase)
everywhere = filter(lambda x: not f(x), everywhere)
author_results = SearchResult.aggregate(author_results)