+class Search(SolrIndex):
+ """
+ Search facilities.
+ """
+ def __init__(self, default_field="text"):
+ super(Search, self).__init__()
+
+ # def get_tokens(self, searched, field='text', cached=None):
+ # """returns tokens analyzed by a proper (for a field) analyzer
+ # argument can be: StringReader, string/unicode, or tokens. In the last case
+ # they will just be returned (so we can reuse tokens, if we don't change the analyzer)
+ # """
+ # if cached is not None and field in cached:
+ # return cached[field]
+
+ # if isinstance(searched, str) or isinstance(searched, unicode):
+ # searched = StringReader(searched)
+ # elif isinstance(searched, list):
+ # return searched
+
+ # searched.reset()
+ # tokens = self.analyzer.reusableTokenStream(field, searched)
+ # toks = []
+ # while tokens.incrementToken():
+ # cta = tokens.getAttribute(CharTermAttribute.class_)
+ # toks.append(cta.toString())
+
+ # if cached is not None:
+ # cached[field] = toks
+
+ # return toks
+
+ # @staticmethod
+ # def fuzziness(fuzzy):
+ # """Helper method to sanitize fuzziness"""
+ # if not fuzzy:
+ # return None
+ # if isinstance(fuzzy, float) and fuzzy > 0.0 and fuzzy <= 1.0:
+ # return fuzzy
+ # else:
+ # return 0.5
+
+ # def make_phrase(self, tokens, field='text', slop=2, fuzzy=False):
+ # """
+ # Return a PhraseQuery with a series of tokens.
+ # """
+ # if fuzzy:
+ # phrase = MultiPhraseQuery()
+ # for t in tokens:
+ # term = Term(field, t)
+ # fuzzterm = FuzzyTermEnum(self.searcher.getIndexReader(), term, self.fuzziness(fuzzy))
+ # fuzzterms = []
+
+ # while True:
+ # ft = fuzzterm.term()
+ # if ft:
+ # fuzzterms.append(ft)
+ # if not fuzzterm.next(): break
+ # if fuzzterms:
+ # phrase.add(JArray('object')(fuzzterms, Term))
+ # else:
+ # phrase.add(term)
+ # else:
+ # phrase = PhraseQuery()
+ # phrase.setSlop(slop)
+ # for t in tokens:
+ # term = Term(field, t)
+ # phrase.add(term)
+ # return phrase
+
+ def make_term_query(self, query, field='text', modal=operator.or_):
+ """
+ Returns term queries joined by boolean query.
+ modal - applies to boolean query
+ fuzzy - should the query by fuzzy.
+ """
+ q = self.index.Q()
+ q = reduce(modal, map(lambda s: self.index.Q(**{field: s}),
+ query.split(r" ")), q)
+
+ return q
+
+ def search_phrase(self, searched, field='text', book=False,
+ filters=None,
+ snippets=False):
+ if filters is None: filters = []
+ if book: filters.append(self.index.Q(is_book=True))
+
+ q = self.index.query(**{field: searched})
+ q = self.apply_filters(q, filters).field_limit(score=True, all_fields=True)
+ res = q.execute()
+ return [SearchResult(found, how_found=u'search_phrase') for found in res]
+
+ def search_some(self, searched, fields, book=True,
+ filters=None,
+ snippets=True):
+ assert isinstance(fields, list)
+ if filters is None: filters = []
+ if book: filters.append(self.index.Q(is_book=True))
+
+ query = self.index.Q()
+
+ for fld in fields:
+ query = self.index.Q(query | self.make_term_query(searched, fld))
+
+ query = self.index.query(query)
+ query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
+ res = query.execute()
+ return [SearchResult(found, how_found='search_some') for found in res]
+
+ # def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
+ # """
+ # Search for perfect book matches. Just see if the query matches with some author or title,
+ # taking hints into account.
+ # """
+ # fields_to_search = ['authors', 'title']
+ # only_in = None
+ # if hint:
+ # if not hint.should_search_for_book():
+ # return []
+ # fields_to_search = hint.just_search_in(fields_to_search)
+ # only_in = hint.book_filter()
+
+ # qrys = [self.make_phrase(self.get_tokens(searched, field=fld), field=fld, fuzzy=fuzzy) for fld in fields_to_search]
+
+ # books = []
+ # for q in qrys:
+ # top = self.searcher.search(q,
+ # self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, how_found="search_perfect_book"))
+ # return books
+
+ # def search_book(self, searched, max_results=20, fuzzy=False, hint=None):
+ # fields_to_search = ['tags', 'authors', 'title']
+
+ # only_in = None
+ # if hint:
+ # if not hint.should_search_for_book():
+ # return []
+ # fields_to_search = hint.just_search_in(fields_to_search)
+ # only_in = hint.book_filter()
+
+ # tokens = self.get_tokens(searched, field='SIMPLE')
+
+ # q = BooleanQuery()
+
+ # for fld in fields_to_search:
+ # q.add(BooleanClause(self.make_term_query(tokens, field=fld,
+ # fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+
+ # books = []
+ # top = self.searcher.search(q,
+ # self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, how_found="search_book"))
+
+ # return books
+
+ # def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
+ # """
+ # Search for book parts which contains a phrase perfectly matching (with a slop of 2, default for make_phrase())
+ # some part/fragment of the book.
+ # """
+ # qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['text']]
+
+ # flt = None
+ # if hint:
+ # flt = hint.part_filter()
+
+ # books = []
+ # for q in qrys:
+ # top = self.searcher.search(q,
+ # self.chain_filters([self.term_filter(Term('is_book', 'true'), inverse=True),
+ # flt]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, snippets=self.get_snippets(found, q), how_found='search_perfect_parts'))
+
+ # return books
+
+ def search_everywhere(self, searched):
+ """
+ Tries to use the search terms to match different fields of a book (or its parts).
+ E.g. one word can be an author's surname, another a part of the title, and the rest
+ some words from the third chapter.
+ """