- return cmp(self.score, other.score)
-
-
class Hint(object):
    """Collects search hints -- tags and/or concrete books -- and builds
    Lucene filters that narrow subsequent book- and fragment-level queries.
    """

    def __init__(self, search):
        # search: the owning Search/MultiSearch instance (used for tokenizing
        # tag names with the proper per-field analyzer).
        self.search = search
        self.book_tags = {}   # tag category -> list of book-level tags
        self.part_tags = []   # fragment-level ('theme') tags
        self._books = []      # explicit books the search is restricted to

    def books(self, *books):
        """Restrict the search to the given books."""
        self._books = books

    def tags(self, tags):
        """Sort *tags* into book-level and fragment-level hint buckets."""
        for t in tags:
            if t.category in ('author', 'title', 'epoch', 'genre', 'kind'):
                self.book_tags.setdefault(t.category, []).append(t)
            if t.category == 'theme':
                self.part_tags.append(t)

    def tag_filter(self, tags, field='tags'):
        """Build a filter that requires every tag name as an analyzed phrase."""
        q = BooleanQuery()

        for tag in tags:
            toks = self.search.get_tokens(tag.name, field=field)
            tag_phrase = PhraseQuery()
            for tok in toks:
                tag_phrase.add(Term(field, tok))
            q.add(BooleanClause(tag_phrase, BooleanClause.Occur.MUST))

        return QueryWrapperFilter(q)

    def book_filter(self):
        """Return a filter matching the hinted book tags, or None if none."""
        # Flatten the per-category tag lists into a single sequence
        # (was a reduce() over list concatenation).
        tags = [t for lst in self.book_tags.values() for t in lst]
        if tags:
            return self.tag_filter(tags)
        return None

    def part_filter(self):
        """Build a filter for fragment queries: theme tags and/or book ids."""
        fs = []
        if self.part_tags:
            fs.append(self.tag_filter(self.part_tags, field='themes'))

        # Truthiness test: `self._books != []` was True even for the empty
        # *tuple* stored by books() with no arguments.
        if self._books:
            bf = BooleanFilter()
            for b in self._books:
                id_filter = NumericRangeFilter.newIntRange('book_id', b.id, b.id, True, True)
                bf.add(FilterClause(id_filter, BooleanClause.Occur.SHOULD))
            fs.append(bf)

        return MultiSearch.chain_filters(fs)

    def should_search_for_book(self):
        """True unless the search is already restricted to specific books."""
        # Was `self._books == []`, which is False for an empty tuple.
        return not self._books

    def just_search_in(self, all):
        """Drop from *all* the indexes already constrained by hints."""
        some = []
        for field in all:
            if field == 'author' and 'author' in self.book_tags:
                continue
            if field == 'title' and self._books:
                continue
            if field in ('themes', 'themes_pl') and self.part_tags:
                continue
            some.append(field)
        return some
-
-
-class MultiSearch(Search):
- """Class capable of IMDb-like searching"""
def get_tokens(self, searched, field='content'):
    """Analyze *searched* with the analyzer appropriate for *field*.

    *searched* may be a StringReader, a str/unicode (which gets wrapped
    in a StringReader), or an already-analyzed token list -- the latter
    is returned unchanged so tokens can be reused when the analyzer does
    not change.
    """
    # A token list needs no re-analysis.
    if isinstance(searched, list):
        return searched
    if isinstance(searched, str) or isinstance(searched, unicode):
        searched = StringReader(searched)

    searched.reset()
    stream = self.analyzer.reusableTokenStream(field, searched)
    result = []
    while stream.incrementToken():
        attr = stream.getAttribute(CharTermAttribute.class_)
        result.append(attr.toString())
    return result
-
def fuzziness(self, fuzzy):
    """Normalize a fuzziness setting to a similarity value or None.

    A falsy value disables fuzzy matching (returns None); a float in
    (0.0, 1.0] is used as-is; any other truthy value falls back to 0.5.
    """
    if not fuzzy:
        return None
    if isinstance(fuzzy, float) and 0.0 < fuzzy <= 1.0:
        return fuzzy
    return 0.5
-
- def make_phrase(self, tokens, field='content', slop=2, fuzzy=False):
- if fuzzy:
- phrase = MultiPhraseQuery()
- for t in tokens:
- term = Term(field, t)
- fuzzterm = FuzzyTermEnum(self.searcher.getIndexReader(), term, self.fuzziness(fuzzy))
- fuzzterms = []
-
- while True:
- # print("fuzz %s" % unicode(fuzzterm.term()).encode('utf-8'))
- ft = fuzzterm.term()
- if ft:
- fuzzterms.append(ft)
- if not fuzzterm.next(): break
- if fuzzterms:
- phrase.add(JArray('object')(fuzzterms, Term))
- else:
- phrase.add(term)