- def __init__(self, default_field="content"):
-     IndexStore.__init__(self)
-     self.analyzer = WLAnalyzer()  # PolishAnalyzer(Version.LUCENE_34)
-     reader = IndexReader.open(self.store, True)
-     self.searcher = IndexSearcher(reader)
-     self.parser = QueryParser(Version.LUCENE_34, default_field,
-                               self.analyzer)
- 
-     self.parent_filter = TermsFilter()
-     self.parent_filter.addTerm(Term("is_book", "true"))
-     index_changed.connect(self.reopen)
-
- def close(self):
-     reader = self.searcher.getIndexReader()
-     self.searcher.close()
-     reader.close()
-     super(Search, self).close()
-     index_changed.disconnect(self.reopen)
-
- def reopen(self, **unused):
-     """Reopen the index reader if the index has changed and swap in a fresh searcher."""
-     reader = self.searcher.getIndexReader()
-     rdr = reader.reopen()
-     if not rdr.equals(reader):
-         log.debug('Reopening index')
-         oldsearch = self.searcher
-         self.searcher = IndexSearcher(rdr)
-         oldsearch.close()
-         reader.close()
-
- def query(self, query):
-     """Parse a query written in the default Lucene syntax (for humans)."""
-     return self.parser.parse(query)
-
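For illustration, a minimal sketch of how the parser wrapper above might be used; the search term is made up and an already-constructed Search instance is assumed:

# Hypothetical usage: parse a human-entered query string and run it
# directly against the searcher (the term "lokomotywa" is illustrative).
search = Search()
q = search.query('lokomotywa')         # parsed against the default "content" field
tops = search.searcher.search(q, 10)   # raw Lucene TopDocs for the parsed query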
- def simple_search(self, query, max_results=50):
-     """Run a query for books using Lucene syntax (for humans).
-     Returns (books, total_hits).
-     """
- 
-     tops = self.searcher.search(self.query(query), max_results)
-     bks = []
-     for found in tops.scoreDocs:
-         doc = self.searcher.doc(found.doc)
-         bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
-     return (bks, tops.totalHits)
-
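A hedged usage sketch based on the signature and return value above (the query text is made up; a Search instance `search` is assumed):

# Illustrative call: returns Django Book objects plus the total Lucene hit count.
books, total = search.simple_search(u'pan tadeusz', max_results=10)
print total                  # total number of matching documents in the index
for book in books:           # catalogue.models.Book instances
    print book.id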
- def get_tokens(self, searched, field='content', cached=None):
-     """Return tokens analyzed by the analyzer appropriate for the given field.
-     The argument can be a StringReader, a string/unicode object, or a list of
-     tokens. In the last case the tokens are returned unchanged (so they can be
-     reused as long as the analyzer does not change).
-     """
-     if cached is not None and field in cached:
-         return cached[field]
- 
-     if isinstance(searched, (str, unicode)):
-         searched = StringReader(searched)
-     elif isinstance(searched, list):
-         return searched
- 
-     searched.reset()
-     tokens = self.analyzer.reusableTokenStream(field, searched)
-     toks = []
-     while tokens.incrementToken():
-         cta = tokens.getAttribute(CharTermAttribute.class_)
-         toks.append(cta.toString())
- 
-     if cached is not None:
-         cached[field] = toks
- 
-     return toks
-
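A small sketch of get_tokens() with a shared token cache, following the docstring above (the input text is illustrative; a Search instance `search` is assumed):

cache = {}
toks = search.get_tokens(u'Pan Tadeusz, czyli ostatni zajazd', field='content', cached=cache)
# A second call with the same cache dict short-circuits and returns the
# previously analyzed token list, regardless of the new input.
toks_again = search.get_tokens(u'ignored when cached', field='content', cached=cache)
assert toks is toks_again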
- def fuzziness(self, fuzzy):
-     """Helper method that sanitizes the fuzziness value."""
-     if not fuzzy:
-         return None
-     if isinstance(fuzzy, float) and 0.0 < fuzzy <= 1.0:
-         return fuzzy
-     else:
-         return 0.5
-
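The sanitizer above turns falsy values into None, keeps floats in (0.0, 1.0], and falls back to 0.5 for anything else, for example:

search.fuzziness(False)   # -> None (fuzzy matching disabled)
search.fuzziness(0.7)     # -> 0.7  (valid similarity threshold)
search.fuzziness(True)    # -> 0.5  (truthy but not a float, so use the default)
search.fuzziness(1.5)     # -> 0.5  (out of range)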
- def make_phrase(self, tokens, field='content', slop=2, fuzzy=False):
-     """
-     Return a PhraseQuery with a series of tokens.
-     """
-     if fuzzy:
-         phrase = MultiPhraseQuery()
-         for t in tokens:
-             term = Term(field, t)
-             fuzzterm = FuzzyTermEnum(self.searcher.getIndexReader(), term, self.fuzziness(fuzzy))
-             fuzzterms = []