class Hint(object):
    """
    Collects what is already known about the search target and turns it
    into Lucene filters.

    Known information can be a set of tags (author, title, epoch, genre,
    kind, theme) and/or a set of concrete books.  The filters built from it
    narrow down a search, and `just_search_in` tells the caller which index
    fields no longer need to be queried.
    """

    # Tag categories that describe a whole book.
    BOOK_TAG_CATEGORIES = ('author', 'title', 'epoch', 'genre', 'kind')
    # Tag categories that describe a part (fragment) of a book.
    PART_TAG_CATEGORIES = ('theme', 'theme_pl')

    def __init__(self, search):
        """
        Accepts the searcher object; only its
        `get_tokens(text, field=...)` method is used here.
        """
        self.search = search
        self.book_tags = {}   # category name -> list of tags of that category
        self.part_tags = []   # theme tags
        self._books = []      # books the search is restricted to

    def books(self, *books):
        """
        Give a hint that we search these books.

        Replaces any books hinted earlier.  Calling it without arguments
        clears the hint.
        """
        # Stored as a list: `*books` arrives as a tuple, and a tuple never
        # compares equal to `[]`, which used to make an empty hint look
        # like a non-empty one.
        self._books = list(books)

    def tags(self, tags):
        """
        Give a hint that these Tag objects (an iterable of them) are
        required.

        Tags are sorted by their `category` into book tags and part
        (theme) tags; tags of any other category are ignored.
        """
        for tag in tags:
            category = tag.category
            if category in self.BOOK_TAG_CATEGORIES:
                self.book_tags.setdefault(category, []).append(tag)
            elif category in self.PART_TAG_CATEGORIES:
                self.part_tags.append(tag)

    def tag_filter(self, tags, field='tags'):
        """
        Given a list of tags and an optional field (they normally live in
        the `tags` field), returns a filter accepting only documents that
        match every one of the tags.

        Each tag name is tokenized for `field` and must match as a phrase.
        """
        q = BooleanQuery()

        for tag in tags:
            tag_phrase = PhraseQuery()
            for tok in self.search.get_tokens(tag.name, field=field):
                tag_phrase.add(Term(field, tok))
            # One MUST clause per tag: all tags have to be present.
            q.add(BooleanClause(tag_phrase, BooleanClause.Occur.MUST))

        return QueryWrapperFilter(q)

    def book_filter(self):
        """
        Filters using book tags (all tag kinds except a theme).

        Returns None when no book tags were hinted.
        """
        tags = [tag for group in self.book_tags.values() for tag in group]
        if not tags:
            return None
        return self.tag_filter(tags)

    def part_filter(self):
        """
        This filter can be used to look for book parts.
        It filters on book id and/or themes, depending on which hints were
        given, and returns whatever `Search.chain_filters` makes of them.
        """
        filters = []
        if self.part_tags:
            filters.append(self.tag_filter(self.part_tags, field='themes'))

        if self._books:
            any_of_books = BooleanFilter()
            for book in self._books:
                # Inclusive range with min == max: matches exactly this id.
                id_filter = NumericRangeFilter.newIntRange(
                    'book_id', book.id, book.id, True, True)
                any_of_books.add(
                    FilterClause(id_filter, BooleanClause.Occur.SHOULD))
            filters.append(any_of_books)

        return Search.chain_filters(filters)

    def should_search_for_book(self):
        """Returns True when no concrete books were hinted."""
        return not self._books

    def just_search_in(self, all):
        """
        Holds the logic to figure out which indexes should be searched
        when we have some hints already.

        `all` is an iterable of field names; the result is a list of those
        fields that are not already covered by a hint, in the same order.
        """
        author_known = 'author' in self.book_tags
        book_known = bool(self._books)
        theme_known = bool(self.part_tags)

        some = []
        for field in all:
            if field == 'authors' and author_known:
                continue
            if field == 'title' and book_known:
                continue
            if field in ('themes', 'themes_pl') and theme_known:
                continue
            some.append(field)
        return some
+
+
+class Search(IndexStore):
+ """
+ Search facilities.
+ """
+ def __init__(self, default_field="content"):
+ IndexStore.__init__(self)
+ self.analyzer = WLAnalyzer() # PolishAnalyzer(Version.LUCENE_34)
+ # self.analyzer = WLAnalyzer()
+ self.searcher = IndexSearcher(self.store, True)
+ self.parser = QueryParser(Version.LUCENE_34, default_field,
+ self.analyzer)
+
+ self.parent_filter = TermsFilter()
+ self.parent_filter.addTerm(Term("is_book", "true"))
+
+ def query(self, query):
+ """Parse query in default Lucene Syntax. (for humans)
+ """
+ return self.parser.parse(query)
+
+ def simple_search(self, query, max_results=50):
+ """Runs a query for books using lucene syntax. (for humans)
+ Returns (books, total_hits)
+ """
+
+ tops = self.searcher.search(self.query(query), max_results)
+ bks = []
+ for found in tops.scoreDocs:
+ doc = self.searcher.doc(found.doc)
+ bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
+ return (bks, tops.totalHits)
+
+ def get_tokens(self, searched, field='content', cached=None):
+ """returns tokens analyzed by a proper (for a field) analyzer
+ argument can be: StringReader, string/unicode, or tokens. In the last case
+ they will just be returned (so we can reuse tokens, if we don't change the analyzer)
+ """
+ if cached is not None and field in cached:
+ return cached[field]
+
+ if isinstance(searched, str) or isinstance(searched, unicode):
+ searched = StringReader(searched)
+ elif isinstance(searched, list):
+ return searched
+
+ searched.reset()
+ tokens = self.analyzer.reusableTokenStream(field, searched)