+ return bks
+
def get_snippets(self, scoreDoc, query, field='content'):
    """
    Builds highlighted snippets for a single search hit.

    The indexed document referenced by `scoreDoc.doc` stores a `book_id`
    plus the position and length of its text inside that book's Snippets
    store. The text is read from there and handed to a lucene Highlighter
    scoring against `query`; `field` selects the token stream.

    Returns a one-element list holding the highlighter's best fragments
    (limit 3, joined with "...").
    """
    hit_id = scoreDoc.doc
    highlighter = Highlighter(SimpleHTMLFormatter(), QueryScorer(query))
    stored = self.searcher.doc(hit_id)

    # The text itself is not kept in the index; read it from the snippets store.
    snippets = Snippets(stored.get('book_id')).open()
    try:
        position = int(stored.get('snippets_position'))
        length = int(stored.get('snippets_length'))
        text = snippets.get((position, length))
    finally:
        snippets.close()

    reader = self.searcher.getIndexReader()
    tokenStream = TokenSources.getAnyTokenStream(reader, hit_id, field, self.analyzer)
    return [highlighter.getBestFragments(tokenStream, text, 3, "...")]
+
@staticmethod
def enum_to_array(enum):
    """
    Drains a lucene TermEnum into a Java array of Terms, suitable for
    addition to queries.

    The enum's current term is read first and then the enum is advanced
    until `next()` reports no more terms. Returns None when no term was
    collected.
    """
    collected = []

    has_more = True
    while has_more:
        current = enum.term()
        if current:
            collected.append(current)
        has_more = enum.next()

    if not collected:
        return None
    return JArray('object')(collected, Term)
+
def search_tags(self, query, filter=None, max_results=40):
    """
    Runs `query` against the index and returns the matching Tag objects.

    query -- lucene query to execute
    filter -- optional lucene filter passed straight to the searcher
    max_results -- upper bound on the number of hits requested

    Each hit's stored `tag_id` is resolved to a catalogue Tag model
    instance; tags are returned in the order the searcher reported them.
    A lookup for a tag_id missing from the database propagates the
    model's exception unchanged.
    """
    # Local import keeps this block self-contained; hit scores go to the
    # logger at debug level instead of being printed to stdout.
    import logging
    log = logging.getLogger(__name__)

    tops = self.searcher.search(query, filter, max_results)

    tags = []
    for found in tops.scoreDocs:
        doc = self.searcher.doc(found.doc)
        tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
        tags.append(tag)
        log.debug("%s (%d) -> %f", tag, tag.id, found.score)

    return tags
+
def search_books(self, query, filter=None, max_results=10):
    """
    Runs `query` against the index and returns the matching Book objects.

    `filter` and `max_results` are passed straight to the searcher. Each
    hit's stored `book_id` is resolved to a catalogue Book model instance,
    in the order the searcher reported the hits.
    """
    hits = self.searcher.search(query, filter, max_results).scoreDocs
    return [
        catalogue.models.Book.objects.get(id=self.searcher.doc(hit.doc).get("book_id"))
        for hit in hits
    ]
+
def create_prefix_phrase(self, toks, field):
    """
    Builds a MultiPhraseQuery in which the last token is a prefix.

    toks -- sequence of token strings, in phrase order
    field -- name of the indexed field the terms belong to

    Every token but the last is added as an exact Term. The last token
    is expanded to all index terms enumerated by a PrefixTermEnum for
    it; when that expansion yields nothing, the token is added as an
    exact Term instead. An empty `toks` yields an empty query.
    """
    q = MultiPhraseQuery()
    last_index = len(toks) - 1

    for i, tok in enumerate(toks):
        t = Term(field, tok)
        if i != last_index:
            q.add(t)
            continue

        prefix_enum = PrefixTermEnum(self.searcher.getIndexReader(), t)
        try:
            pterms = MultiSearch.enum_to_array(prefix_enum)
        finally:
            # The enum is created here, so it is released here as well.
            prefix_enum.close()

        if pterms:
            q.add(pterms)
        else:
            q.add(t)

    return q
+
@staticmethod
def term_filter(term, inverse=False):
    """
    Returns a filter built around a single term.

    With `inverse` false the result is a TermsFilter holding `term`.
    With `inverse` true that filter is wrapped in a BooleanFilter as a
    MUST_NOT clause, and the wrapper is returned.
    """
    matching = TermsFilter()
    matching.addTerm(term)
    if not inverse:
        return matching

    excluding = BooleanFilter()
    excluding.add(FilterClause(matching, BooleanClause.Occur.MUST_NOT))
    return excluding
+
def hint_tags(self, string, max_results=50):
    """
    Returns tags whose name matches `string`, treating its last token
    as a prefix.

    A prefix phrase is built for each of the `tag_name` and `tag_name_pl`
    fields and the two are combined as SHOULD clauses. Results are
    restricted by an inverted term filter on tag_category "book".
    """
    tokens = self.get_tokens(string, field='SIMPLE')

    any_name = BooleanQuery()
    for name_field in ['tag_name', 'tag_name_pl']:
        phrase = self.create_prefix_phrase(tokens, name_field)
        clause = BooleanClause(phrase, BooleanClause.Occur.SHOULD)
        any_name.add(clause)

    book_category = Term("tag_category", "book")
    not_book = self.term_filter(book_category, inverse=True)

    return self.search_tags(any_name, not_book, max_results=max_results)
+
def hint_books(self, string, max_results=50):
    """
    Returns books whose title matches `string`, treating its last token
    as a prefix.

    The search is restricted by a term filter on is_book "true".
    """
    tokens = self.get_tokens(string, field='SIMPLE')
    title_phrase = self.create_prefix_phrase(tokens, 'title')
    books_only = self.term_filter(Term("is_book", "true"))

    # NOTE(review): `book_search` is not defined in the part of the file
    # visible here (the sibling next to search_tags is `search_books`) --
    # confirm the method exists.
    return self.book_search(title_phrase, books_only, max_results=max_results)
+
@staticmethod
def chain_filters(filters, op=ChainedFilter.AND):
    """
    Combines several filters into one ChainedFilter using `op`.

    None entries in `filters` are dropped first. Returns None when
    nothing is left after that.
    """
    def _is_set(candidate):
        return candidate is not None

    present = filter(_is_set, filters)
    if not present:
        return None
    return ChainedFilter(JArray('object')(present, Filter), op)
+
def filtered_categories(self, tags):
    """
    Returns the distinct `category` values found among `tags`.

    The result is the key collection of a dict, so each category
    appears once.
    """
    seen = dict.fromkeys((tag.category for tag in tags), True)
    return seen.keys()
+
def hint(self):
    """
    Returns a new Hint object constructed with this instance.
    """
    new_hint = Hint(self)
    return new_hint