+ tokens_pl = self.get_tokens(searched, field='text', cached=tokens_cache)
+ tokens = self.get_tokens(searched, field='SIMPLE', cached=tokens_cache)
+
+ # only search in themes when we do not already filter by themes
+ if hint is None or hint.just_search_in(['themes']) != []:
+ q.add(BooleanClause(self.make_term_query(tokens_pl, field='themes_pl',
+ fuzzy=fuzzy), BooleanClause.Occur.MUST))
+
+ q.add(BooleanClause(self.make_term_query(tokens_pl, field='text',
+ fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+
+ topDocs = self.searcher.search(q, only_in, max_results)
+ for found in topDocs.scoreDocs:
+ books.append(SearchResult(self, found, how_found='search_everywhere_themesXcontent', searched=searched))
+
+ # query themes/content x author/title/tags
+ q = BooleanQuery()
+ in_content = BooleanQuery()
+ in_meta = BooleanQuery()
+
+ for fld in ['themes_pl', 'text']:
+ in_content.add(BooleanClause(self.make_term_query(tokens_pl, field=fld, fuzzy=False), BooleanClause.Occur.SHOULD))
+
+ for fld in ['tags', 'authors', 'title']:
+ in_meta.add(BooleanClause(self.make_term_query(tokens, field=fld, fuzzy=False), BooleanClause.Occur.SHOULD))
+
+ q.add(BooleanClause(in_content, BooleanClause.Occur.MUST))
+ q.add(BooleanClause(in_meta, BooleanClause.Occur.SHOULD))
+
+ topDocs = self.searcher.search(q, only_in, max_results)
+ for found in topDocs.scoreDocs:
+ books.append(SearchResult(self, found, how_found='search_everywhere', searched=searched))
+
+ return books
+
+ # def multisearch(self, query, max_results=50):
+ # """
+ # Search strategy:
+ # - (phrase) OR -> content
+ # -> title
+ # -> authors
+ # - (keywords) -> authors
+ # -> motyw
+ # -> tags
+ # -> content
+ # """
+ # queryreader = StringReader(query)
+ # tokens = self.get_tokens(queryreader)
+
+ # top_level = BooleanQuery()
+ # Should = BooleanClause.Occur.SHOULD
+
+ # phrase_level = BooleanQuery()
+ # phrase_level.setBoost(1.3)
+
+ # p_content = self.make_phrase(tokens, joined=True)
+ # p_title = self.make_phrase(tokens, 'title')
+ # p_author = self.make_phrase(tokens, 'author')
+
+ # phrase_level.add(BooleanClause(p_content, Should))
+ # phrase_level.add(BooleanClause(p_title, Should))
+ # phrase_level.add(BooleanClause(p_author, Should))
+
+ # kw_level = BooleanQuery()
+
+ # kw_level.add(self.make_term_query(tokens, 'author'), Should)
+ # j_themes = self.make_term_query(tokens, 'themes', joined=True)
+ # kw_level.add(j_themes, Should)
+ # kw_level.add(self.make_term_query(tokens, 'tags'), Should)
+ # j_con = self.make_term_query(tokens, joined=True)
+ # kw_level.add(j_con, Should)
+
+ # top_level.add(BooleanClause(phrase_level, Should))
+ # top_level.add(BooleanClause(kw_level, Should))
+
+ # return None
+
def get_snippets(self, scoreDoc, query, field='text'):
    """
    Returns a snippet for found scoreDoc.

    scoreDoc -- search hit; its `doc` attribute is used to load the
                stored document and its token stream.
    query    -- query the hit was found with; it drives the scoring
                of highlighted fragments.
    field    -- name of the indexed field the token stream is taken
                from (defaults to 'text').

    Returns None when the stored document has no 'snippets_position'
    or 'snippets_length', an empty list when the snippet file cannot
    be opened, and otherwise whatever
    highlighter.getBestFragments() produces for the stored text
    (at most 3 fragments are requested, with "..." as separator).

    Raises a generic Exception, carrying the underlying error as its
    second argument, when reading or highlighting the snippet fails.
    """
    htmlFormatter = SimpleHTMLFormatter()
    highlighter = Highlighter(htmlFormatter, QueryScorer(query))

    stored = self.searcher.doc(scoreDoc.doc)

    position = stored.get('snippets_position')
    length = stored.get('snippets_length')
    if position is None or length is None:
        return None
    revision = stored.get('snippets_revision')
    if revision:
        revision = int(revision)

    # locate content.
    book_id = int(stored.get('book_id'))
    snippets = Snippets(book_id, revision=revision)

    try:
        snippets.open()
    except IOError as e:
        # revision may legitimately be None here, so it is formatted
        # with %s; %d would raise TypeError and hide the IOError.
        log.error("Cannot open snippet file for book id = %d [rev=%s], %s" % (book_id, revision, e))
        return []

    try:
        try:
            text = snippets.get((int(position),
                                 int(length)))
        finally:
            # always release the snippet file, even when the read fails
            snippets.close()

        tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
        # highlighter.getBestTextFragments(tokenStream, text, False, 10)
        snip = highlighter.getBestFragments(tokenStream, text, 3, "...")

    except Exception as e:
        e2 = e
        if hasattr(e, 'getJavaException'):
            # prefer the wrapped Java exception's text when there is one
            e2 = unicode(e.getJavaException())
        # The raw stored values are formatted with %s: converting them
        # again here could raise a second error from inside the handler.
        raise Exception("Problem fetching snippets for book %d, @%s len=%s" % (book_id, position, length),
                        e2)
    return snip
+
@staticmethod
def enum_to_array(enum):
    """
    Collects the terms of a lucene TermEnum into a JArray of Term
    objects, suitable for addition to queries.

    Returns None when the enumeration yields no terms.
    """
    collected = []

    # Read the current term first, then advance; stop as soon as
    # next() reports that nothing is left.
    advancing = True
    while advancing:
        current = enum.term()
        if current:
            collected.append(current)
        advancing = enum.next()

    if not collected:
        return None
    return JArray('object')(collected, Term)
+
def search_tags(self, query, filt=None, max_results=40, pdcounter=False):
    """
    Search for Tag objects using query.

    query       -- query handed unchanged to self.searcher.search().
    filt        -- optional filter restricting the search; may be None.
    max_results -- maximum number of hits requested from the searcher.
    pdcounter   -- when false (the default), documents whose
                   'is_pdcounter' field is 'true' are filtered out.

    Returns a list of objects fetched with `.objects.get(id=...)`
    from catalogue.models.Tag, PDCounterAuthor or PDCounterBook,
    depending on the hit. Hits whose database row does not exist and
    pdcounter hits of an unknown category are skipped.
    """
    if not pdcounter:
        # Chain an inverted term filter onto the caller's filter so
        # pdcounter entries are excluded from the search itself.
        filt = self.chain_filters([filt, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)])
    tops = self.searcher.search(query, filt, max_results)

    tags = []
    for found in tops.scoreDocs:
        doc = self.searcher.doc(found.doc)
        is_pdcounter = doc.get('is_pdcounter')
        category = doc.get('tag_category')
        tag_id = doc.get('tag_id')
        try:
            if is_pdcounter == 'true':
                if category == 'pd_author':
                    tag = PDCounterAuthor.objects.get(id=tag_id)
                elif category == 'pd_book':
                    tag = PDCounterBook.objects.get(id=tag_id)
                    tag.category = 'pd_book'  # make it look more like a tag.
                else:
                    log.warning("Warning. cannot get pdcounter tag_id=%s from db; cat=%s" % (tag_id, category))
                    # nothing was loaded for this hit, so there is
                    # nothing to append
                    continue
            else:
                tag = catalogue.models.Tag.objects.get(id=tag_id)
        except (catalogue.models.Tag.DoesNotExist,
                PDCounterAuthor.DoesNotExist,
                PDCounterBook.DoesNotExist):
            # index entry without a matching database row
            continue

        # NOTE(review): an earlier comment here mentioned not adding a
        # pdcounter tag when the same tag already exists; no such
        # de-duplication is implemented in this method.
        tags.append(tag)

    log.debug('search_tags: %s' % tags)

    return tags
+
+ def search_books(self, query, filt=None, max_results=10):
+ """
+ Searches for Book objects using query
+ """