+ # return books
+
+ # def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
+ # """
+ # Search for book parts which contains a phrase perfectly matching (with a slop of 2, default for make_phrase())
+ # some part/fragment of the book.
+ # """
+ # qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['text']]
+
+ # flt = None
+ # if hint:
+ # flt = hint.part_filter()
+
+ # books = []
+ # for q in qrys:
+ # top = self.searcher.search(q,
+ # self.chain_filters([self.term_filter(Term('is_book', 'true'), inverse=True),
+ # flt]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, snippets=self.get_snippets(found, q), how_found='search_perfect_parts'))
+
+ # return books
+
def search_everywhere(self, searched, query_terms=None):
    """
    Match the search terms against several fields of a book (or its parts).
    E.g. one word may match the author, another a part of the title, and
    the rest some words from the third chapter.

    Two queries are executed and their hits are concatenated, in order:
      1. the `text` term query chained with the `themes_pl` term query
         (hits tagged 'search_everywhere_themesXcontent'),
      2. (themes_pl | text) & (tags | authors | title)
         (hits tagged 'search_everywhere').

    Returns a list of SearchResult objects; `query_terms` is handed to
    each of them unchanged.
    """
    def wrap(hits, label):
        # Turn raw index hits into SearchResult objects tagged with `label`.
        return [SearchResult(hit, how_found=label, query_terms=query_terms)
                for hit in hits]

    # content-only query: themes x content
    text_q = self.make_term_query(searched, 'text')
    themes_q = self.make_term_query(searched, 'themes_pl')
    content_query = self.index.query(text_q).query(themes_q)
    content_hits = content_query.field_limit(score=True, all_fields=True).execute()
    results = wrap(content_hits, 'search_everywhere_themesXcontent')

    # themes/content x author/title/tags
    content_side = self.index.Q()
    for name in ('themes_pl', 'text'):
        content_side |= self.make_term_query(searched, field=name)

    meta_side = self.index.Q()
    for name in ('tags', 'authors', 'title'):
        meta_side |= self.make_term_query(searched, field=name)

    mixed_query = self.index.query(content_side & meta_side)
    mixed_hits = mixed_query.field_limit(score=True, all_fields=True).execute()
    results.extend(wrap(mixed_hits, 'search_everywhere'))

    return results
+
def get_snippets(self, searchresult, query, field='text', num=1):
    """
    Returns highlighted snippets for the hits of a search result.

    For each hit, the stored (position, length) pair is read from the
    book's snippet file, highlighted against `query` with
    self.index.highlight(), and placed in the slot matching the hit index.
    The resulting list is also stored on `searchresult.snippets`.

    Parameters:
      searchresult -- result object; must support len(), `book_id`,
                      snippet_revision() and snippet_pos(idx).
      query        -- query passed to the highlighter as `q`.
      field        -- field name passed to the highlighter.
      num          -- maximum number of non-empty snippets to collect;
                      None, a negative value, or a value larger than the
                      number of hits means "as many as there are hits".

    Returns a list with one slot per hit (None where no snippet was
    produced). If the snippet file raises IOError the error is logged,
    [] is returned and `searchresult.snippets` is left untouched.
    """
    maxnum = len(searchresult)
    if num is None or num < 0 or num > maxnum:
        num = maxnum
    book_id = searchresult.book_id
    revision = searchresult.snippet_revision()
    snippets = Snippets(book_id, revision=revision)
    snips = [None] * maxnum
    try:
        snippets.open()
        # A for-loop guarantees the index always advances; the previous
        # while-loop hit `continue` without incrementing and spun forever
        # on a hit that had no stored position/length.
        for idx in range(maxnum):
            if num <= 0:
                break
            position, length = searchresult.snippet_pos(idx)
            if position is None or length is None:
                continue
            text = snippets.get((int(position), int(length)))
            snip = self.index.highlight(text=text, field=field, q=query)
            snips[idx] = snip
            # Only non-empty highlights count towards the requested number.
            if snip:
                num -= 1

    except IOError as e:
        log.error("Cannot open snippet file for book id = %d [rev=%d], %s" % (book_id, revision, e))
        return []
    finally:
        snippets.close()

    # remove verse end markers..
    # (a list comprehension keeps the result a real list on Python 3 too)
    snips = [s and s.replace("/\n", "\n") for s in snips]

    searchresult.snippets = snips

    return snips
+
def hint_tags(self, query, pdcounter=True, prefix=True):
    """
    Return auto-complete hints for tags.

    The stripped `query` is looked up in both `tag_name` and `tag_name_pl`:
    as a prefix ("<query>*") when `prefix` is true, otherwise through
    make_term_query(). Documents with tag_category "book" are excluded and
    the search itself is delegated to search_tags(), which receives
    `pdcounter` unchanged.
    """
    needle = query.strip()
    any_name = self.index.Q()
    for name in ('tag_name', 'tag_name_pl'):
        if not prefix:
            clause = self.make_term_query(needle, field=name)
        else:
            clause = self.index.Q(**{name: needle + "*"})
        any_name |= clause

    non_book_tags = self.index.query(any_name).exclude(tag_category="book")
    return self.search_tags(non_book_tags, pdcounter=pdcounter)
+
def search_tags(self, query, filters=None, pdcounter=False):
    """
    Search for Tag objects using query.

    Parameters:
      query     -- index query to run.
      filters   -- optional list of filters applied via apply_filters();
                   the caller's list is not modified.
      pdcounter -- when false, documents flagged `is_pdcounter` are
                   filtered out of the results.

    Returns a list of model objects loaded from the database for each
    matching document: catalogue Tag, PDCounterAuthor or PDCounterBook
    (the latter gets `category = 'pd_book'` set). Documents whose database
    row no longer exists, or whose pdcounter category is not recognised,
    are skipped.
    """
    # Work on a copy so the append below never leaks into the caller's list.
    filters = list(filters) if filters else []
    if not pdcounter:
        filters.append(~self.index.Q(is_pdcounter=True))
    res = self.apply_filters(query, filters).execute()

    tags = []
    for doc in res:
        is_pdcounter = doc.get('is_pdcounter', False)
        category = doc.get('tag_category')
        tag_id = doc.get('tag_id')
        try:
            if is_pdcounter == True:
                if category == 'pd_author':
                    tag = PDCounterAuthor.objects.get(id=tag_id)
                elif category == 'pd_book':
                    tag = PDCounterBook.objects.get(id=tag_id)
                    tag.category = 'pd_book'  # make it look more like a tag.
                else:
                    # Unknown category: there is nothing to append. Skipping
                    # here avoids appending an unbound (or stale, from the
                    # previous iteration) `tag`.
                    log.warning("Warning. cannot get pdcounter tag_id=%s from db; cat=%s" % (tag_id, category))
                    continue
            else:
                tag = catalogue.models.Tag.objects.get(id=tag_id)
        except (catalogue.models.Tag.DoesNotExist,
                PDCounterAuthor.DoesNotExist,
                PDCounterBook.DoesNotExist):
            # The indexed document has no matching database row; skip it.
            continue

        # NOTE(review): an earlier comment here said not to add a pdcounter
        # tag when the same tag already exists, but no such check is
        # implemented -- confirm whether de-duplication is still wanted.
        tags.append(tag)

    log.debug('search_tags: %s' % tags)

    return tags
+
def hint_books(self, query, prefix=True):
    """
    Return auto-complete hints for book titles
    (needed because 'pseudo' title-tags are not indexed).

    The stripped `query` is matched against `title`: as a prefix
    ("<query>*") when `prefix` is true, otherwise through
    make_term_query(). The search is delegated to search_books() with a
    single filter restricting results to documents with is_book=True.
    """
    title_match = self.index.Q()
    needle = query.strip()
    if not prefix:
        title_match |= self.make_term_query(needle, field='title')
    else:
        title_match |= self.index.Q(title=needle + "*")

    title_query = self.index.query(title_match)
    books_only = self.index.Q(is_book=True)
    return self.search_books(title_query, [books_only])
+
+ def search_books(self, query, filters=None, max_results=10):
+ """
+ Searches for Book objects using query
+ """