X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/ae60b2a3949e96357477cc04f90fd0873cee8a92..fd753649825bfdefb749506d81ae776e55a5c63c:/src/search/index.py diff --git a/src/search/index.py b/src/search/index.py index ffad25788..f9fb4b286 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -139,7 +139,6 @@ class Index(SolrIndex): else: return False - # WTF def index_tags(self, *tags, **kw): """ Re-index global tag list. @@ -273,7 +272,7 @@ class Index(SolrIndex): ] ignore_content_tags = [ - 'uwaga', 'extra', + 'uwaga', 'extra', 'nota_red', 'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu', 'didaskalia', 'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc', @@ -366,8 +365,8 @@ class Index(SolrIndex): if master is None: return [] - def walker(node, ignore_tags=()): - if node.tag not in ignore_tags: + def walker(node): + if node.tag not in self.ignore_content_tags: yield node, None, None if node.text is not None: yield None, node.text, None @@ -418,17 +417,10 @@ class Index(SolrIndex): if 'themes' in fields: doc['themes'] = fields['themes'] - doc['uid'] = "part%s%s%s" % (doc['header_index'], - doc['header_span'], - doc.get('fragment_anchor', '')) + doc['uid'] = "part%s-%s-%s-%s" % ( + book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', '')) return doc - def give_me_utf8(s): - if isinstance(s, unicode): - return s.encode('utf-8') - else: - return s - fragments = {} snippets = Snippets(book.id).open('w') try: @@ -449,7 +441,7 @@ class Index(SolrIndex): content.append(text) handle_text = [all_content] - for start, text, end in walker(header, ignore_tags=self.ignore_content_tags): + for start, text, end in walker(header): # handle footnotes if start is not None and start.tag in self.footnote_tags: footnote = [] @@ -516,8 +508,7 @@ class Index(SolrIndex): class SearchResult(object): - def __init__(self, doc, how_found=None, query=None, query_terms=None): - # self.search = search + def __init__(self, doc, how_found=None, query_terms=None): self.boost = 1.0 self._hits = [] self._processed_hits = None # processed hits @@ -745,7 +736,7 @@ class Search(SolrIndex): q = self.index.query(**{field: searched}) q = self.apply_filters(q, filters).field_limit(score=True, all_fields=True) - res = q.execute() + res = q.paginate(rows=100).execute() return [SearchResult(found, how_found=u'search_phrase') for found in res] def search_some(self, searched, fields, book=True, @@ -822,13 +813,18 @@ class Search(SolrIndex): text = snippets.get((int(position), int(length))) snip = self.index.highlight(text=text, field=field, q=query) - snips[idx] = snip - if snip: - num -= 1 + if snip not in snips: + snips[idx] = snip + if snip: + num -= 1 idx += 1 except IOError, e: - log.error("Cannot open snippet file for book id = %d [rev=%s], %s" % (book_id, revision, e)) + book = catalogue.models.Book.objects.filter(id=book_id) + if not book: + log.error("Book does not exist for book id = %d" % book_id) + elif not book.get().children.exists(): + log.error("Cannot open snippet file for book id = %d [rev=%s], %s" % (book_id, revision, e)) return [] finally: snippets.close() @@ -912,8 +908,10 @@ class Search(SolrIndex): query = query.strip() if prefix: q |= self.index.Q(title=query + "*") + q |= self.index.Q(title_orig=query + "*") else: q |= self.make_term_query(query, field='title') + q |= self.make_term_query(query, field='title_orig') qu = self.index.query(q) only_books = self.index.Q(is_book=True) return self.search_books(qu, [only_books])