From: Jan Szejko Date: Wed, 25 Oct 2017 11:07:44 +0000 (+0200) Subject: minor fixes in search X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/a8ecd9c5f7bba62b02bf9c9a75a6070eda115184 minor fixes in search --- diff --git a/doc/schema.xml b/doc/schema.xml index 33886be20..17868e14a 100644 --- a/doc/schema.xml +++ b/doc/schema.xml @@ -135,10 +135,12 @@ - + + + diff --git a/src/catalogue/templates/catalogue/book_searched.html b/src/catalogue/templates/catalogue/book_searched.html index 26b9f6130..23d473689 100644 --- a/src/catalogue/templates/catalogue/book_searched.html +++ b/src/catalogue/templates/catalogue/book_searched.html @@ -21,7 +21,7 @@ {% if hit.snippet %} {{hit.snippet|safe}} {% else %} - {{hit.fragment.text|truncatewords_html:15|safe}} + {{hit.fragment.text|truncatewords_html:15|safe}} {# bad, now impossible #} {% endif %} diff --git a/src/search/custom.py b/src/search/custom.py index dfface953..da21e019e 100644 --- a/src/search/custom.py +++ b/src/search/custom.py @@ -184,5 +184,6 @@ class CustomSolrInterface(sunburnt.SolrInterface): off = -start snip = snip[:e + off] + mark[1] + snip[e + off:] snip = snip[:s + off] + mark[0] + snip[s + off:] + snip = re.sub('%s[ \t\n]+%s' % (mark[1], mark[0]), " ", snip) return snip diff --git a/src/search/index.py b/src/search/index.py index 1cac82ef6..f9fb4b286 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -272,7 +272,7 @@ class Index(SolrIndex): ] ignore_content_tags = [ - 'uwaga', 'extra', + 'uwaga', 'extra', 'nota_red', 'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu', 'didaskalia', 'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc', @@ -365,8 +365,8 @@ class Index(SolrIndex): if master is None: return [] - def walker(node, ignore_tags=()): - if node.tag not in ignore_tags: + def walker(node): + if node.tag not in self.ignore_content_tags: yield node, None, None if node.text is not None: yield None, node.text, None @@ -421,12 +421,6 @@ class Index(SolrIndex): book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', '')) return doc - def give_me_utf8(s): - if isinstance(s, unicode): - return s.encode('utf-8') - else: - return s - fragments = {} snippets = Snippets(book.id).open('w') try: @@ -447,7 +441,7 @@ class Index(SolrIndex): content.append(text) handle_text = [all_content] - for start, text, end in walker(header, ignore_tags=self.ignore_content_tags): + for start, text, end in walker(header): # handle footnotes if start is not None and start.tag in self.footnote_tags: footnote = [] @@ -819,9 +813,10 @@ class Search(SolrIndex): text = snippets.get((int(position), int(length))) snip = self.index.highlight(text=text, field=field, q=query) - snips[idx] = snip - if snip: - num -= 1 + if snip not in snips: + snips[idx] = snip + if snip: + num -= 1 idx += 1 except IOError, e: diff --git a/src/search/templatetags/search_tags.py b/src/search/templatetags/search_tags.py index c135b8096..517f18f07 100644 --- a/src/search/templatetags/search_tags.py +++ b/src/search/templatetags/search_tags.py @@ -32,7 +32,6 @@ def book_searched(context, result): # We don't need hits which lead to sections but do not have # snippets. hits = filter(lambda (idx, h): - 'fragment' in h or result.snippets[idx] is not None, enumerate(result.hits)) # print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits)) @@ -45,8 +44,8 @@ def book_searched(context, result): continue snip = result.snippets[idx] # fix some formattting - snip = re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", - re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)[0])[0] + snip = re.sub(r"[ \t\n]*\n[ \t\n]*", u"\n", snip) + snip = re.sub(r"(^[ \t\n]+|[ \t\n]+$)", u"", snip) snip = snip.replace("\n", "
").replace('---', '—') hit['snippet'] = snip @@ -54,5 +53,5 @@ def book_searched(context, result): return { 'request': context['request'], 'book': book, - 'hits': hits and zip(*hits)[1] or [] + 'hits': zip(*hits)[1] if hits else [] } diff --git a/src/search/views.py b/src/search/views.py index d105be6e9..14da6f60d 100644 --- a/src/search/views.py +++ b/src/search/views.py @@ -222,7 +222,6 @@ def main(request): text_phrase = filter(ensure_exists, text_phrase) everywhere = filter(ensure_exists, everywhere) - results = author_results + translator_results + title_results + text_phrase + everywhere # ensure books do exists & sort them for res in (author_results, translator_results, title_results, text_phrase, everywhere): res.sort(reverse=True) @@ -235,7 +234,7 @@ def main(request): # #frag = Fragment.objects.get(anchor=anchor) # return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url()) # return HttpResponseRedirect(results[0].book.get_absolute_url()) - if len(results) == 0: + if not (author_results or translator_results or title_results or text_phrase or everywhere): form = PublishingSuggestForm(initial={"books": query + ", "}) return render_to_response( 'catalogue/search_no_hits.html',