From 462d84ea8718cd3ca1112fe11c2120a0f20f475f Mon Sep 17 00:00:00 2001 From: Marcin Koziej Date: Thu, 19 Jan 2012 16:45:27 +0100 Subject: [PATCH] search - new result layout. the css is not perfect though. --- apps/search/index.py | 91 ++++++++++++++++--- apps/search/views.py | 44 ++++++--- wolnelektury/static/css/book_box.css | 36 +++++++- .../templates/catalogue/book_searched.html | 43 ++++----- .../catalogue/search_multiple_hits.html | 76 +++++++++++++++- 5 files changed, 230 insertions(+), 60 deletions(-) diff --git a/apps/search/index.py b/apps/search/index.py index 307376de9..df44888df 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -553,11 +553,15 @@ class JoinSearch(object): class SearchResult(object): - def __init__(self, searcher, scoreDocs, score=None, how_found=None, snippets=None): + def __init__(self, searcher, scoreDocs, score=None, how_found=None, snippets=None, searched=None, tokens_cache=None): + if tokens_cache is None: tokens_cache = {} + if score: - self.score = score + self._score = score else: - self.score = scoreDocs.score + self._score = scoreDocs.score + + self.boost = 1.0 self._hits = [] self.hits = None # processed hits @@ -581,6 +585,14 @@ class SearchResult(object): self._hits.append(hit) + self.searcher = searcher + self.searched = searched + self.tokens_cache = tokens_cache + + @property + def score(self): + return self._score * self.boost + def merge(self, other): if self.book_id != other.book_id: raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id)) @@ -642,10 +654,25 @@ class SearchResult(object): for f in frags: frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT]) + + # Figure out if we were searching for a token matching some word in theme name. + themes = frag.tags.filter(category='theme') + themes_hit = [] + if self.searched is not None: + tokens = self.searcher.get_tokens(self.searched, 'POLISH', tokens_cache=self.tokens_cache) + for theme in themes: + name_tokens = self.searcher.get_tokens(theme.name, 'POLISH') + for t in tokens: + if name_tokens.index(t): + if not theme in themes_hit: + themes_hit.append(theme) + break + m = {'score': f[SCORE], 'fragment': frag, 'section_number': f[POSITION][POSITION_INDEX] + 1, - 'themes': frag.tags.filter(category='theme') + 'themes': themes, + 'themes_hit': themes_hit } m.update(f[OTHER]) hits.append(m) @@ -802,11 +829,14 @@ class Search(IndexStore): bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id"))) return (bks, tops.totalHits) - def get_tokens(self, searched, field='content'): + def get_tokens(self, searched, field='content', cached=None): """returns tokens analyzed by a proper (for a field) analyzer argument can be: StringReader, string/unicode, or tokens. In the last case they will just be returned (so we can reuse tokens, if we don't change the analyzer) """ + if cached is not None and field in cached: + return cached[field] + if isinstance(searched, str) or isinstance(searched, unicode): searched = StringReader(searched) elif isinstance(searched, list): @@ -818,6 +848,10 @@ class Search(IndexStore): while tokens.incrementToken(): cta = tokens.getAttribute(CharTermAttribute.class_) toks.append(cta.toString()) + + if cached is not None: + cached[field] = toks + return toks def fuzziness(self, fuzzy): @@ -874,9 +908,39 @@ class Search(IndexStore): q.add(BooleanClause(term, modal)) return q - # def content_query(self, query): - # return BlockJoinQuery(query, self.parent_filter, - # BlockJoinQuery.ScoreMode.Total) + def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False, + filters=None, tokens_cache=None, boost=None): + if filters is None: filters = [] + if tokens_cache is None: tokens_cache = {} + + tokens = self.get_tokens(searched, field, cached=tokens_cache) + + query = self.make_phrase(tokens, field=field, fuzzy=fuzzy) + if book: + filters.append(self.term_filter(Term('is_book', 'true'))) + top = self.searcher.search(query, self.chain_filters(filters), max_results) + + return [SearchResult(self.searcher, found) for found in top.scoreDocs] + + def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False, + filters=None, tokens_cache=None, boost=None): + if filters is None: filters = [] + if tokens_cache is None: tokens_cache = {} + + if book: + filters.append(self.term_filter(Term('is_book', 'true'))) + + query = BooleanQuery() + + for fld in fields: + tokens = self.get_tokens(searched, fld, cached=tokens_cache) + + query.add(BooleanClause(self.make_term_query(tokens, field=fld, + fuzzy=fuzzy), BooleanClause.Occur.SHOULD)) + + top = self.searcher.search(query, self.chain_filters(filters), max_results) + + return [SearchResult(self.searcher, found, searched=searched, tokens_cache=tokens_cache) for found in top.scoreDocs] def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None): """ @@ -931,7 +995,7 @@ class Search(IndexStore): def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None): """ - Search for book parts which containt a phrase perfectly matching (with a slop of 2, default for make_phrase()) + Search for book parts which contains a phrase perfectly matching (with a slop of 2, default for make_phrase()) some part/fragment of the book. """ qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['content']] @@ -951,12 +1015,13 @@ class Search(IndexStore): return books - def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None): + def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None, tokens_cache=None): """ Tries to use search terms to match different fields of book (or its parts). E.g. one word can be an author survey, another be a part of the title, and the rest are some words from third chapter. """ + if tokens_cache is None: tokens_cache = {} books = [] only_in = None @@ -966,8 +1031,8 @@ class Search(IndexStore): # content only query : themes x content q = BooleanQuery() - tokens_pl = self.get_tokens(searched, field='content') - tokens = self.get_tokens(searched, field='SIMPLE') + tokens_pl = self.get_tokens(searched, field='content', cached=tokens_cache) + tokens = self.get_tokens(searched, field='SIMPLE', cached=tokens_cache) # only search in themes when we do not already filter by themes if hint is None or hint.just_search_in(['themes']) != []: @@ -1171,7 +1236,7 @@ class Search(IndexStore): Chains a filter list together """ filters = filter(lambda x: x is not None, filters) - if not filters: + if not filters or filters is []: return None chf = ChainedFilter(JArray('object')(filters, Filter), op) return chf diff --git a/apps/search/views.py b/apps/search/views.py index 710c6dafd..e9b25649c 100644 --- a/apps/search/views.py +++ b/apps/search/views.py @@ -119,28 +119,39 @@ def main(request): hint.books(book) toks = StringReader(query) + tokens_cache = {} fuzzy = 'fuzzy' in request.GET if fuzzy: fuzzy = 0.7 - results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint), - srch.search_book(toks, fuzzy=fuzzy, hint=hint), - srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint), - srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint)) - - for r in results: - r.process_hits() - - results.sort(reverse=True) + author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache) + title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache) + + # Boost main author/title results with mixed search, and save some of its results for end of list. + # boost author, title results + author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache) + author_title_rest = [] + for b in author_title_mixed: + bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results) + for b2 in bks: + b2.boost *= 1.1 + if bks is []: + author_title_rest.append(b) + + text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache)) + [r.process_hits() for r in text_phrase] + + everywhere = SearchResult.aggregate(srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache), author_title_rest) + [r.process_hits() for r in everywhere] - for r in results: - print "-----" - for h in r.hits: - print "- %s" % h + for res in [author_results, title_results, text_phrase, everywhere]: + res.sort(reverse=True) - # Did you mean? suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE")) + results = author_results + title_results + text_phrase + everywhere + results.sort(reverse=True) + if len(results) == 1: if len(results[0].hits) == 0: return HttpResponseRedirect(results[0].book.get_absolute_url()) @@ -159,6 +170,9 @@ def main(request): return render_to_response('catalogue/search_multiple_hits.html', {'tags': tag_list, 'prefix': query, - 'results': results, + 'results': { 'author': author_results, + 'title': title_results, + 'content': text_phrase, + 'other': everywhere}, 'did_you_mean': suggestion}, context_instance=RequestContext(request)) diff --git a/wolnelektury/static/css/book_box.css b/wolnelektury/static/css/book_box.css index ca58a634e..2ba381985 100755 --- a/wolnelektury/static/css/book_box.css +++ b/wolnelektury/static/css/book_box.css @@ -7,7 +7,6 @@ vertical-align: top; } - .book-box { width: 48.75em; } @@ -17,7 +16,7 @@ } .book-wide-box { - width: 98.5em; + width: 97.5em; /** This is a fullpage box, it must be aligned with the top menu. This corresponds to a .1em margin below **/ @@ -60,11 +59,34 @@ min-height: 24.4em; } +.search-result { + width: 97.5em; +} + +.search-result .book-box-body { + width: 31em; +} + +.book-list-header { + width: 97.5em; + padding: 0em; + margin-left: -0.1em; +} + +.book-list-header .book-box-inner { + min-height: 1em; +} + +.book-list-header p { + font-size: 2.4em; + margin: 0.5em; +} + /*.book-wide-box.search-result .book-box-inner, .book-wide-box.search-result blockquote { height: auto !important; }*/ -.book-mini-box img, .book-box img, .book-wide-box img { +.book-mini-box img, .book-box img, .book-wide-box img, .search-result img { width: 13.9em; height: 19.3em; } @@ -292,9 +314,17 @@ ul.inline-items li { .unlike .if-like { display: none; + +.snippets { + width: 44em; + float: right; } .snippets .snippet-text { font-size: 1.2em; margin: 1.083em 0em; } + +.snipptes .anchor { + display: none; +} diff --git a/wolnelektury/templates/catalogue/book_searched.html b/wolnelektury/templates/catalogue/book_searched.html index d719f30e2..4b92c19d2 100644 --- a/wolnelektury/templates/catalogue/book_searched.html +++ b/wolnelektury/templates/catalogue/book_searched.html @@ -1,34 +1,27 @@ -{% extends "catalogue/book_wide.html" %} +{% extends "catalogue/book_short.html" %} {% load i18n %} -{% block box-class %}book-wide-box search-result{% endblock %} +{% block box-class %}search-result{% endblock %} -{% block quote %} -{% if hits.0.snippets %} -
{{hits.0.snippets.0|safe}}
-{% else %}{% if hits.0.fragment %} -
{{hits.0.fragment.short_text|safe}}
-{% endif %}{% endif %} - -{% if hits.1 %} -

{% trans "See more" %}

-{% endif %} -{% endblock %} - - -{% block box-append %} -
- -{% for hit in hits %} +{% block right-column %} +
+ {% for hit in hits %} {% if hit.snippets %} - +

In text:

+ {% else %} - {% if hit.fragment %} - - {% endif %} + {% if hit.fragment %} +
+

{% trans "In fragment" %} + {% if hit.themes_hit %}{% trans ", for themes:" %}{% for t in hit.themes_hit %}{{t.name}} {% endfor %}{% endif %} +

+ {{hit.fragment.short_text|safe}} +
{% endif %} -{% endfor %} - + {% endif %} + {% endfor %}
+ {% endblock %} + diff --git a/wolnelektury/templates/catalogue/search_multiple_hits.html b/wolnelektury/templates/catalogue/search_multiple_hits.html index 5d222519f..0a7dc1cf1 100644 --- a/wolnelektury/templates/catalogue/search_multiple_hits.html +++ b/wolnelektury/templates/catalogue/search_multiple_hits.html @@ -12,11 +12,79 @@ {% endif %} -
- {% for result in results %} - {% book_searched result %} - {% endfor %} + + {% if results.author %} +
+
+

{% trans "Results by authors" %}

+
+
+
+
    + {% for author in results.author %} +
  1. + {{author.book.short_html}} +
  2. + {% endfor %} +
+
+ {% endif %} + + {% if results.title %} +
+
+

{% trans "Results by title" %}

+
+
+
+
    + {% for result in results.title %} +
  1. + {{result.book.short_html}} +
  2. + {% endfor %} +
+
+ {% endif %} + + {% if results.content %} + {% for result in results.content %} +
+
+

{% trans "Results in text" %}

+
+
+
+
    + {% for result in results.title %} +
  1. + {% book_searched result %} +
  2. + {% endfor %} +
+
+ {% endfor %} + {% endif %} + + {% if results.other %} + {% for result in results.other %} +
+
+

{% trans "Other results" %}

+
+
+
+
    + {% for result in results.other %} +
  1. + {% book_searched result %} +
  2. + {% endfor %} +
+ {% endfor %} + {% endif %} + -- 2.20.1