search - new result layout. the css is not perfect though.
authorMarcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Thu, 19 Jan 2012 15:45:27 +0000 (16:45 +0100)
committerMarcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Thu, 19 Jan 2012 15:46:59 +0000 (16:46 +0100)
apps/search/index.py
apps/search/views.py
wolnelektury/static/css/book_box.css
wolnelektury/templates/catalogue/book_searched.html
wolnelektury/templates/catalogue/search_multiple_hits.html

index 307376d..df44888 100644 (file)
@@ -553,11 +553,15 @@ class JoinSearch(object):
 
 
 class SearchResult(object):
-    def __init__(self, searcher, scoreDocs, score=None, how_found=None, snippets=None):
+    def __init__(self, searcher, scoreDocs, score=None, how_found=None, snippets=None, searched=None, tokens_cache=None):
+        if tokens_cache is None: tokens_cache = {}
+            
         if score:
-            self.score = score
+            self._score = score
         else:
-            self.score = scoreDocs.score
+            self._score = scoreDocs.score
+            
+        self.boost = 1.0
 
         self._hits = []
         self.hits = None  # processed hits
@@ -581,6 +585,14 @@ class SearchResult(object):
 
         self._hits.append(hit)
 
+        self.searcher = searcher
+        self.searched = searched
+        self.tokens_cache = tokens_cache
+
+    @property
+    def score(self):
+        return self._score * self.boost
+
     def merge(self, other):
         if self.book_id != other.book_id:
             raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
@@ -642,10 +654,25 @@ class SearchResult(object):
 
         for f in frags:
             frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT])
+
+            # Figure out if we were searching for a token matching some word in theme name.
+            themes = frag.tags.filter(category='theme')
+            themes_hit = []
+            if self.searched is not None:
+                tokens = self.searcher.get_tokens(self.searched, 'POLISH', tokens_cache=self.tokens_cache)
+                for theme in themes:
+                    name_tokens = self.searcher.get_tokens(theme.name, 'POLISH')
+                    for t in tokens:
+                        if name_tokens.index(t):
+                            if not theme in themes_hit:
+                                themes_hit.append(theme)
+                            break
+
             m = {'score': f[SCORE],
                  'fragment': frag,
                  'section_number': f[POSITION][POSITION_INDEX] + 1,
-                 'themes': frag.tags.filter(category='theme')
+                 'themes': themes,
+                 'themes_hit': themes_hit
                  }
             m.update(f[OTHER])
             hits.append(m)
@@ -802,11 +829,14 @@ class Search(IndexStore):
             bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
         return (bks, tops.totalHits)
 
-    def get_tokens(self, searched, field='content'):
+    def get_tokens(self, searched, field='content', cached=None):
         """returns tokens analyzed by a proper (for a field) analyzer
         argument can be: StringReader, string/unicode, or tokens. In the last case
         they will just be returned (so we can reuse tokens, if we don't change the analyzer)
         """
+        if cached is not None and field in cached:
+            return cached[field]
+
         if isinstance(searched, str) or isinstance(searched, unicode):
             searched = StringReader(searched)
         elif isinstance(searched, list):
@@ -818,6 +848,10 @@ class Search(IndexStore):
         while tokens.incrementToken():
             cta = tokens.getAttribute(CharTermAttribute.class_)
             toks.append(cta.toString())
+
+        if cached is not None:
+            cached[field] = toks
+
         return toks
 
     def fuzziness(self, fuzzy):
@@ -874,9 +908,39 @@ class Search(IndexStore):
             q.add(BooleanClause(term, modal))
         return q
 
-    # def content_query(self, query):
-    #     return BlockJoinQuery(query, self.parent_filter,
-    #                           BlockJoinQuery.ScoreMode.Total)
+    def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
+                      filters=None, tokens_cache=None, boost=None):
+        if filters is None: filters = []
+        if tokens_cache is None: tokens_cache = {}
+
+        tokens = self.get_tokens(searched, field, cached=tokens_cache)
+
+        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy)
+        if book:
+            filters.append(self.term_filter(Term('is_book', 'true')))
+        top = self.searcher.search(query, self.chain_filters(filters), max_results)
+
+        return [SearchResult(self.searcher, found) for found in top.scoreDocs]
+
+    def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
+                    filters=None, tokens_cache=None, boost=None):
+        if filters is None: filters = []
+        if tokens_cache is None: tokens_cache = {}
+
+        if book:
+            filters.append(self.term_filter(Term('is_book', 'true')))
+
+        query = BooleanQuery()
+
+        for fld in fields:
+            tokens = self.get_tokens(searched, fld, cached=tokens_cache)
+
+            query.add(BooleanClause(self.make_term_query(tokens, field=fld,
+                                fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+
+        top = self.searcher.search(query, self.chain_filters(filters), max_results)
+
+        return [SearchResult(self.searcher, found, searched=searched, tokens_cache=tokens_cache) for found in top.scoreDocs]
 
     def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
         """
@@ -931,7 +995,7 @@ class Search(IndexStore):
 
     def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
         """
-        Search for book parts which containt a phrase perfectly matching (with a slop of 2, default for make_phrase())
+        Search for book parts which contain a phrase perfectly matching (with a slop of 2, default for make_phrase())
         some part/fragment of the book.
         """
         qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['content']]
@@ -951,12 +1015,13 @@ class Search(IndexStore):
 
         return books
 
-    def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None):
+    def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None, tokens_cache=None):
         """
         Tries to use search terms to match different fields of book (or its parts).
         E.g. one word can be an author survey, another be a part of the title, and the rest
         are some words from third chapter.
         """
+        if tokens_cache is None: tokens_cache = {}
         books = []
         only_in = None
 
@@ -966,8 +1031,8 @@ class Search(IndexStore):
         # content only query : themes x content
         q = BooleanQuery()
 
-        tokens_pl = self.get_tokens(searched, field='content')
-        tokens = self.get_tokens(searched, field='SIMPLE')
+        tokens_pl = self.get_tokens(searched, field='content', cached=tokens_cache)
+        tokens = self.get_tokens(searched, field='SIMPLE', cached=tokens_cache)
 
         # only search in themes when we do not already filter by themes
         if hint is None or hint.just_search_in(['themes']) != []:
@@ -1171,7 +1236,7 @@ class Search(IndexStore):
         Chains a filter list together
         """
         filters = filter(lambda x: x is not None, filters)
-        if not filters:
+        if not filters or filters is []:
             return None
         chf = ChainedFilter(JArray('object')(filters, Filter), op)
         return chf
index 710c6da..e9b2564 100644 (file)
@@ -119,28 +119,39 @@ def main(request):
             hint.books(book)
 
         toks = StringReader(query)
+        tokens_cache = {}
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
-        results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
-
-        for r in results:
-            r.process_hits()
-
-        results.sort(reverse=True)
+        author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
+        title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        # Boost main author/title results with mixed search, and save some of its results for end of list.
+        # boost author, title results
+        author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
+        author_title_rest = []
+        for b in author_title_mixed:
+            bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
+            for b2 in bks:
+                b2.boost *= 1.1
+            if bks is []:
+                author_title_rest.append(b)
+        
+        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache))
+        [r.process_hits() for r in text_phrase]
+        
+        everywhere = SearchResult.aggregate(srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache), author_title_rest)
+        [r.process_hits() for r in everywhere]
 
-        for r in results:
-            print "-----"
-            for h in r.hits:
-                print "- %s" % h
+        for res in [author_results, title_results, text_phrase, everywhere]:
+            res.sort(reverse=True)
 
-                # Did you mean?
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
 
+        results = author_results + title_results + text_phrase + everywhere
+        results.sort(reverse=True)
+        
         if len(results) == 1:
             if len(results[0].hits) == 0:
                 return HttpResponseRedirect(results[0].book.get_absolute_url())
@@ -159,6 +170,9 @@ def main(request):
         return render_to_response('catalogue/search_multiple_hits.html',
                                   {'tags': tag_list,
                                    'prefix': query,
-                                   'results': results,
+                                   'results': { 'author': author_results,
+                                                'title': title_results,
+                                                'content': text_phrase,
+                                                'other': everywhere},
                                    'did_you_mean': suggestion},
             context_instance=RequestContext(request))
index ca58a63..2ba3819 100755 (executable)
@@ -7,7 +7,6 @@
     vertical-align: top;
 }
 
-
 .book-box {
     width: 48.75em;
 }
@@ -17,7 +16,7 @@
 }
 
 .book-wide-box {
-    width: 98.5em;
+    width: 97.5em;
 
     /** This is a fullpage box, it must be aligned with the top menu. 
         This corresponds to a .1em margin below **/
    min-height: 24.4em;
 }
 
+/* Search result box: same width as .book-wide-box. */
+.search-result {
+    width: 97.5em;
+}
+
+.search-result .book-box-body {
+    width: 31em;
+}
+
+/* Header bar placed above a list of results; same width as the result boxes. */
+.book-list-header {
+    width: 97.5em;
+    padding: 0em;
+    margin-left: -0.1em;
+}
+
+.book-list-header .book-box-inner {
+    min-height: 1em;
+}
+
+.book-list-header p {
+    font-size: 2.4em;
+    margin: 0.5em;
+}
+
 /*.book-wide-box.search-result .book-box-inner, .book-wide-box.search-result blockquote {
     height: auto !important;
 }*/
 
-.book-mini-box img, .book-box img, .book-wide-box img {
+.book-mini-box img, .book-box img, .book-wide-box img, .search-result img {
     width: 13.9em;
     height: 19.3em;
 }
@@ -292,9 +314,17 @@ ul.inline-items li {
 
 .unlike .if-like {
     display: none;
+
+.snippets {
+    width: 44em;
+    float: right;
 }
 
 .snippets .snippet-text {
     font-size: 1.2em;
     margin: 1.083em 0em;
 }
+
+/* Hide anchors inside the snippets column. */
+.snippets .anchor {
+    display: none;
+}
index d719f30..4b92c19 100644 (file)
@@ -1,34 +1,27 @@
-{% extends "catalogue/book_wide.html" %}
+{% extends "catalogue/book_short.html" %}
 {% load i18n %}
 
 
-{% block box-class %}book-wide-box search-result{% endblock %}
+{% block box-class %}search-result{% endblock %}
 
-{% block quote %}
-{% if hits.0.snippets %}
-  <div class="cite-text"><a href="{% url book_text book.slug %}#f{{hits.0.section_number}}">{{hits.0.snippets.0|safe}}</a></div>
-{% else %}{% if hits.0.fragment %}
-  <div class="cite-text"><a href="{{hits.0.fragment.get_absolute_url}}">{{hits.0.fragment.short_text|safe}}</a></div>
-{% endif %}{% endif %}
-
-{% if hits.1 %}
-  <p class="cite-more mono"><a class="see-more-snippets" href="#snippets-{{book.id}}">{% trans "See more" %}</a></p>
-{% endif %}
-{% endblock %}
-
-
-{% block box-append %}
-<div class="snippets ui-helper-hidden">
-<a name="snippets-{{book.id}}">
-{% for hit in hits %}
+{% block right-column %}
+{# One entry per hit: a text snippet when available, otherwise the matched fragment. #}
+<div class="snippets">
+  {% for hit in hits %}
   {% if hit.snippets %}
-    <div class="snippet-text"><a href="{% url book_text book.slug %}#f{{hit.section_number}}">{{hit.snippets.0|safe}}</a></div>
+  <p>{% trans "In text:" %}</p>
+  <div class="snippet-text"><a href="{% url book_text book.slug %}#f{{hit.section_number}}">{{hit.snippets.0|safe}}</a></div>
   {% else %}
-    {% if hit.fragment %}
-      <div class="snippet-text"><a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.short_text|safe}}</a></div>
-    {% endif %}
+  {% if hit.fragment %}
+  <div class="snippet-text">
+    <p>{% trans "In fragment" %}
+      {% if hit.themes_hit %}{% trans ", for themes:" %}{% for t in hit.themes_hit %}{{t.name}} {% endfor %}{% endif %}
+    </p>
+    <a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.short_text|safe}}</a>
+  </div>
   {% endif %}
-{% endfor %}
-</a>
+  {% endif %}
+  {% endfor %}
 </div>
+
 {% endblock %}
+
index 5d22251..0a7dc1c 100644 (file)
     {% endif %}
     <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
 
-    <div id="results">
-      {% for result in results %}
-        {% book_searched result %}
-      {% endfor %}
+
+    {# Results of the phrase search in the 'authors' field; shown only when non-empty. #}
+    {% if results.author %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+       <p>{% trans "Results by authors" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for author in results.author %}
+       <li class="work-item">
+         {{author.book.short_html}}
+       </li>
+       {% endfor %}
+      </ol>
+    </div>
+    {% endif %}
+
+    {# Results of the phrase search in the 'title' field; shown only when non-empty. #}
+    {% if results.title %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+      <p>{% trans "Results by title" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for result in results.title %}
+       <li class="work-item">
+         {{result.book.short_html}}
+       </li>
+       {% endfor %}
+      </ol>
+    </div>
+    {% endif %}
+
+    {# Results of the phrase search in the text; the header is rendered once for the whole list. #}
+    {% if results.content %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+      <p>{% trans "Results in text" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for result in results.content %}
+       <li class="work-item">
+          {% book_searched result %}
+       </li>
+       {% endfor %}
+      </ol>
+    </div>
+    {% endif %}
+
+    {# Remaining results; the header is rendered once for the whole list. #}
+    {% if results.other %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+       <p>{% trans "Other results" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for result in results.other %}
+       <li class="work-item">
+          {% book_searched result %}
+       </li>
+       {% endfor %}
+      </ol>
     </div>
+    {% endif %}
+