search - new result layout. the css is not perfect though.
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Thu, 19 Jan 2012 15:45:27 +0000 (16:45 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Thu, 19 Jan 2012 15:46:59 +0000 (16:46 +0100)
apps/search/index.py
apps/search/views.py
wolnelektury/static/css/book_box.css
wolnelektury/templates/catalogue/book_searched.html
wolnelektury/templates/catalogue/search_multiple_hits.html

index 307376d..df44888 100644 (file)
@@ -553,11 +553,15 @@ class JoinSearch(object):
 
 
 class SearchResult(object):
 
 
 class SearchResult(object):
-    def __init__(self, searcher, scoreDocs, score=None, how_found=None, snippets=None):
+    def __init__(self, searcher, scoreDocs, score=None, how_found=None, snippets=None, searched=None, tokens_cache=None):
+        if tokens_cache is None: tokens_cache = {}
+            
         if score:
         if score:
-            self.score = score
+            self._score = score
         else:
         else:
-            self.score = scoreDocs.score
+            self._score = scoreDocs.score
+            
+        self.boost = 1.0
 
         self._hits = []
         self.hits = None  # processed hits
 
         self._hits = []
         self.hits = None  # processed hits
@@ -581,6 +585,14 @@ class SearchResult(object):
 
         self._hits.append(hit)
 
 
         self._hits.append(hit)
 
+        self.searcher = searcher
+        self.searched = searched
+        self.tokens_cache = tokens_cache
+
+    @property
+    def score(self):
+        return self._score * self.boost
+
     def merge(self, other):
         if self.book_id != other.book_id:
             raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
     def merge(self, other):
         if self.book_id != other.book_id:
             raise ValueError("this search result is or book %d; tried to merge with %d" % (self.book_id, other.book_id))
@@ -642,10 +654,25 @@ class SearchResult(object):
 
         for f in frags:
             frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT])
 
         for f in frags:
             frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT])
+
+            # Figure out if we were searching for a token matching some word in theme name.
+            themes = frag.tags.filter(category='theme')
+            themes_hit = []
+            if self.searched is not None:
+                tokens = self.searcher.get_tokens(self.searched, 'POLISH', tokens_cache=self.tokens_cache)
+                for theme in themes:
+                    name_tokens = self.searcher.get_tokens(theme.name, 'POLISH')
+                    for t in tokens:
+                        if name_tokens.index(t):
+                            if not theme in themes_hit:
+                                themes_hit.append(theme)
+                            break
+
             m = {'score': f[SCORE],
                  'fragment': frag,
                  'section_number': f[POSITION][POSITION_INDEX] + 1,
             m = {'score': f[SCORE],
                  'fragment': frag,
                  'section_number': f[POSITION][POSITION_INDEX] + 1,
-                 'themes': frag.tags.filter(category='theme')
+                 'themes': themes,
+                 'themes_hit': themes_hit
                  }
             m.update(f[OTHER])
             hits.append(m)
                  }
             m.update(f[OTHER])
             hits.append(m)
@@ -802,11 +829,14 @@ class Search(IndexStore):
             bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
         return (bks, tops.totalHits)
 
             bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
         return (bks, tops.totalHits)
 
-    def get_tokens(self, searched, field='content'):
+    def get_tokens(self, searched, field='content', cached=None):
         """returns tokens analyzed by a proper (for a field) analyzer
         argument can be: StringReader, string/unicode, or tokens. In the last case
         they will just be returned (so we can reuse tokens, if we don't change the analyzer)
         """
         """returns tokens analyzed by a proper (for a field) analyzer
         argument can be: StringReader, string/unicode, or tokens. In the last case
         they will just be returned (so we can reuse tokens, if we don't change the analyzer)
         """
+        if cached is not None and field in cached:
+            return cached[field]
+
         if isinstance(searched, str) or isinstance(searched, unicode):
             searched = StringReader(searched)
         elif isinstance(searched, list):
         if isinstance(searched, str) or isinstance(searched, unicode):
             searched = StringReader(searched)
         elif isinstance(searched, list):
@@ -818,6 +848,10 @@ class Search(IndexStore):
         while tokens.incrementToken():
             cta = tokens.getAttribute(CharTermAttribute.class_)
             toks.append(cta.toString())
         while tokens.incrementToken():
             cta = tokens.getAttribute(CharTermAttribute.class_)
             toks.append(cta.toString())
+
+        if cached is not None:
+            cached[field] = toks
+
         return toks
 
     def fuzziness(self, fuzzy):
         return toks
 
     def fuzziness(self, fuzzy):
@@ -874,9 +908,39 @@ class Search(IndexStore):
             q.add(BooleanClause(term, modal))
         return q
 
             q.add(BooleanClause(term, modal))
         return q
 
-    # def content_query(self, query):
-    #     return BlockJoinQuery(query, self.parent_filter,
-    #                           BlockJoinQuery.ScoreMode.Total)
+    def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
+                      filters=None, tokens_cache=None, boost=None):
+        if filters is None: filters = []
+        if tokens_cache is None: tokens_cache = {}
+
+        tokens = self.get_tokens(searched, field, cached=tokens_cache)
+
+        query = self.make_phrase(tokens, field=field, fuzzy=fuzzy)
+        if book:
+            filters.append(self.term_filter(Term('is_book', 'true')))
+        top = self.searcher.search(query, self.chain_filters(filters), max_results)
+
+        return [SearchResult(self.searcher, found) for found in top.scoreDocs]
+
+    def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
+                    filters=None, tokens_cache=None, boost=None):
+        if filters is None: filters = []
+        if tokens_cache is None: tokens_cache = {}
+
+        if book:
+            filters.append(self.term_filter(Term('is_book', 'true')))
+
+        query = BooleanQuery()
+
+        for fld in fields:
+            tokens = self.get_tokens(searched, fld, cached=tokens_cache)
+
+            query.add(BooleanClause(self.make_term_query(tokens, field=fld,
+                                fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+
+        top = self.searcher.search(query, self.chain_filters(filters), max_results)
+
+        return [SearchResult(self.searcher, found, searched=searched, tokens_cache=tokens_cache) for found in top.scoreDocs]
 
     def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
         """
 
     def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
         """
@@ -931,7 +995,7 @@ class Search(IndexStore):
 
     def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
         """
 
     def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
         """
-        Search for book parts which containt a phrase perfectly matching (with a slop of 2, default for make_phrase())
+        Search for book parts which contain a phrase perfectly matching (with a slop of 2, default for make_phrase())
         some part/fragment of the book.
         """
         qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['content']]
         some part/fragment of the book.
         """
         qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['content']]
@@ -951,12 +1015,13 @@ class Search(IndexStore):
 
         return books
 
 
         return books
 
-    def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None):
+    def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None, tokens_cache=None):
         """
         Tries to use search terms to match different fields of book (or its parts).
         E.g. one word can be an author survey, another be a part of the title, and the rest
         are some words from third chapter.
         """
         """
         Tries to use search terms to match different fields of book (or its parts).
         E.g. one word can be an author survey, another be a part of the title, and the rest
         are some words from third chapter.
         """
+        if tokens_cache is None: tokens_cache = {}
         books = []
         only_in = None
 
         books = []
         only_in = None
 
@@ -966,8 +1031,8 @@ class Search(IndexStore):
         # content only query : themes x content
         q = BooleanQuery()
 
         # content only query : themes x content
         q = BooleanQuery()
 
-        tokens_pl = self.get_tokens(searched, field='content')
-        tokens = self.get_tokens(searched, field='SIMPLE')
+        tokens_pl = self.get_tokens(searched, field='content', cached=tokens_cache)
+        tokens = self.get_tokens(searched, field='SIMPLE', cached=tokens_cache)
 
         # only search in themes when we do not already filter by themes
         if hint is None or hint.just_search_in(['themes']) != []:
 
         # only search in themes when we do not already filter by themes
         if hint is None or hint.just_search_in(['themes']) != []:
@@ -1171,7 +1236,7 @@ class Search(IndexStore):
         Chains a filter list together
         """
         filters = filter(lambda x: x is not None, filters)
         Chains a filter list together
         """
         filters = filter(lambda x: x is not None, filters)
-        if not filters:
+        if not filters or filters is []:
             return None
         chf = ChainedFilter(JArray('object')(filters, Filter), op)
         return chf
             return None
         chf = ChainedFilter(JArray('object')(filters, Filter), op)
         return chf
index 710c6da..e9b2564 100644 (file)
@@ -119,28 +119,39 @@ def main(request):
             hint.books(book)
 
         toks = StringReader(query)
             hint.books(book)
 
         toks = StringReader(query)
+        tokens_cache = {}
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
-        results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
-
-        for r in results:
-            r.process_hits()
-
-        results.sort(reverse=True)
+        author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
+        title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        # Boost main author/title results with mixed search, and save some of its results for end of list.
+        # boost author, title results
+        author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
+        author_title_rest = []
+        for b in author_title_mixed:
+            bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
+            for b2 in bks:
+                b2.boost *= 1.1
+            if bks is []:
+                author_title_rest.append(b)
+        
+        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache))
+        [r.process_hits() for r in text_phrase]
+        
+        everywhere = SearchResult.aggregate(srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache), author_title_rest)
+        [r.process_hits() for r in everywhere]
 
 
-        for r in results:
-            print "-----"
-            for h in r.hits:
-                print "- %s" % h
+        for res in [author_results, title_results, text_phrase, everywhere]:
+            res.sort(reverse=True)
 
 
-                # Did you mean?
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
 
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
 
+        results = author_results + title_results + text_phrase + everywhere
+        results.sort(reverse=True)
+        
         if len(results) == 1:
             if len(results[0].hits) == 0:
                 return HttpResponseRedirect(results[0].book.get_absolute_url())
         if len(results) == 1:
             if len(results[0].hits) == 0:
                 return HttpResponseRedirect(results[0].book.get_absolute_url())
@@ -159,6 +170,9 @@ def main(request):
         return render_to_response('catalogue/search_multiple_hits.html',
                                   {'tags': tag_list,
                                    'prefix': query,
         return render_to_response('catalogue/search_multiple_hits.html',
                                   {'tags': tag_list,
                                    'prefix': query,
-                                   'results': results,
+                                   'results': { 'author': author_results,
+                                                'title': title_results,
+                                                'content': text_phrase,
+                                                'other': everywhere},
                                    'did_you_mean': suggestion},
             context_instance=RequestContext(request))
                                    'did_you_mean': suggestion},
             context_instance=RequestContext(request))
index ca58a63..2ba3819 100755 (executable)
@@ -7,7 +7,6 @@
     vertical-align: top;
 }
 
     vertical-align: top;
 }
 
-
 .book-box {
     width: 48.75em;
 }
 .book-box {
     width: 48.75em;
 }
@@ -17,7 +16,7 @@
 }
 
 .book-wide-box {
 }
 
 .book-wide-box {
-    width: 98.5em;
+    width: 97.5em;
 
     /** This is a fullpage box, it must be aligned with the top menu. 
         This corresponds to a .1em margin below **/
 
     /** This is a fullpage box, it must be aligned with the top menu. 
         This corresponds to a .1em margin below **/
    min-height: 24.4em;
 }
 
    min-height: 24.4em;
 }
 
+.search-result {
+    width: 97.5em;
+}
+
+.search-result .book-box-body {
+    width: 31em;
+}
+
+.book-list-header {
+    width: 97.5em;
+    padding: 0em;
+    margin-left: -0.1em;
+}
+
+.book-list-header .book-box-inner {
+    min-height: 1em;
+}
+
+.book-list-header p {
+    font-size: 2.4em;
+    margin: 0.5em;
+}
+
 /*.book-wide-box.search-result .book-box-inner, .book-wide-box.search-result blockquote {
     height: auto !important;
 }*/
 
 /*.book-wide-box.search-result .book-box-inner, .book-wide-box.search-result blockquote {
     height: auto !important;
 }*/
 
-.book-mini-box img, .book-box img, .book-wide-box img {
+.book-mini-box img, .book-box img, .book-wide-box img, .search-result img {
     width: 13.9em;
     height: 19.3em;
 }
     width: 13.9em;
     height: 19.3em;
 }
@@ -292,9 +314,17 @@ ul.inline-items li {
 
 .unlike .if-like {
     display: none;
 
 .unlike .if-like {
     display: none;
+
+.snippets {
+    width: 44em;
+    float: right;
 }
 
 .snippets .snippet-text {
     font-size: 1.2em;
     margin: 1.083em 0em;
 }
 }
 
 .snippets .snippet-text {
     font-size: 1.2em;
     margin: 1.083em 0em;
 }
+
+.snipptes .anchor {
+    display: none;
+}
index d719f30..4b92c19 100644 (file)
@@ -1,34 +1,27 @@
-{% extends "catalogue/book_wide.html" %}
+{% extends "catalogue/book_short.html" %}
 {% load i18n %}
 
 
 {% load i18n %}
 
 
-{% block box-class %}book-wide-box search-result{% endblock %}
+{% block box-class %}search-result{% endblock %}
 
 
-{% block quote %}
-{% if hits.0.snippets %}
-  <div class="cite-text"><a href="{% url book_text book.slug %}#f{{hits.0.section_number}}">{{hits.0.snippets.0|safe}}</a></div>
-{% else %}{% if hits.0.fragment %}
-  <div class="cite-text"><a href="{{hits.0.fragment.get_absolute_url}}">{{hits.0.fragment.short_text|safe}}</a></div>
-{% endif %}{% endif %}
-
-{% if hits.1 %}
-  <p class="cite-more mono"><a class="see-more-snippets" href="#snippets-{{book.id}}">{% trans "See more" %}</a></p>
-{% endif %}
-{% endblock %}
-
-
-{% block box-append %}
-<div class="snippets ui-helper-hidden">
-<a name="snippets-{{book.id}}">
-{% for hit in hits %}
+{% block right-column %}
+<div class="snippets">
+  {% for hit in hits %}
   {% if hit.snippets %}
   {% if hit.snippets %}
-    <div class="snippet-text"><a href="{% url book_text book.slug %}#f{{hit.section_number}}">{{hit.snippets.0|safe}}</a></div>
+  <p>In text:</p>
+  <div class="snippet-text"><a href="{% url book_text book.slug %}#f{{hit.section_number}}">{{hit.snippets.0|safe}}</a></div>
   {% else %}
   {% else %}
-    {% if hit.fragment %}
-      <div class="snippet-text"><a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.short_text|safe}}</a></div>
-    {% endif %}
+  {% if hit.fragment %}
+  <div class="snippet-text">
+    <p>{% trans "In fragment" %}
+      {% if hit.themes_hit %}{% trans ", for themes:" %}{% for t in hit.themes_hit %}{{t.name}} {% endfor %}{% endif %}
+    </p>
+    <a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.short_text|safe}}</a>
+  </div>
   {% endif %}
   {% endif %}
-{% endfor %}
-</a>
+  {% endif %}
+  {% endfor %}
 </div>
 </div>
+
 {% endblock %}
 {% endblock %}
+
index 5d22251..0a7dc1c 100644 (file)
     {% endif %}
     <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
 
     {% endif %}
     <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
 
-    <div id="results">
-      {% for result in results %}
-        {% book_searched result %}
-      {% endfor %}
+
+    {% if results.author %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+       <p>{% trans "Results by authors" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for author in results.author %}
+       <li class="work-item">
+         {{author.book.short_html}}
+       </li>
+       {% endfor %}
+      </ol>
+    </div>
+    {% endif %}
+
+    {% if results.title %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+      <p>{% trans "Results by title" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for result in results.title %}
+       <li class="work-item">
+         {{result.book.short_html}}
+       </li>
+       {% endfor %}
+      </ol>
+    </div>
+    {% endif %}
+
+    {% if results.content %}
+    {% for result in results.content %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+      <p>{% trans "Results in text" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for result in results.title %}
+       <li class="work-item">
+          {% book_searched result %}
+       </li>
+       {% endfor %}
+      </ol>
+    </div>
+    {% endfor %}
+    {% endif %}
+
+    {% if results.other %}
+    {% for result in results.other %}
+    <div class="book-list-header">
+      <div class="book-box-inner">
+       <p>{% trans "Other results" %}</p>
+      </div>
+    </div>
+    <div>
+      <ol class="work-list">
+       {% for result in results.other %}
+       <li class="work-item">
+          {% book_searched result %}
+       </li>
+       {% endfor %}
+      </ol>
     </div>
     </div>
+    {% endfor %}
+    {% endif %}
+