simplify search results list

[wolnelektury.git] / src / search / views.py
diff --git a/src/search/views.py b/src/search/views.py

index f7aa77c..a659329 100644 (file)
--- a/src/search/views.py
+++ b/src/search/views.py
@@ -3,19 +3,18 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from django.conf import settings
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from django.conf import settings
-from django.shortcuts import render_to_response, get_object_or_404
+from django.shortcuts import render_to_response
  from django.template import RequestContext
  from django.views.decorators import cache
  from django.template import RequestContext
  from django.views.decorators import cache
-from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
+from django.http import HttpResponse, JsonResponse
  from django.utils.translation import ugettext as _
  
  from catalogue.utils import split_tags
  from django.utils.translation import ugettext as _
  
  from catalogue.utils import split_tags
-from catalogue.models import Book, Tag, Fragment
+from catalogue.models import Book
  from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  from search.index import Search, SearchResult
  from suggest.forms import PublishingSuggestForm
  import re
  from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  from search.index import Search, SearchResult
  from suggest.forms import PublishingSuggestForm
  import re
-#import enchant
  import json
  
  
  import json
  
  
@@ -116,7 +115,7 @@ def hint(request):
                  break
              limit -= 1
              data.append({
                  break
              limit -= 1
              data.append({
-                'label': b.title,
+                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                  'category': _('book'),
                  'id': b.id,
                  'url': b.get_absolute_url()
                  'category': _('book'),
                  'id': b.id,
                  'url': b.get_absolute_url()
@@ -132,80 +131,57 @@ def hint(request):
  
  @cache.never_cache
  def main(request):
  
  @cache.never_cache
  def main(request):
-    results = {}
-
-    results = None
-    query = None
-
      query = request.GET.get('q', '')
      query = request.GET.get('q', '')
+    query = ' '.join(query.split())
+    # filter out private use characters
+    import unicodedata
+    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
  
      if len(query) < 2:
  
      if len(query) < 2:
-        return render_to_response('catalogue/search_too_short.html',
-                                  {'prefix': query},
+        return render_to_response(
+            'catalogue/search_too_short.html', {'prefix': query},
              context_instance=RequestContext(request))
              context_instance=RequestContext(request))
+    elif len(query) > 256:
+        return render_to_response(
+            'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
  
      query = remove_query_syntax_chars(query)
  
      query = remove_query_syntax_chars(query)
-    
-    search = Search()
  
  
-    theme_terms = search.index.analyze(text=query, field="themes_pl") \
-        + search.index.analyze(text=query, field="themes")
+    words = query.split()
+    if len(words) > 10:
+        query = ' '.join(words[:10])
+
+    search = Search()
  
  
-            # change hints
      tags = search.hint_tags(query, pdcounter=True, prefix=False)
      tags = split_tags(tags)
  
      tags = search.hint_tags(query, pdcounter=True, prefix=False)
      tags = split_tags(tags)
  
-    author_results = search.search_phrase(query, 'authors', book=True)
-    translator_results = search.search_phrase(query, 'translators', book=True)
-
-    title_results = search.search_phrase(query, 'title', book=True)
-
-    # Boost main author/title results with mixed search, and save some of its results for end of list.
-    # boost author, title results
-    author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
-    author_title_rest = []
-
-    for b in author_title_mixed:
-        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
-        for b2 in also_in_mixed:
-            b2.boost *= 1.1
-        if also_in_mixed is []:
-            author_title_rest.append(b)
-
-    # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
-    # Because the query is using only one field.
-    text_phrase = SearchResult.aggregate(
-        search.search_phrase(query, 'text', snippets=True, book=False),
-        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
-
-    everywhere = search.search_everywhere(query, query_terms=theme_terms)
-
-    def already_found(results):
-        def f(e):
-            for r in results:
-                if e.book_id == r.book_id:
-                    e.boost = 0.9
-                    results.append(e)
-                    return True
-            return False
-        return f
-    f = already_found(author_results + translator_results + title_results + text_phrase)
-    everywhere = filter(lambda x: not f(x), everywhere)
-
-    author_results = SearchResult.aggregate(author_results)
-    translator_results = SearchResult.aggregate(translator_results)
-    title_results = SearchResult.aggregate(title_results)
-
-    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
-    for field, res in [('authors', author_results),
-                       ('translators', translator_results),
-                       ('title', title_results),
-                       ('text', text_phrase),
-                       ('text', everywhere)]:
-        res.sort(reverse=True)
-        for r in res:
-            search.get_snippets(r, query, field, 3)
+    results_parts = []
+
+    search_fields = []
+    fieldsets = (
+        (['authors'], True),
+        (['title'], True),
+        (['metadata'], True),
+        (['text', 'themes_pl'], False),
+    )
+    for fieldset, is_book in fieldsets:
+        search_fields += fieldset
+        results_parts.append(search.search_words(words, search_fields, book=is_book))
+
+    results = []
+    ids_results = {}
+    for results_part in results_parts:
+        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
+            book_id = result.book_id
+            if book_id in ids_results:
+                ids_results[book_id].merge(result)
+            else:
+                results.append(result)
+                ids_results[book_id] = result
+
+    for result in results:
+        search.get_snippets(result, query, num=3)
  
      suggestion = u''
  
  
      suggestion = u''
  
@@ -215,41 +191,26 @@ def main(request):
          except Book.DoesNotExist:
              return False
  
          except Book.DoesNotExist:
              return False
  
-    author_results = filter(ensure_exists, author_results)
-    translator_results = filter(ensure_exists, translator_results)
-    title_results = filter(ensure_exists, title_results)
-    text_phrase = filter(ensure_exists, text_phrase)
-    everywhere = filter(ensure_exists, everywhere)
-
-    results = author_results + translator_results + title_results + text_phrase + everywhere
-    # ensure books do exists & sort them
-    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
-        res.sort(reverse=True)
-
-    # We don't want to redirect to book text, but rather display result page even with one result.
-    # if len(results) == 1:
-    #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
-    #     if len(fragment_hits) == 1:
-    #         #anchor = fragment_hits[0]['fragment']
-    #         #frag = Fragment.objects.get(anchor=anchor)
-    #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
-    #     return HttpResponseRedirect(results[0].book.get_absolute_url())
-    if len(results) == 0:
+    results = filter(ensure_exists, results)
+
+    if not results:
          form = PublishingSuggestForm(initial={"books": query + ", "})
          form = PublishingSuggestForm(initial={"books": query + ", "})
-        return render_to_response('catalogue/search_no_hits.html',
-                                  {'tags': tags,
-                                   'prefix': query,
-                                   "form": form,
-                                   'did_you_mean': suggestion},
+        return render_to_response(
+            'catalogue/search_no_hits.html',
+            {
+                'tags': tags,
+                'prefix': query,
+                'form': form,
+                'did_you_mean': suggestion
+            },
              context_instance=RequestContext(request))
  
              context_instance=RequestContext(request))
  
-    return render_to_response('catalogue/search_multiple_hits.html',
-                              {'tags': tags,
-                               'prefix': query,
-                               'results': {'author': author_results,
-                                           'translator': translator_results,
-                                           'title': title_results,
-                                           'content': text_phrase,
-                                           'other': everywhere},
-                               'did_you_mean': suggestion},
+    return render_to_response(
+        'catalogue/search_multiple_hits.html',
+        {
+            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
+            'prefix': query,
+            'results': results,
+            'did_you_mean': suggestion
+        },
          context_instance=RequestContext(request))
          context_instance=RequestContext(request))