simplify search results list
[wolnelektury.git] / src / search / views.py
index f7aa77c..a659329 100644 (file)
@@ -3,19 +3,18 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.conf import settings
-from django.shortcuts import render_to_response, get_object_or_404
+from django.shortcuts import render_to_response
 from django.template import RequestContext
 from django.views.decorators import cache
-from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
+from django.http import HttpResponse, JsonResponse
 from django.utils.translation import ugettext as _
 
 from catalogue.utils import split_tags
-from catalogue.models import Book, Tag, Fragment
+from catalogue.models import Book
 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
 from search.index import Search, SearchResult
 from suggest.forms import PublishingSuggestForm
 import re
-#import enchant
 import json
 
 
@@ -116,7 +115,7 @@ def hint(request):
                 break
             limit -= 1
             data.append({
-                'label': b.title,
+                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                 'category': _('book'),
                 'id': b.id,
                 'url': b.get_absolute_url()
@@ -132,80 +131,57 @@ def hint(request):
 
 @cache.never_cache
 def main(request):
-    results = {}
-
-    results = None
-    query = None
-
     query = request.GET.get('q', '')
+    query = ' '.join(query.split())
+    # filter out private use characters
+    import unicodedata
+    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 
     if len(query) < 2:
-        return render_to_response('catalogue/search_too_short.html',
-                                  {'prefix': query},
+        return render_to_response(
+            'catalogue/search_too_short.html', {'prefix': query},
             context_instance=RequestContext(request))
+    elif len(query) > 256:
+        return render_to_response(
+            'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 
     query = remove_query_syntax_chars(query)
-    
-    search = Search()
 
-    theme_terms = search.index.analyze(text=query, field="themes_pl") \
-        + search.index.analyze(text=query, field="themes")
+    words = query.split()
+    if len(words) > 10:
+        query = ' '.join(words[:10])
+
+    search = Search()
 
-            # change hints
     tags = search.hint_tags(query, pdcounter=True, prefix=False)
     tags = split_tags(tags)
 
-    author_results = search.search_phrase(query, 'authors', book=True)
-    translator_results = search.search_phrase(query, 'translators', book=True)
-
-    title_results = search.search_phrase(query, 'title', book=True)
-
-    # Boost main author/title results with mixed search, and save some of its results for end of list.
-    # boost author, title results
-    author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
-    author_title_rest = []
-
-    for b in author_title_mixed:
-        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
-        for b2 in also_in_mixed:
-            b2.boost *= 1.1
-        if also_in_mixed is []:
-            author_title_rest.append(b)
-
-    # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
-    # Because the query is using only one field.
-    text_phrase = SearchResult.aggregate(
-        search.search_phrase(query, 'text', snippets=True, book=False),
-        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
-
-    everywhere = search.search_everywhere(query, query_terms=theme_terms)
-
-    def already_found(results):
-        def f(e):
-            for r in results:
-                if e.book_id == r.book_id:
-                    e.boost = 0.9
-                    results.append(e)
-                    return True
-            return False
-        return f
-    f = already_found(author_results + translator_results + title_results + text_phrase)
-    everywhere = filter(lambda x: not f(x), everywhere)
-
-    author_results = SearchResult.aggregate(author_results)
-    translator_results = SearchResult.aggregate(translator_results)
-    title_results = SearchResult.aggregate(title_results)
-
-    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
-    for field, res in [('authors', author_results),
-                       ('translators', translator_results),
-                       ('title', title_results),
-                       ('text', text_phrase),
-                       ('text', everywhere)]:
-        res.sort(reverse=True)
-        for r in res:
-            search.get_snippets(r, query, field, 3)
+    results_parts = []
+
+    search_fields = []
+    fieldsets = (
+        (['authors'], True),
+        (['title'], True),
+        (['metadata'], True),
+        (['text', 'themes_pl'], False),
+    )
+    for fieldset, is_book in fieldsets:
+        search_fields += fieldset
+        results_parts.append(search.search_words(words, search_fields, book=is_book))
+
+    results = []
+    ids_results = {}
+    for results_part in results_parts:
+        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
+            book_id = result.book_id
+            if book_id in ids_results:
+                ids_results[book_id].merge(result)
+            else:
+                results.append(result)
+                ids_results[book_id] = result
+
+    for result in results:
+        search.get_snippets(result, query, num=3)
 
     suggestion = u''
 
@@ -215,41 +191,26 @@ def main(request):
         except Book.DoesNotExist:
             return False
 
-    author_results = filter(ensure_exists, author_results)
-    translator_results = filter(ensure_exists, translator_results)
-    title_results = filter(ensure_exists, title_results)
-    text_phrase = filter(ensure_exists, text_phrase)
-    everywhere = filter(ensure_exists, everywhere)
-
-    results = author_results + translator_results + title_results + text_phrase + everywhere
-    # ensure books do exists & sort them
-    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
-        res.sort(reverse=True)
-
-    # We don't want to redirect to book text, but rather display result page even with one result.
-    # if len(results) == 1:
-    #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
-    #     if len(fragment_hits) == 1:
-    #         #anchor = fragment_hits[0]['fragment']
-    #         #frag = Fragment.objects.get(anchor=anchor)
-    #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
-    #     return HttpResponseRedirect(results[0].book.get_absolute_url())
-    if len(results) == 0:
+    results = filter(ensure_exists, results)
+
+    if not results:
         form = PublishingSuggestForm(initial={"books": query + ", "})
-        return render_to_response('catalogue/search_no_hits.html',
-                                  {'tags': tags,
-                                   'prefix': query,
-                                   "form": form,
-                                   'did_you_mean': suggestion},
+        return render_to_response(
+            'catalogue/search_no_hits.html',
+            {
+                'tags': tags,
+                'prefix': query,
+                'form': form,
+                'did_you_mean': suggestion
+            },
             context_instance=RequestContext(request))
 
-    return render_to_response('catalogue/search_multiple_hits.html',
-                              {'tags': tags,
-                               'prefix': query,
-                               'results': {'author': author_results,
-                                           'translator': translator_results,
-                                           'title': title_results,
-                                           'content': text_phrase,
-                                           'other': everywhere},
-                               'did_you_mean': suggestion},
+    return render_to_response(
+        'catalogue/search_multiple_hits.html',
+        {
+            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
+            'prefix': query,
+            'results': results,
+            'did_you_mean': suggestion
+        },
         context_instance=RequestContext(request))