resize snippets
[wolnelektury.git] / src / search / views.py
index f7aa77c..8b055a4 100644 (file)
@@ -3,19 +3,18 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.conf import settings
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.conf import settings
-from django.shortcuts import render_to_response, get_object_or_404
+from django.shortcuts import render_to_response
 from django.template import RequestContext
 from django.views.decorators import cache
 from django.template import RequestContext
 from django.views.decorators import cache
-from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect, JsonResponse
+from django.http import HttpResponse, JsonResponse
 from django.utils.translation import ugettext as _
 
 from catalogue.utils import split_tags
 from django.utils.translation import ugettext as _
 
 from catalogue.utils import split_tags
-from catalogue.models import Book, Tag, Fragment
+from catalogue.models import Book
 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
 from search.index import Search, SearchResult
 from suggest.forms import PublishingSuggestForm
 import re
 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
 from search.index import Search, SearchResult
 from suggest.forms import PublishingSuggestForm
 import re
-#import enchant
 import json
 
 
 import json
 
 
@@ -116,7 +115,7 @@ def hint(request):
                 break
             limit -= 1
             data.append({
                 break
             limit -= 1
             data.append({
-                'label': b.title,
+                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                 'category': _('book'),
                 'id': b.id,
                 'url': b.get_absolute_url()
                 'category': _('book'),
                 'id': b.id,
                 'url': b.get_absolute_url()
@@ -132,53 +131,49 @@ def hint(request):
 
 @cache.never_cache
 def main(request):
 
 @cache.never_cache
 def main(request):
-    results = {}
-
-    results = None
-    query = None
-
     query = request.GET.get('q', '')
     query = request.GET.get('q', '')
+    query = ' '.join(query.split())
+    # filter out private use characters
+    import unicodedata
+    query = ''.join(ch for ch in query if unicodedata.category(ch) != 'Co')
 
     if len(query) < 2:
 
     if len(query) < 2:
-        return render_to_response('catalogue/search_too_short.html',
-                                  {'prefix': query},
+        return render_to_response(
+            'catalogue/search_too_short.html', {'prefix': query},
             context_instance=RequestContext(request))
             context_instance=RequestContext(request))
+    elif len(query) > 256:
+        return render_to_response(
+            'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 
     query = remove_query_syntax_chars(query)
 
     query = remove_query_syntax_chars(query)
-    
-    search = Search()
 
 
-    theme_terms = search.index.analyze(text=query, field="themes_pl") \
-        + search.index.analyze(text=query, field="themes")
+    words = query.split()
+    if len(words) > 10:
+        query = ' '.join(words[:10])
+
+    search = Search()
 
 
-            # change hints
+    # change hints
     tags = search.hint_tags(query, pdcounter=True, prefix=False)
     tags = split_tags(tags)
 
     tags = search.hint_tags(query, pdcounter=True, prefix=False)
     tags = split_tags(tags)
 
-    author_results = search.search_phrase(query, 'authors', book=True)
-    translator_results = search.search_phrase(query, 'translators', book=True)
+    author_results = search.search_words(words, ['authors'])
 
 
-    title_results = search.search_phrase(query, 'title', book=True)
+    title_results = search.search_words(words, ['title'])
 
 
-    # Boost main author/title results with mixed search, and save some of its results for end of list.
-    # boost author, title results
-    author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
+    author_title_mixed = search.search_words(words, ['authors', 'title', 'metadata'])
     author_title_rest = []
 
     for b in author_title_mixed:
     author_title_rest = []
 
     for b in author_title_mixed:
-        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
+        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
         for b2 in also_in_mixed:
             b2.boost *= 1.1
         for b2 in also_in_mixed:
             b2.boost *= 1.1
-        if also_in_mixed is []:
+        if not also_in_mixed:
             author_title_rest.append(b)
 
             author_title_rest.append(b)
 
-    # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
-    # Because the query is using only one field.
-    text_phrase = SearchResult.aggregate(
-        search.search_phrase(query, 'text', snippets=True, book=False),
-        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
+    text_phrase = SearchResult.aggregate(search.search_words(words, ['text'], book=False))
 
 
-    everywhere = search.search_everywhere(query, query_terms=theme_terms)
+    everywhere = search.search_words(words, ['metadata', 'text', 'themes_pl'], book=False)
 
     def already_found(results):
         def f(e):
 
     def already_found(results):
         def f(e):
@@ -189,17 +184,15 @@ def main(request):
                     return True
             return False
         return f
                     return True
             return False
         return f
-    f = already_found(author_results + translator_results + title_results + text_phrase)
+    f = already_found(author_results + title_results + text_phrase)
     everywhere = filter(lambda x: not f(x), everywhere)
 
     everywhere = filter(lambda x: not f(x), everywhere)
 
-    author_results = SearchResult.aggregate(author_results)
-    translator_results = SearchResult.aggregate(translator_results)
+    author_results = SearchResult.aggregate(author_results, author_title_rest)
     title_results = SearchResult.aggregate(title_results)
 
     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 
     for field, res in [('authors', author_results),
     title_results = SearchResult.aggregate(title_results)
 
     everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 
     for field, res in [('authors', author_results),
-                       ('translators', translator_results),
                        ('title', title_results),
                        ('text', text_phrase),
                        ('text', everywhere)]:
                        ('title', title_results),
                        ('text', text_phrase),
                        ('text', everywhere)]:
@@ -216,40 +209,37 @@ def main(request):
             return False
 
     author_results = filter(ensure_exists, author_results)
             return False
 
     author_results = filter(ensure_exists, author_results)
-    translator_results = filter(ensure_exists, translator_results)
     title_results = filter(ensure_exists, title_results)
     text_phrase = filter(ensure_exists, text_phrase)
     everywhere = filter(ensure_exists, everywhere)
 
     title_results = filter(ensure_exists, title_results)
     text_phrase = filter(ensure_exists, text_phrase)
     everywhere = filter(ensure_exists, everywhere)
 
-    results = author_results + translator_results + title_results + text_phrase + everywhere
     # ensure books exist & sort them
     # ensure books exist & sort them
-    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
+    for res in (author_results, title_results, text_phrase):
         res.sort(reverse=True)
 
         res.sort(reverse=True)
 
-    # We don't want to redirect to book text, but rather display result page even with one result.
-    # if len(results) == 1:
-    #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
-    #     if len(fragment_hits) == 1:
-    #         #anchor = fragment_hits[0]['fragment']
-    #         #frag = Fragment.objects.get(anchor=anchor)
-    #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
-    #     return HttpResponseRedirect(results[0].book.get_absolute_url())
-    if len(results) == 0:
+    if not (author_results or title_results or text_phrase or everywhere):
         form = PublishingSuggestForm(initial={"books": query + ", "})
         form = PublishingSuggestForm(initial={"books": query + ", "})
-        return render_to_response('catalogue/search_no_hits.html',
-                                  {'tags': tags,
-                                   'prefix': query,
-                                   "form": form,
-                                   'did_you_mean': suggestion},
+        return render_to_response(
+            'catalogue/search_no_hits.html',
+            {
+                'tags': tags,
+                'prefix': query,
+                'form': form,
+                'did_you_mean': suggestion
+            },
             context_instance=RequestContext(request))
 
             context_instance=RequestContext(request))
 
-    return render_to_response('catalogue/search_multiple_hits.html',
-                              {'tags': tags,
-                               'prefix': query,
-                               'results': {'author': author_results,
-                                           'translator': translator_results,
-                                           'title': title_results,
-                                           'content': text_phrase,
-                                           'other': everywhere},
-                               'did_you_mean': suggestion},
+    return render_to_response(
+        'catalogue/search_multiple_hits.html',
+        {
+            'tags': tags,
+            'prefix': query,
+            'results': {
+                'author': author_results,
+                'title': title_results,
+                'content': text_phrase,
+                'other': everywhere
+            },
+            'did_you_mean': suggestion
+        },
         context_instance=RequestContext(request))
         context_instance=RequestContext(request))