revamp search hints

[wolnelektury.git] / src / search / views.py
diff --git a/src/search/views.py b/src/search/views.py

index 8b055a4..70a216e 100644 (file)
--- a/src/search/views.py
+++ b/src/search/views.py
@@ -10,7 +10,7 @@ from django.http import HttpResponse, JsonResponse
  from django.utils.translation import ugettext as _
  
  from catalogue.utils import split_tags
  from django.utils.translation import ugettext as _
  
  from catalogue.utils import split_tags
-from catalogue.models import Book
+from catalogue.models import Book, Tag
  from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  from search.index import Search, SearchResult
  from suggest.forms import PublishingSuggestForm
  from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  from search.index import Search, SearchResult
  from suggest.forms import PublishingSuggestForm
@@ -66,26 +66,6 @@ def hint(request):
  
      prefix = remove_query_syntax_chars(prefix)
  
  
      prefix = remove_query_syntax_chars(prefix)
  
-    search = Search()
-    # tagi beda ograniczac tutaj
-    # ale tagi moga byc na ksiazce i na fragmentach
-    # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
-    # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
-
-    def is_dupe(tag):
-        if isinstance(tag, PDCounterAuthor):
-            if filter(lambda t: t.slug == tag.slug and t != tag, tags):
-                return True
-        elif isinstance(tag, PDCounterBook):
-            if filter(lambda b: b.slug == tag.slug, tags):
-                return True
-        return False
-
-    def category_name(c):
-        if c.startswith('pd_'):
-            c = c[len('pd_'):]
-        return _(c)
-
      try:
          limit = int(request.GET.get('max', ''))
      except ValueError:
      try:
          limit = int(request.GET.get('max', ''))
      except ValueError:
@@ -94,33 +74,25 @@ def hint(request):
          if limit < 1:
              limit = -1
  
          if limit < 1:
              limit = -1
  
-    data = []
-
-    tags = search.hint_tags(prefix, pdcounter=True)
-    tags = filter(lambda t: not is_dupe(t), tags)
-    for t in tags:
-        if not limit:
-            break
-        limit -= 1
-        data.append({
-            'label': t.name,
-            'category': category_name(t.category),
-            'id': t.id,
-            'url': t.get_absolute_url()
-            })
-    if limit:
-        books = search.hint_books(prefix)
-        for b in books:
-            if not limit:
-                break
-            limit -= 1
-            data.append({
+    data = [
+        {
+            'label': author.name,
+            'category': _('author'),
+            'id': author.id,
+            'url': author.get_absolute_url(),
+        }
+        for author in Tag.objects.filter(category='author', name__iregex='\m' + prefix)[:10]
+    ]
+    if len(data) < limit:
+        data += [
+            {
                  'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                  'category': _('book'),
                  'id': b.id,
                  'url': b.get_absolute_url()
                  'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                  'category': _('book'),
                  'id': b.id,
                  'url': b.get_absolute_url()
-                })
-
+            }
+            for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
+        ]
      callback = request.GET.get('callback', None)
      if callback:
          return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
      callback = request.GET.get('callback', None)
      if callback:
          return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
@@ -153,52 +125,35 @@ def main(request):
  
      search = Search()
  
  
      search = Search()
  
-    # change hints
      tags = search.hint_tags(query, pdcounter=True, prefix=False)
      tags = split_tags(tags)
  
      tags = search.hint_tags(query, pdcounter=True, prefix=False)
      tags = split_tags(tags)
  
-    author_results = search.search_words(words, ['authors'])
-
-    title_results = search.search_words(words, ['title'])
-
-    author_title_mixed = search.search_words(words, ['authors', 'title', 'metadata'])
-    author_title_rest = []
-
-    for b in author_title_mixed:
-        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
-        for b2 in also_in_mixed:
-            b2.boost *= 1.1
-        if not also_in_mixed:
-            author_title_rest.append(b)
-
-    text_phrase = SearchResult.aggregate(search.search_words(words, ['text'], book=False))
-
-    everywhere = search.search_words(words, ['metadata', 'text', 'themes_pl'], book=False)
-
-    def already_found(results):
-        def f(e):
-            for r in results:
-                if e.book_id == r.book_id:
-                    e.boost = 0.9
-                    results.append(e)
-                    return True
-            return False
-        return f
-    f = already_found(author_results + title_results + text_phrase)
-    everywhere = filter(lambda x: not f(x), everywhere)
-
-    author_results = SearchResult.aggregate(author_results, author_title_rest)
-    title_results = SearchResult.aggregate(title_results)
-
-    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
-    for field, res in [('authors', author_results),
-                       ('title', title_results),
-                       ('text', text_phrase),
-                       ('text', everywhere)]:
-        res.sort(reverse=True)
-        for r in res:
-            search.get_snippets(r, query, field, 3)
+    results_parts = []
+
+    search_fields = []
+    fieldsets = (
+        (['authors'], True),
+        (['title'], True),
+        (['metadata'], True),
+        (['text', 'themes_pl'], False),
+    )
+    for fieldset, is_book in fieldsets:
+        search_fields += fieldset
+        results_parts.append(search.search_words(words, search_fields, book=is_book))
+
+    results = []
+    ids_results = {}
+    for results_part in results_parts:
+        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
+            book_id = result.book_id
+            if book_id in ids_results:
+                ids_results[book_id].merge(result)
+            else:
+                results.append(result)
+                ids_results[book_id] = result
+
+    for result in results:
+        search.get_snippets(result, query, num=3)
  
      suggestion = u''
  
  
      suggestion = u''
  
@@ -208,16 +163,9 @@ def main(request):
          except Book.DoesNotExist:
              return False
  
          except Book.DoesNotExist:
              return False
  
-    author_results = filter(ensure_exists, author_results)
-    title_results = filter(ensure_exists, title_results)
-    text_phrase = filter(ensure_exists, text_phrase)
-    everywhere = filter(ensure_exists, everywhere)
+    results = filter(ensure_exists, results)
  
  
-    # ensure books do exists & sort them
-    for res in (author_results, title_results, text_phrase):
-        res.sort(reverse=True)
-
-    if not (author_results or title_results or text_phrase or everywhere):
+    if not results:
          form = PublishingSuggestForm(initial={"books": query + ", "})
          return render_to_response(
              'catalogue/search_no_hits.html',
          form = PublishingSuggestForm(initial={"books": query + ", "})
          return render_to_response(
              'catalogue/search_no_hits.html',
@@ -232,14 +180,9 @@ def main(request):
      return render_to_response(
          'catalogue/search_multiple_hits.html',
          {
      return render_to_response(
          'catalogue/search_multiple_hits.html',
          {
-            'tags': tags,
+            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
              'prefix': query,
              'prefix': query,
-            'results': {
-                'author': author_results,
-                'title': title_results,
-                'content': text_phrase,
-                'other': everywhere
-            },
+            'results': results,
              'did_you_mean': suggestion
          },
          context_instance=RequestContext(request))
              'did_you_mean': suggestion
          },
          context_instance=RequestContext(request))