merge
[wolnelektury.git] / apps / search / views.py
index 166011c..052b2f1 100644 (file)
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
 # -*- coding: utf-8 -*-
 
+from django.conf import settings
 from django.shortcuts import render_to_response, get_object_or_404
 from django.template import RequestContext
 from django.contrib.auth.decorators import login_required
 from django.shortcuts import render_to_response, get_object_or_404
 from django.template import RequestContext
 from django.contrib.auth.decorators import login_required
@@ -14,17 +15,26 @@ from catalogue.views import JSONResponse
 from search import Search, JVM, SearchResult
 from lucene import StringReader
 from suggest.forms import PublishingSuggestForm
 from search import Search, JVM, SearchResult
 from lucene import StringReader
 from suggest.forms import PublishingSuggestForm
-
+import re
 import enchant
 
 dictionary = enchant.Dict('pl_PL')
 
 
 import enchant
 
 dictionary = enchant.Dict('pl_PL')
 
 
+def match_word_re(word):
+    if 'sqlite' in settings.DATABASES['default']['ENGINE']:
+        return r"\b%s\b" % word
+    elif 'mysql' in settings.DATABASES['default']['ENGINE']:
+        return "[[:<:]]%s[[:>:]]" % word
+
+
 def did_you_mean(query, tokens):
     change = {}
 def did_you_mean(query, tokens):
     change = {}
-    # sprawdzić, czy słowo nie jest aby autorem - proste szukanie termu w author!
     for t in tokens:
     for t in tokens:
-        print("%s ok? %s, sug: %s" %(t, dictionary.check(t), dictionary.suggest(t)))
+        authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
+        if len(authors) > 0:
+            continue
+        
         if not dictionary.check(t):
             try:
                 change[t] = dictionary.suggest(t)[0]
         if not dictionary.check(t):
             try:
                 change[t] = dictionary.suggest(t)[0]
@@ -109,39 +119,81 @@ def main(request):
             hint.books(book)
 
         toks = StringReader(query)
             hint.books(book)
 
         toks = StringReader(query)
+        tokens_cache = {}
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
-        results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
-
-        for r in results:
-            r.process_hits()
-
+        author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
+        title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        # Boost main author/title results with mixed search, and save some of its results for end of list.
+        # boost author, title results
+        author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
+        author_title_rest = []
+        for b in author_title_mixed:
+            bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
+            for b2 in bks:
+                b2.boost *= 1.1
+            if bks is []:
+                author_title_rest.append(b)
+        
+        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
+        
+        everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        def already_found(results):
+            def f(e):
+                for r in results:
+                    if e.book_id == r.book_id:
+                        results.append(e)
+                        return True
+                return False
+            return f
+        f = already_found(author_results + title_results)
+        everywhere = filter(lambda x: not f(x), everywhere)
+
+        author_results = SearchResult.aggregate(author_results)
+        title_results = SearchResult.aggregate(title_results)
+        
+        everywhere = SearchResult.aggregate(everywhere, author_title_rest)
+
+        for res in [author_results, title_results, text_phrase, everywhere]:
+            res.sort(reverse=True)
+            for r in res:
+                for h in r.hits:
+                    h['snippets'] = map(lambda s:
+                                        re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", 
+                                                re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
+                    
+        suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
+        print "dym? %s" % repr(suggestion).encode('utf-8')
+        
+        results = author_results + title_results + text_phrase + everywhere
         results.sort(reverse=True)
         results.sort(reverse=True)
-
-        for r in results:
-            print "-----"
-            for h in r.hits:
-                print "- %s" % h
-
+        
         if len(results) == 1:
         if len(results) == 1:
-            if len(results[0].hits) == 0:
-                return HttpResponseRedirect(results[0].book.get_absolute_url())
-            elif len(results[0].hits) == 1 and results[0].hits[0] is not None:
-                frag = Fragment.objects.get(anchor=results[0].hits[0])
+            fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
+            if len(fragment_hits) == 1:
+                anchor = fragment_hits[0]['fragment']
+                frag = Fragment.objects.get(anchor=anchor)
                 return HttpResponseRedirect(frag.get_absolute_url())
                 return HttpResponseRedirect(frag.get_absolute_url())
+            return HttpResponseRedirect(results[0].book.get_absolute_url())
         elif len(results) == 0:
             form = PublishingSuggestForm(initial={"books": query + ", "})
             return render_to_response('catalogue/search_no_hits.html',
         elif len(results) == 0:
             form = PublishingSuggestForm(initial={"books": query + ", "})
             return render_to_response('catalogue/search_no_hits.html',
-                                      {'tags': tag_list, 'prefix': query,
-                                       "form": form},
+                                      {'tags': tag_list,
+                                       'prefix': query,
+                                       "form": form,
+                                       'did_you_mean': suggestion},
                 context_instance=RequestContext(request))
 
         return render_to_response('catalogue/search_multiple_hits.html',
                 context_instance=RequestContext(request))
 
         return render_to_response('catalogue/search_multiple_hits.html',
-                                  {'tags': tag_list, 'prefix': query,
-                                   'results': results},
+                                  {'tags': tag_list,
+                                   'prefix': query,
+                                   'results': { 'author': author_results,
+                                                'title': title_results,
+                                                'content': text_phrase,
+                                                'other': everywhere},
+                                   'did_you_mean': suggestion},
             context_instance=RequestContext(request))
             context_instance=RequestContext(request))