Merge branch 'pretty' of github.com:fnp/wolnelektury into pretty
[wolnelektury.git] / apps / search / views.py
index 710c6da..cf00870 100644 (file)
@@ -15,7 +15,7 @@ from catalogue.views import JSONResponse
 from search import Search, JVM, SearchResult
 from lucene import StringReader
 from suggest.forms import PublishingSuggestForm
 from search import Search, JVM, SearchResult
 from lucene import StringReader
 from suggest.forms import PublishingSuggestForm
-
+import re
 import enchant
 
 dictionary = enchant.Dict('pl_PL')
 import enchant
 
 dictionary = enchant.Dict('pl_PL')
@@ -119,34 +119,71 @@ def main(request):
             hint.books(book)
 
         toks = StringReader(query)
             hint.books(book)
 
         toks = StringReader(query)
+        tokens_cache = {}
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
         fuzzy = 'fuzzy' in request.GET
         if fuzzy:
             fuzzy = 0.7
 
-        results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_book(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
-                                         srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
-
-        for r in results:
-            r.process_hits()
-
-        results.sort(reverse=True)
-
-        for r in results:
-            print "-----"
-            for h in r.hits:
-                print "- %s" % h
-
-                # Did you mean?
+        author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
+        title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        # Boost main author/title results with mixed search, and save some of its results for end of list.
+        # boost author, title results
+        author_title_mixed = srch.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
+        author_title_rest = []
+        for b in author_title_mixed:
+            bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
+            for b2 in bks:
+                b2.boost *= 1.1
+            if bks is []:
+                author_title_rest.append(b)
+
+        # Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
+        # because the query is using only one field.
+        text_phrase = SearchResult.aggregate(
+            srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+            srch.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
+        everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        # Build a predicate bound to `results`: the returned function reports
+        # whether its argument shares a book_id with any entry in `results`.
+        # NOTE: the predicate is not pure - see the side effects below.
+        def already_found(results):
+            def f(e):
+                for r in results:
+                    if e.book_id == r.book_id:
+                        # On a match the entry's boost is overwritten with 0.9
+                        # and the entry is appended to `results`, so later calls
+                        # also compare against it.
+                        e.boost = 0.9
+                        # Appending while iterating is harmless here only because
+                        # we return immediately afterwards.
+                        results.append(e)
+                        return True
+                return False
+            return f
+        f = already_found(author_results + title_results + text_phrase)
+        everywhere = filter(lambda x: not f(x), everywhere)
+
+        author_results = SearchResult.aggregate(author_results)
+        title_results = SearchResult.aggregate(title_results)
+        
+        everywhere = SearchResult.aggregate(everywhere, author_title_rest)
+
+        for res in [author_results, title_results, text_phrase, everywhere]:
+            res.sort(reverse=True)
+            for r in res:
+                for h in r.hits:
+                    h['snippets'] = map(lambda s:
+                                        re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", 
+                                                re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
+                    
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
-
+        print "dym? %s" % repr(suggestion).encode('utf-8')
+        
+        results = author_results + title_results + text_phrase + everywhere
+        results.sort(reverse=True)
+        
         if len(results) == 1:
         if len(results) == 1:
-            if len(results[0].hits) == 0:
-                return HttpResponseRedirect(results[0].book.get_absolute_url())
-            elif len(results[0].hits) == 1 and results[0].hits[0] is not None:
-                frag = Fragment.objects.get(anchor=results[0].hits[0])
+            fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
+            if len(fragment_hits) == 1:
+                anchor = fragment_hits[0]['fragment']
+                frag = Fragment.objects.get(anchor=anchor)
                 return HttpResponseRedirect(frag.get_absolute_url())
                 return HttpResponseRedirect(frag.get_absolute_url())
+            return HttpResponseRedirect(results[0].book.get_absolute_url())
         elif len(results) == 0:
             form = PublishingSuggestForm(initial={"books": query + ", "})
             return render_to_response('catalogue/search_no_hits.html',
         elif len(results) == 0:
             form = PublishingSuggestForm(initial={"books": query + ", "})
             return render_to_response('catalogue/search_no_hits.html',
@@ -159,6 +196,9 @@ def main(request):
         return render_to_response('catalogue/search_multiple_hits.html',
                                   {'tags': tag_list,
                                    'prefix': query,
         return render_to_response('catalogue/search_multiple_hits.html',
                                   {'tags': tag_list,
                                    'prefix': query,
-                                   'results': results,
+                                   'results': { 'author': author_results,
+                                                'title': title_results,
+                                                'content': text_phrase,
+                                                'other': everywhere},
                                    'did_you_mean': suggestion},
             context_instance=RequestContext(request))
                                    'did_you_mean': suggestion},
             context_instance=RequestContext(request))