Some search fixes.
[wolnelektury.git] / apps / search / views.py
index e9b2564..052b2f1 100644 (file)
@@ -15,7 +15,7 @@ from catalogue.views import JSONResponse
 from search import Search, JVM, SearchResult
 from lucene import StringReader
 from suggest.forms import PublishingSuggestForm
 from search import Search, JVM, SearchResult
 from lucene import StringReader
 from suggest.forms import PublishingSuggestForm
-
+import re
 import enchant
 
 dictionary = enchant.Dict('pl_PL')
 import enchant
 
 dictionary = enchant.Dict('pl_PL')
@@ -138,26 +138,47 @@ def main(request):
             if bks is []:
                 author_title_rest.append(b)
         
             if bks is []:
                 author_title_rest.append(b)
         
-        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache))
-        [r.process_hits() for r in text_phrase]
+        text_phrase = SearchResult.aggregate(srch.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False))
+        
+        everywhere = srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+        def already_found(results):
+            def f(e):
+                for r in results:
+                    if e.book_id == r.book_id:
+                        results.append(e)
+                        return True
+                return False
+            return f
+        f = already_found(author_results + title_results)
+        everywhere = filter(lambda x: not f(x), everywhere)
+
+        author_results = SearchResult.aggregate(author_results)
+        title_results = SearchResult.aggregate(title_results)
         
         
-        everywhere = SearchResult.aggregate(srch.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache), author_title_rest)
-        [r.process_hits() for r in everywhere]
+        everywhere = SearchResult.aggregate(everywhere, author_title_rest)
 
         for res in [author_results, title_results, text_phrase, everywhere]:
             res.sort(reverse=True)
 
         for res in [author_results, title_results, text_phrase, everywhere]:
             res.sort(reverse=True)
-
+            for r in res:
+                for h in r.hits:
+                    h['snippets'] = map(lambda s:
+                                        re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", 
+                                                re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
+                    
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
         suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE"))
-
+        print "dym? %s" % repr(suggestion).encode('utf-8')
+        
         results = author_results + title_results + text_phrase + everywhere
         results.sort(reverse=True)
         
         if len(results) == 1:
         results = author_results + title_results + text_phrase + everywhere
         results.sort(reverse=True)
         
         if len(results) == 1:
-            if len(results[0].hits) == 0:
-                return HttpResponseRedirect(results[0].book.get_absolute_url())
-            elif len(results[0].hits) == 1 and results[0].hits[0] is not None:
-                frag = Fragment.objects.get(anchor=results[0].hits[0])
+            fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
+            if len(fragment_hits) == 1:
+                anchor = fragment_hits[0]['fragment']
+                frag = Fragment.objects.get(anchor=anchor)
                 return HttpResponseRedirect(frag.get_absolute_url())
                 return HttpResponseRedirect(frag.get_absolute_url())
+            return HttpResponseRedirect(results[0].book.get_absolute_url())
         elif len(results) == 0:
             form = PublishingSuggestForm(initial={"books": query + ", "})
             return render_to_response('catalogue/search_no_hits.html',
         elif len(results) == 0:
             form = PublishingSuggestForm(initial={"books": query + ", "})
             return render_to_response('catalogue/search_no_hits.html',