fixes on staging

author Marcin Koziej <marcin@lolownia.org>

Wed, 1 Feb 2012 12:06:41 +0000 (13:06 +0100)

committer Marcin Koziej <marcin@lolownia.org>

Wed, 1 Feb 2012 12:06:41 +0000 (13:06 +0100)
author Marcin Koziej <marcin@lolownia.org>
Wed, 1 Feb 2012 12:06:41 +0000 (13:06 +0100)
committer Marcin Koziej <marcin@lolownia.org>
Wed, 1 Feb 2012 12:06:41 +0000 (13:06 +0100)
diff --git a/apps/search/index.py b/apps/search/index.py

index 97145d3..8ea3124 100644 (file)
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -1212,17 +1212,25 @@ class Search(IndexStore):
          if position is None or length is None:
              return None
          # locate content.
-        snippets = Snippets(stored.get('book_id')).open()
+        book_id = int(stored.get('book_id'))
+        snippets = Snippets(book_id).open()
          try:
-            text = snippets.get((int(position),
-                                 int(length)))
-        finally:
-            snippets.close()
+            try:
+                text = snippets.get((int(position),
+                                     int(length)))
+            finally:
+                snippets.close()
  
-        tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
-        #  highlighter.getBestTextFragments(tokenStream, text, False, 10)
-        snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
+            tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
+            #  highlighter.getBestTextFragments(tokenStream, text, False, 10)
+            snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
  
+        except Exception, e:
+            e2 = e
+            if hasattr(e, 'getJavaException'):
+                e2 = unicode(e.getJavaException())
+            raise Exception("Problem fetching snippets for book %d, @%d len=%d" % (book_id, int(position), int(length)),
+                e2)
          return snip
  
      @staticmethod
@@ -1302,7 +1310,7 @@ class Search(IndexStore):
  
          return only_term
  
-    def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True):
+    def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True, fuzzy=False):
          """
          Return auto-complete hints for tags
          using prefix search.
@@ -1314,14 +1322,14 @@ class Search(IndexStore):
              if prefix:
                  q = self.make_prefix_phrase(toks, field)
              else:
-                q = self.make_term_query(toks, field)
+                q = self.make_term_query(toks, field, fuzzy=fuzzy)
              top.add(BooleanClause(q, BooleanClause.Occur.SHOULD))
  
          no_book_cat = self.term_filter(Term("tag_category", "book"), inverse=True)
  
          return self.search_tags(top, no_book_cat, max_results=max_results, pdcounter=pdcounter)
  
-    def hint_books(self, string, max_results=50, prefix=True):
+    def hint_books(self, string, max_results=50, prefix=True, fuzzy=False):
          """
          Returns auto-complete hints for book titles
          Because we do not index 'pseudo' title-tags.
@@ -1332,7 +1340,7 @@ class Search(IndexStore):
          if prefix:
              q = self.make_prefix_phrase(toks, 'title')
          else:
-            q = self.make_term_query(toks, 'title')
+            q = self.make_term_query(toks, 'title', fuzzy=fuzzy)
  
          return self.search_books(q, self.term_filter(Term("is_book", "true")), max_results=max_results)
  
diff --git a/apps/search/management/commands/reindex.py b/apps/search/management/commands/reindex.py

index bce4708..8901102 100755 (executable)
--- a/apps/search/management/commands/reindex.py
+++ b/apps/search/management/commands/reindex.py
@@ -1,9 +1,14 @@
  from django.core.management.base import BaseCommand
  
+from optparse import make_option
  class Command(BaseCommand):
      help = 'Reindex everything.'
      args = ''
-
+    
+    option_list = BaseCommand.option_list + (
+        make_option('-n', '--book-id', action='store_true', dest='book_id', default=False,
+            help='book id'),
+    )
      def handle(self, *args, **opts):
          from catalogue.models import Book
          import search
@@ -13,13 +18,16 @@ class Command(BaseCommand):
          if args:
              books = []
              for a in args:
-                books += Book.objects.filter(slug=a).all()
+                if opts['book_id']:
+                    books += Book.objects.filter(id=int(a)).all()
+                else:
+                    books += Book.objects.filter(slug=a).all()
          else:
              books = Book.objects.all()
              
          for b in books:
              print b.title
-            idx.index_book(b, None)
+            idx.index_book(b)
          print 'Reindexing tags.'
          idx.index_tags()
          idx.close()
diff --git a/apps/search/management/commands/snippets.py b/apps/search/management/commands/snippets.py

new file mode 100755 (executable)

index 0000000..058ea05
--- /dev/null
+++ b/apps/search/management/commands/snippets.py
@@ -0,0 +1,60 @@
+from django.core.management.base import BaseCommand
+
+from glob import glob
+from optparse import make_option
+from os import path
+from sys import stdout
+from django.conf import settings
+
+class Command(BaseCommand):
+    help = 'Reindex everything.'
+    args = ''
+
+    option_list = BaseCommand.option_list + (
+        make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
+            help='Check snippets utf-8'),
+        make_option('-c', '--check', action='store_true', dest='check2', default=False,
+            help='Check snippets utf-8 by walking through index'),
+        )
+
+
+    def handle(self, *args, **opts):
+        from catalogue.models import Book
+        import search
+
+        if opts['check']:
+            sfn = glob(settings.SEARCH_INDEX+'snippets/*')
+            print sfn
+            for fn in sfn:
+                print fn
+                bkid = int(path.basename(fn))
+                with open(fn) as f:
+                    cont = f.read()
+                    try:
+                        uc = cont.decode('utf-8')
+                    except UnicodeDecodeError, ude:
+                        print "error in snippets %d" % bkid
+        if opts['check2']:
+            s = search.Search()
+            reader = s.searcher.getIndexReader()
+            numdocs = reader.numDocs()
+            for did in range(numdocs):
+                doc = reader.document(did)
+                if doc and doc.get('book_id'):
+                    bkid = int(doc.get('book_id'))
+                    #import pdb; pdb.set_trace()
+                    stdout.write("\r%d / %d" % (did, numdocs))
+                    stdout.flush()
+                    ss  = doc.get('snippet_position')
+                    sl  = doc.get('snippet_length')
+                    if ss and sl:
+                        snips = Snippets(bkid)
+                        try:
+                            txt = snips.get((ss,sl))
+                            assert len(txt) == sl
+                        except UnicodeDecodeError, ude:
+                            stdout.write("\nerror in snippets %d\n" % bkid)
+                            raise ude
+
+            stdout.write("\ndone.\n")
+
diff --git a/apps/search/views.py b/apps/search/views.py

index 623b311..de2337f 100644 (file)
--- a/apps/search/views.py
+++ b/apps/search/views.py
@@ -37,7 +37,9 @@ def did_you_mean(query, tokens):
  
          if not dictionary.check(t):
              try:
-                change[t] = dictionary.suggest(t)[0]
+                change_to = dictionary.suggest(t)[0].lower()
+                if change_to != t.lower():
+                    change[t] = change_to
              except IndexError:
                  pass
  
@@ -94,7 +96,7 @@ def main(request):
  
      results = None
      query = None
-    fuzzy = False
+    fuzzy = False #0.8
  
      if 'q' in request.GET:
          # tags = request.GET.get('tags', '')
@@ -117,14 +119,11 @@ def main(request):
          # hint.tags(tag_list)
          # if book:
          #     hint.books(book)
-        tags = srch.hint_tags(query, pdcounter=True, prefix=False)
+        tags = srch.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
          tags = split_tags(tags)
  
          toks = StringReader(query)
          tokens_cache = {}
-        fuzzy = 'fuzzy' in request.GET
-        if fuzzy:
-            fuzzy = 0.7
  
          author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
          title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
@@ -182,9 +181,9 @@ def main(request):
          if len(results) == 1:
              fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
              if len(fragment_hits) == 1:
-                anchor = fragment_hits[0]['fragment']
-                frag = Fragment.objects.get(anchor=anchor)
-                return HttpResponseRedirect(frag.get_absolute_url())
+                #anchor = fragment_hits[0]['fragment']
+                #frag = Fragment.objects.get(anchor=anchor)
+                return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
              return HttpResponseRedirect(results[0].book.get_absolute_url())
          elif len(results) == 0:
              form = PublishingSuggestForm(initial={"books": query + ", "})
diff --git a/wolnelektury/static/css/book_box.css b/wolnelektury/static/css/book_box.css

index abf07de..876bf06 100755 (executable)
--- a/wolnelektury/static/css/book_box.css
+++ b/wolnelektury/static/css/book_box.css
@@ -88,10 +88,12 @@
   * ingenous float containment hack 
   * http://www.mikepadgett.com/technology/technical/alternative-to-the-pie-clearfix-hack/
   */
+/*
  .search-result .book-box-inner {
      height: 1%;
      overflow: hidden;
  }
+*/
  
  .book-mini-box img, .book-box img, .book-wide-box img, .search-result img {
      width: 13.9em;
author	Marcin Koziej <marcin@lolownia.org>
	Wed, 1 Feb 2012 12:06:41 +0000 (13:06 +0100)
committer	Marcin Koziej <marcin@lolownia.org>
	Wed, 1 Feb 2012 12:06:41 +0000 (13:06 +0100)
apps/search/index.py		patch | blob | history
apps/search/management/commands/reindex.py		patch | blob | history
apps/search/management/commands/snippets.py	[new file with mode: 0755]	patch | blob
apps/search/views.py		patch | blob | history
wolnelektury/static/css/book_box.css		patch | blob | history