if position is None or length is None:
return None
# locate content.
- snippets = Snippets(stored.get('book_id')).open()
+ book_id = int(stored.get('book_id'))
+ snippets = Snippets(book_id).open()
- text = snippets.get((int(position),
- int(length)))
- finally:
- snippets.close()
+ try:
+ text = snippets.get((int(position),
+ int(length)))
+ finally:
+ snippets.close()
- tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
- # highlighter.getBestTextFragments(tokenStream, text, False, 10)
- snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
+ tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
+ # highlighter.getBestTextFragments(tokenStream, text, False, 10)
+ snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
+ except Exception, e:
+ e2 = e
+ if hasattr(e, 'getJavaException'):
+ e2 = unicode(e.getJavaException())
+ raise Exception("Problem fetching snippets for book %d, @%d len=%d" % (book_id, int(position), int(length)),
+ e2)
return snip
return only_term
- def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True):
+ def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True, fuzzy=False):
Return auto-complete hints for tags
using prefix search.
if prefix:
q = self.make_prefix_phrase(toks, field)
- q = self.make_term_query(toks, field)
+ q = self.make_term_query(toks, field, fuzzy=fuzzy)
top.add(BooleanClause(q, BooleanClause.Occur.SHOULD))
no_book_cat = self.term_filter(Term("tag_category", "book"), inverse=True)
return self.search_tags(top, no_book_cat, max_results=max_results, pdcounter=pdcounter)
- def hint_books(self, string, max_results=50, prefix=True):
+ def hint_books(self, string, max_results=50, prefix=True, fuzzy=False):
Returns auto-complete hints for book titles
Because we do not index 'pseudo' title-tags.
if prefix:
q = self.make_prefix_phrase(toks, 'title')
- q = self.make_term_query(toks, 'title')
+ q = self.make_term_query(toks, 'title', fuzzy=fuzzy)
return self.search_books(q, self.term_filter(Term("is_book", "true")), max_results=max_results)
from django.core.management.base import BaseCommand
+from optparse import make_option
class Command(BaseCommand):
help = 'Reindex everything.'
args = ''
+ option_list = BaseCommand.option_list + (
+ make_option('-n', '--book-id', action='store_true', dest='book_id', default=False,
+ help='book id'),
+ )
def handle(self, *args, **opts):
from catalogue.models import Book
import search
if args:
books = []
for a in args:
- books += Book.objects.filter(slug=a).all()
+ if opts['book_id']:
+ books += Book.objects.filter(id=int(a)).all()
+ else:
+ books += Book.objects.filter(slug=a).all()
books = Book.objects.all()
for b in books:
print b.title
- idx.index_book(b, None)
+ idx.index_book(b)
print 'Reindexing tags.'
--- /dev/null
+from django.core.management.base import BaseCommand
+from glob import glob
+from optparse import make_option
+from os import path
+from sys import stdout
+from django.conf import settings
+class Command(BaseCommand):
+ help = 'Reindex everything.'
+ args = ''
+ option_list = BaseCommand.option_list + (
+ make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
+ help='Check snippets utf-8'),
+ make_option('-c', '--check', action='store_true', dest='check2', default=False,
+ help='Check snippets utf-8 by walking through index'),
+ )
+ def handle(self, *args, **opts):
+ from catalogue.models import Book
+ import search
+ if opts['check']:
+ sfn = glob(settings.SEARCH_INDEX+'snippets/*')
+ print sfn
+ for fn in sfn:
+ print fn
+ bkid = int(path.basename(fn))
+ with open(fn) as f:
+ cont = f.read()
+ try:
+ uc = cont.decode('utf-8')
+ except UnicodeDecodeError, ude:
+ print "error in snippets %d" % bkid
+ if opts['check2']:
+ s = search.Search()
+ reader = s.searcher.getIndexReader()
+ numdocs = reader.numDocs()
+ for did in range(numdocs):
+ doc = reader.document(did)
+ if doc and doc.get('book_id'):
+ bkid = int(doc.get('book_id'))
+ #import pdb; pdb.set_trace()
+ stdout.write("\r%d / %d" % (did, numdocs))
+ stdout.flush()
+ ss = doc.get('snippet_position')
+ sl = doc.get('snippet_length')
+ if ss and sl:
+ snips = Snippets(bkid)
+ try:
+ txt = snips.get((ss,sl))
+ assert len(txt) == sl
+ except UnicodeDecodeError, ude:
+ stdout.write("\nerror in snippets %d\n" % bkid)
+ raise ude
+ stdout.write("\ndone.\n")
if not dictionary.check(t):
- change[t] = dictionary.suggest(t)[0]
+ change_to = dictionary.suggest(t)[0].lower()
+ if change_to != t.lower():
+ change[t] = change_to
except IndexError:
results = None
query = None
- fuzzy = False
+ fuzzy = False #0.8
if 'q' in request.GET:
# tags = request.GET.get('tags', '')
# hint.tags(tag_list)
# if book:
# hint.books(book)
- tags = srch.hint_tags(query, pdcounter=True, prefix=False)
+ tags = srch.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
tags = split_tags(tags)
toks = StringReader(query)
tokens_cache = {}
- fuzzy = 'fuzzy' in request.GET
- if fuzzy:
- fuzzy = 0.7
author_results = srch.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
title_results = srch.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
if len(results) == 1:
fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
if len(fragment_hits) == 1:
- anchor = fragment_hits[0]['fragment']
- frag = Fragment.objects.get(anchor=anchor)
- return HttpResponseRedirect(frag.get_absolute_url())
+ #anchor = fragment_hits[0]['fragment']
+ #frag = Fragment.objects.get(anchor=anchor)
+ return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
return HttpResponseRedirect(results[0].book.get_absolute_url())
elif len(results) == 0:
form = PublishingSuggestForm(initial={"books": query + ", "})
* ingenious float containment hack
* http://www.mikepadgett.com/technology/technical/alternative-to-the-pie-clearfix-hack/
.search-result .book-box-inner {
height: 1%;
overflow: hidden;
.book-mini-box img, .book-box img, .book-wide-box img, .search-result img {
width: 13.9em;