From dbb6eb3883a5f5e371f4bf7c89e74326feca0fd1 Mon Sep 17 00:00:00 2001
From: Marcin Koziej
Date: Wed, 21 Mar 2012 17:16:54 +0100
Subject: [PATCH] Search engine should now be db-change-proof

---
 apps/catalogue/models.py        |   2 +
 apps/pdcounter/models.py        |  19 ++-
 apps/search/index.py            | 231 ++++++++++++++++++++++++--------
 apps/search/views.py            | 215 +++++++++++++++--------------
 wolnelektury/settings/cache.py  |   1 -
 wolnelektury/settings/static.py |   1 +
 6 files changed, 307 insertions(+), 162 deletions(-)

diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py
index 556a8a1b8..ea09c048f 100644
--- a/apps/catalogue/models.py
+++ b/apps/catalogue/models.py
@@ -1087,9 +1087,11 @@ post_save.connect(_post_save_handler)
 @django.dispatch.receiver(post_delete, sender=Book)
 def _remove_book_from_index_handler(sender, instance, **kwargs):
     """ remove the book from search index, when it is deleted."""
+    search.JVM.attachCurrentThread()
     idx = search.Index()
     idx.open(timeout=10000)  # 10 seconds timeout.
     try:
         idx.remove_book(instance)
+        idx.index_tags()
     finally:
         idx.close()
diff --git a/apps/pdcounter/models.py b/apps/pdcounter/models.py
index f4832931f..af88bdb09 100644
--- a/apps/pdcounter/models.py
+++ b/apps/pdcounter/models.py
@@ -6,7 +6,8 @@ from django.db import models
 from django.db.models import permalink
 from django.utils.translation import ugettext as _
 from datetime import datetime
-
+from django.db.models.signals import post_save, post_delete
+import search
 
 class Author(models.Model):
     name = models.CharField(_('name'), max_length=50, db_index=True)
@@ -85,3 +86,19 @@ class BookStub(models.Model):
 
     def pretty_title(self, html_links=False):
         return ', '.join((self.author, self.title))
+
+
+def update_index(sender, instance, **kwargs):
+    print "update pd index %s [update %s]" % (instance, 'created' in kwargs)
+    search.JVM.attachCurrentThread()
+    idx = search.Index()
+    idx.open()
+    try:
+        idx.index_tags(instance, remove_only='created' not in kwargs)
+    finally:
+        idx.close()
+
+post_delete.connect(update_index, Author)
+post_delete.connect(update_index, BookStub)
+post_save.connect(update_index, Author)
+post_save.connect(update_index, BookStub)
diff --git a/apps/search/index.py b/apps/search/index.py
index b3e932285..9d6d59861 100644
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 
 from django.conf import settings
-from lucene import SimpleFSDirectory, NIOFSDirectory, IndexWriter, IndexWriterConfig, CheckIndex, \
+from django.dispatch import Signal
+from lucene import SimpleFSDirectory, NIOFSDirectory, IndexWriter, IndexReader, IndexWriterConfig, CheckIndex, \
     File, Field, Integer, \
     NumericField, Version, Document, JavaError, IndexSearcher, \
     QueryParser, PerFieldAnalyzerWrapper, \
@@ -92,6 +93,9 @@ class IndexStore(object):
                 pass
             else: raise
 
+    def close(self):
+        self.store.close()
+
 
 class IndexChecker(IndexStore):
     def __init__(self):
@@ -111,7 +115,7 @@ class Snippets(object):
     """
     SNIPPET_DIR = "snippets"
 
-    def __init__(self, book_id):
+    def __init__(self, book_id, revision=None):
         try:
             os.makedirs(os.path.join(settings.SEARCH_INDEX, self.SNIPPET_DIR))
         except OSError as exc:
@@ -119,15 +123,33 @@ class Snippets(object):
                 pass
             else: raise
         self.book_id = book_id
+        self.revision = revision
         self.file = None
 
+    @property
+    def path(self):
+        if self.revision: fn = "%d.%d" % (self.book_id, self.revision)
+        else: fn = "%d" % self.book_id
+
+        return os.path.join(settings.SEARCH_INDEX, self.SNIPPET_DIR, fn)
+
     def open(self, mode='r'):
         """
         Open the snippet
file. Call .close() afterwards. """ if not 'b' in mode: mode += 'b' - self.file = open(os.path.join(settings.SEARCH_INDEX, self.SNIPPET_DIR, str(self.book_id)), mode) + + if 'w' in mode: + if os.path.exists(self.path): + self.revision = 1 + while True: + if not os.path.exists(self.path): + break + self.revision += 1 + print "using %s" % self.path + + self.file = open(self.path, mode) self.position = 0 return self @@ -156,6 +178,17 @@ class Snippets(object): """Close snippet file""" self.file.close() + def remove(self): + self.revision = None + try: + os.unlink(self.path) + self.revision = 0 + while True: + self.revision += 1 + os.unlink(self.path) + except OSError: + pass + class BaseIndex(IndexStore): """ @@ -190,6 +223,10 @@ class BaseIndex(IndexStore): self.index.close() self.index = None + index_changed.send_robust(self) + + super(BaseIndex, self).close() + def __enter__(self): self.open() return self @@ -198,6 +235,9 @@ class BaseIndex(IndexStore): self.close() +index_changed = Signal() + + class Index(BaseIndex): """ Class indexing books. @@ -205,40 +245,66 @@ class Index(BaseIndex): def __init__(self, analyzer=None): super(Index, self).__init__(analyzer) - def index_tags(self): + def index_tags(self, *tags, **kw): """ Re-index global tag list. Removes all tags from index, then index them again. Indexed fields include: id, name (with and without polish stems), category """ - q = NumericRangeQuery.newIntRange("tag_id", 0, Integer.MAX_VALUE, True, True) - self.index.deleteDocuments(q) + remove_only = kw.get('remove_only', False) + # first, remove tags from index. + if tags: + q = BooleanQuery() + for tag in tags: + b_id_cat = BooleanQuery() + + q_id = NumericRangeQuery.newIntRange("tag_id", tag.id, tag.id, True, True) + b_id_cat.add(q_id, BooleanClause.Occur.MUST) - for tag in catalogue.models.Tag.objects.exclude(category='set'): - doc = Document() - doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id))) - doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED)) - self.index.addDocument(doc) - - for pdtag in PDCounterAuthor.objects.all(): - doc = Document() - doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id))) - doc.add(Field("tag_name", pdtag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_name_pl", pdtag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_category", 'pd_author', Field.Store.YES, Field.Index.NOT_ANALYZED)) - doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED)) - self.index.addDocument(doc) - - for pdtag in PDCounterBook.objects.all(): - doc = Document() - doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id))) - doc.add(Field("tag_name", pdtag.title, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_name_pl", pdtag.title, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_category", 'pd_book', Field.Store.YES, Field.Index.NOT_ANALYZED)) - doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED)) - self.index.addDocument(doc) + if isinstance(tag, PDCounterAuthor): + q_cat = TermQuery(Term('tag_category', 'pd_author')) + elif isinstance(tag, PDCounterBook): + q_cat = TermQuery(Term('tag_category', 'pd_book')) + else: + q_cat = TermQuery(Term('tag_category', tag.category)) + b_id_cat.add(q_cat, 
BooleanClause.Occur.MUST)
+
+                q.add(b_id_cat, BooleanClause.Occur.SHOULD)
+        else:  # all
+            q = NumericRangeQuery.newIntRange("tag_id", 0, Integer.MAX_VALUE, True, True)
+        self.index.deleteDocuments(q)
+
+        if not remove_only:
+            # then add them [all or just one passed]
+            if not tags:
+                tags = list(catalogue.models.Tag.objects.exclude(category='set')) + \
+                    list(PDCounterAuthor.objects.all()) + \
+                    list(PDCounterBook.objects.all())
+
+            for tag in tags:
+                if isinstance(tag, PDCounterAuthor):
+                    doc = Document()
+                    doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
+                    doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_category", 'pd_author', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    self.index.addDocument(doc)
+                elif isinstance(tag, PDCounterBook):
+                    doc = Document()
+                    doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
+                    doc.add(Field("tag_name", tag.title, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_name_pl", tag.title, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_category", 'pd_book', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    self.index.addDocument(doc)
+                else:
+                    doc = Document()
+                    doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
+                    doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED))
+                    self.index.addDocument(doc)
 
     def create_book_doc(self, book):
         """
@@ -250,12 +316,16 @@ class Index(BaseIndex):
             doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(int(book.parent.id)))
         return doc
 
-    def remove_book(self, book):
+    def remove_book(self, book, remove_snippets=True):
         """Removes a book from search index.
         book - Book instance."""
         q = NumericRangeQuery.newIntRange("book_id", book.id, book.id, True, True)
         self.index.deleteDocuments(q)
 
+        if remove_snippets:
+            snippets = Snippets(book.id)
+            snippets.remove()
+
     def index_book(self, book, book_info=None, overwrite=True):
         """
         Indexes the book.
         Takes care of snippets and sections.
         and calls self.index_content() to index the contents of the book.
""" if overwrite: - self.remove_book(book) + # we don't remove snippets, since they might be still needed by + # threads using not reopened index + self.remove_book(book, remove_snippets=False) book_doc = self.create_book_doc(book) meta_fields = self.extract_metadata(book, book_info) @@ -420,12 +492,16 @@ class Index(BaseIndex): .setIntValue('header_span' in fields and fields['header_span'] or 1)) doc.add(Field('header_type', fields["header_type"], Field.Store.YES, Field.Index.NOT_ANALYZED)) + print ">>[%s]>%s<<<" % (fields.get('fragment_anchor', ''), fields['content']) + doc.add(Field('content', fields["content"], Field.Store.NO, Field.Index.ANALYZED, \ Field.TermVector.WITH_POSITIONS_OFFSETS)) snip_pos = snippets.add(fields["content"]) doc.add(NumericField("snippets_position", Field.Store.YES, True).setIntValue(snip_pos[0])) doc.add(NumericField("snippets_length", Field.Store.YES, True).setIntValue(snip_pos[1])) + if snippets.revision: + doc.add(NumericField("snippets_revision", Field.Store.YES, True).setIntValue(snippets.revision)) if 'fragment_anchor' in fields: doc.add(Field("fragment_anchor", fields['fragment_anchor'], @@ -584,6 +660,8 @@ class ReusableIndex(Index): ReusableIndex.index.close() ReusableIndex.index = None + index_changed.send_robust(None) + def close(self): if ReusableIndex.index: ReusableIndex.index.commit() @@ -687,6 +765,8 @@ class SearchResult(object): return self def get_book(self): + if hasattr(self, '_book'): + return self._book return catalogue.models.Book.objects.get(id=self.book_id) book = property(get_book) @@ -705,7 +785,10 @@ class SearchResult(object): # to sections and fragments frags = filter(lambda r: r[FRAGMENT] is not None, self._hits) + sect = filter(lambda r: r[FRAGMENT] is None, self._hits) + + # sections not covered by fragments sect = filter(lambda s: 0 == len(filter( lambda f: s[POSITION][POSITION_INDEX] >= f[POSITION][POSITION_INDEX] and s[POSITION][POSITION_INDEX] < f[POSITION][POSITION_INDEX] + f[POSITION][POSITION_SPAN], @@ -713,15 +796,20 @@ class SearchResult(object): hits = [] - # remove duplicate fragments - fragments = {} - for f in frags: - fid = f[FRAGMENT] - if fid in fragments: - if fragments[fid][SCORE] >= f[SCORE]: - continue - fragments[fid] = f - frags = fragments.values() + def remove_duplicates(lst, keyfn, compare): + els = {} + for e in lst: + eif = keyfn(e) + if eif in els: + if compare(els[eif], e) >= 1: + continue + els[eif] = e + return els.values() + + # remove fragments with duplicated fid's and duplicated snippets + frags = remove_duplicates(frags, lambda f: f[FRAGMENT], lambda a, b: cmp(a[SCORE], b[SCORE])) + frags = remove_duplicates(frags, lambda f: f[OTHER]['snippets'] and f[OTHER]['snippets'][0] or hash(f), + lambda a, b: cmp(a[SCORE], b[SCORE])) # remove duplicate sections sections = {} @@ -903,12 +991,32 @@ class Search(IndexStore): IndexStore.__init__(self) self.analyzer = WLAnalyzer() # PolishAnalyzer(Version.LUCENE_34) # self.analyzer = WLAnalyzer() - self.searcher = IndexSearcher(self.store, True) + reader = IndexReader.open(self.store, True) + self.searcher = IndexSearcher(reader) self.parser = QueryParser(Version.LUCENE_34, default_field, self.analyzer) self.parent_filter = TermsFilter() self.parent_filter.addTerm(Term("is_book", "true")) + index_changed.connect(self.reopen) + + def close(self): + reader = self.searcher.getIndexReader() + self.searcher.close() + reader.close() + super(Search, self).close() + index_changed.disconnect(self.reopen) + + def reopen(self, **unused): + reader = 
self.searcher.getIndexReader()
+        rdr = reader.reopen()
+        print "got signal to reopen index"
+        if not rdr.equals(reader):
+            print "will reopen index"
+            oldsearch = self.searcher
+            self.searcher = IndexSearcher(rdr)
+            oldsearch.close()
+            reader.close()
 
     def query(self, query):
         """Parse query in default Lucene Syntax. (for humans)
@@ -1222,9 +1330,11 @@ class Search(IndexStore):
         length = stored.get('snippets_length')
         if position is None or length is None:
             return None
+        revision = stored.get('snippets_revision')
+        if revision: revision = int(revision)
+
         # locate content.
         book_id = int(stored.get('book_id'))
-        snippets = Snippets(book_id).open()
+        snippets = Snippets(book_id, revision=revision).open()
         try:
             try:
                 text = snippets.get((int(position),
@@ -1274,19 +1384,24 @@ class Search(IndexStore):
             doc = self.searcher.doc(found.doc)
             is_pdcounter = doc.get('is_pdcounter')
             category = doc.get('tag_category')
-            if is_pdcounter == 'true':
-                if category == 'pd_author':
-                    tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
-                elif category == 'pd_book':
-                    tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
-                    tag.category = 'pd_book'  # make it look more lik a tag.
-                else:
-                    print "Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (int(doc.get('tag_id')), category)
-            else:
-                tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
-            # don't add the pdcounter tag if same tag already exists
-            if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)):
-                tags.append(tag)
+            try:
+                if is_pdcounter == 'true':
+                    if category == 'pd_author':
+                        tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
+                    elif category == 'pd_book':
+                        tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
+                        tag.category = 'pd_book'  # make it look more like a tag.
+                    else:
+                        print "Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (int(doc.get('tag_id')), category)
+                else:
+                    tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
+                # don't add the pdcounter tag if same tag already exists
+                if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)):
+                    tags.append(tag)
+            except catalogue.models.Tag.DoesNotExist: pass
+            except PDCounterAuthor.DoesNotExist: pass
+            except PDCounterBook.DoesNotExist: pass
+
         # print "%s (%d) -> %f" % (tag, tag.id, found.score)
         print 'returning %s' % tags
         return tags
@@ -1299,7 +1414,9 @@ class Search(IndexStore):
         tops = self.searcher.search(query, filter, max_results)
         for found in tops.scoreDocs:
             doc = self.searcher.doc(found.doc)
-            bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
+            try:
+                bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
+            except catalogue.models.Book.DoesNotExist: pass
         return bks
 
     def make_prefix_phrase(self, toks, field):
diff --git a/apps/search/views.py b/apps/search/views.py
index 2945c42ad..dcd80b29f 100644
--- a/apps/search/views.py
+++ b/apps/search/views.py
@@ -54,6 +54,7 @@ def did_you_mean(query, tokens):
 JVM.attachCurrentThread()
 search = Search()
 
+
 def hint(request):
     prefix = request.GET.get('term', '')
     if len(prefix) < 2:
@@ -77,7 +78,7 @@ def hint(request):
 
     def category_name(c):
         if c.startswith('pd_'):
-            c=c[len('pd_'):]
+            c = c[len('pd_'):]
         return _(c)
 
     return JSONResponse(
@@ -101,109 +102,117 @@ def main(request):
     query = None
     fuzzy = False #0.8
 
-    if 'q' in request.GET:
-        # tags = request.GET.get('tags', '')
-        query = request.GET['q']
-        # book_id = request.GET.get('book', None)
-        # book = None
-        # if book_id is not None:
-        #     book = get_object_or_404(Book, id=book_id)
-
-        # hint = search.hint()
-        # try:
# tag_list = Tag.get_tag_list(tags) - # except: - # tag_list = [] - - if len(query) < 2: - return render_to_response('catalogue/search_too_short.html', {'prefix': query}, - context_instance=RequestContext(request)) - - # hint.tags(tag_list) - # if book: - # hint.books(book) - tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy) - tags = split_tags(tags) - - toks = StringReader(query) - tokens_cache = {} - - author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache) - title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache) - - # Boost main author/title results with mixed search, and save some of its results for end of list. - # boost author, title results - author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache) - author_title_rest = [] - for b in author_title_mixed: - bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results) - for b2 in bks: - b2.boost *= 1.1 - if bks is []: - author_title_rest.append(b) - - # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere, - # Because the query is using only one field. - text_phrase = SearchResult.aggregate( - search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4), - search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False)) - - everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache) - - def already_found(results): - def f(e): - for r in results: - if e.book_id == r.book_id: - e.boost = 0.9 - results.append(e) - return True - return False - return f - f = already_found(author_results + title_results + text_phrase) - everywhere = filter(lambda x: not f(x), everywhere) - - author_results = SearchResult.aggregate(author_results) - title_results = SearchResult.aggregate(title_results) - - everywhere = SearchResult.aggregate(everywhere, author_title_rest) - - for res in [author_results, title_results, text_phrase, everywhere]: - res.sort(reverse=True) - for r in res: - for h in r.hits: - h['snippets'] = map(lambda s: - re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", - re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets']) - - suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE")) - print "dym? 
%s" % repr(suggestion).encode('utf-8') - - results = author_results + title_results + text_phrase + everywhere - results.sort(reverse=True) - - if len(results) == 1: - fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits) - if len(fragment_hits) == 1: - #anchor = fragment_hits[0]['fragment'] - #frag = Fragment.objects.get(anchor=anchor) - return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url()) - return HttpResponseRedirect(results[0].book.get_absolute_url()) - elif len(results) == 0: - form = PublishingSuggestForm(initial={"books": query + ", "}) - return render_to_response('catalogue/search_no_hits.html', - {'tags': tags, - 'prefix': query, - "form": form, - 'did_you_mean': suggestion}, - context_instance=RequestContext(request)) - - print "TAGS: %s" % tags - return render_to_response('catalogue/search_multiple_hits.html', + query = request.GET.get('q','') + # book_id = request.GET.get('book', None) + # book = None + # if book_id is not None: + # book = get_object_or_404(Book, id=book_id) + + # hint = search.hint() + # try: + # tag_list = Tag.get_tag_list(tags) + # except: + # tag_list = [] + + if len(query) < 2: + return render_to_response('catalogue/search_too_short.html', {'prefix': query}, + context_instance=RequestContext(request)) + + # hint.tags(tag_list) + # if book: + # hint.books(book) + tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy) + tags = split_tags(tags) + + toks = StringReader(query) + tokens_cache = {} + + author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache) + title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache) + + # Boost main author/title results with mixed search, and save some of its results for end of list. + # boost author, title results + author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache) + author_title_rest = [] + for b in author_title_mixed: + bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results) + for b2 in bks: + b2.boost *= 1.1 + if bks is []: + author_title_rest.append(b) + + # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere, + # Because the query is using only one field. 
+    text_phrase = SearchResult.aggregate(
+        search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+        search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
+    everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+    def already_found(results):
+        def f(e):
+            for r in results:
+                if e.book_id == r.book_id:
+                    e.boost = 0.9
+                    results.append(e)
+                    return True
+            return False
+        return f
+    f = already_found(author_results + title_results + text_phrase)
+    everywhere = filter(lambda x: not f(x), everywhere)
+
+    author_results = SearchResult.aggregate(author_results)
+    title_results = SearchResult.aggregate(title_results)
+
+    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
+
+    for res in [author_results, title_results, text_phrase, everywhere]:
+        res.sort(reverse=True)
+        for r in res:
+            for h in r.hits:
+                h['snippets'] = map(lambda s:
+                                    re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
+                                            re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
+
+    suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
+
+    def ensure_exists(r):
+        try:
+            return r.book
+        except Book.DoesNotExist:
+            return False
+
+    author_results = filter(ensure_exists, author_results)
+    title_results = filter(ensure_exists, title_results)
+    text_phrase = filter(ensure_exists, text_phrase)
+    everywhere = filter(ensure_exists, everywhere)
+
+    results = author_results + title_results + text_phrase + everywhere
+    # ensure books do exist & sort them
+    results.sort(reverse=True)
+
+    if len(results) == 1:
+        fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
+        if len(fragment_hits) == 1:
+            #anchor = fragment_hits[0]['fragment']
+            #frag = Fragment.objects.get(anchor=anchor)
+            return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
+        return HttpResponseRedirect(results[0].book.get_absolute_url())
+    elif len(results) == 0:
+        form = PublishingSuggestForm(initial={"books": query + ", "})
+        return render_to_response('catalogue/search_no_hits.html',
                                   {'tags': tags,
                                    'prefix': query,
-                                   'results': { 'author': author_results,
-                                                'title': title_results,
-                                                'content': text_phrase,
-                                                'other': everywhere},
+                                   "form": form,
                                    'did_you_mean': suggestion},
                                   context_instance=RequestContext(request))
+
+    return render_to_response('catalogue/search_multiple_hits.html',
+                              {'tags': tags,
+                               'prefix': query,
+                               'results': { 'author': author_results,
+                                            'title': title_results,
+                                            'content': text_phrase,
+                                            'other': everywhere},
+                               'did_you_mean': suggestion},
+                              context_instance=RequestContext(request))
diff --git a/wolnelektury/settings/cache.py b/wolnelektury/settings/cache.py
index cc9024063..61d3bb5c0 100644
--- a/wolnelektury/settings/cache.py
+++ b/wolnelektury/settings/cache.py
@@ -23,4 +23,3 @@ CACHES = {
     },
 }
 CACHE_MIDDLEWARE_ANONYMOUS_ONLY=True
-SEARCH_INDEX = path.join(PROJECT_DIR, '../search_index/')
diff --git a/wolnelektury/settings/static.py b/wolnelektury/settings/static.py
index 797433dcd..188400dc9 100644
--- a/wolnelektury/settings/static.py
+++ b/wolnelektury/settings/static.py
@@ -5,6 +5,7 @@ from settings.paths import PROJECT_DIR
 # Example: "/home/media/media.lawrence.com/"
 MEDIA_ROOT = path.join(PROJECT_DIR, '../media/')
 STATIC_ROOT = path.join(PROJECT_DIR, 'static/')
+SEARCH_INDEX = path.join(PROJECT_DIR, '../search_index/')
 
 # URL that handles the media served from MEDIA_ROOT.
Make sure to use a # trailing slash if there is a path component (optional in other cases). -- 2.20.1
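
The snippet store change above is the heart of the "db-change-proof" behaviour: a writer never truncates an existing snippet file, it picks the next free "<book_id>.<revision>" name instead, and the chosen revision is stored in each Lucene document as "snippets_revision", so searcher threads still holding offsets into an older file keep reading from it. A minimal standalone sketch of that naming scheme (illustrative only; snippet_path and next_free_path are stand-in names, not identifiers from the patch):

    import os

    def snippet_path(snippet_dir, book_id, revision=None):
        # Mirrors Snippets.path: the base file is "<book_id>",
        # revised files are "<book_id>.<revision>".
        if revision:
            fn = "%d.%d" % (book_id, revision)
        else:
            fn = "%d" % book_id
        return os.path.join(snippet_dir, fn)

    def next_free_path(snippet_dir, book_id):
        # Mirrors Snippets.open(mode='w'): if the base file exists,
        # bump the revision until an unused name is found; the caller
        # records the revision in the index so readers can open the
        # matching file later.
        if not os.path.exists(snippet_path(snippet_dir, book_id)):
            return snippet_path(snippet_dir, book_id), None
        revision = 1
        while os.path.exists(snippet_path(snippet_dir, book_id, revision)):
            revision += 1
        return snippet_path(snippet_dir, book_id, revision), revision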
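The index_changed signal decouples writers from the long-lived Search instance in views.py: every index close broadcasts the signal, and Search.reopen() swaps in a fresh reader only when Lucene reports an actual change (IndexReader.reopen() returns the same reader when nothing changed). A sketch of the same pattern with a dummy reader standing in for Lucene's IndexReader (the Reader and Searcher classes here are illustrative, not the patch's):

    from django.dispatch import Signal

    index_changed = Signal()

    class Reader(object):
        """Dummy stand-in for lucene.IndexReader."""
        def reopen(self):
            # Lucene returns the same instance when the index is unchanged.
            return self
        def close(self):
            pass

    class Searcher(object):
        def __init__(self, reader):
            self.reader = reader
            index_changed.connect(self.reopen)

        def reopen(self, **unused):
            new_reader = self.reader.reopen()
            if new_reader is not self.reader:  # only swap on a real change
                old_reader, self.reader = self.reader, new_reader
                old_reader.close()

    # a writer notifies all connected searchers from its close():
    #     index_changed.send_robust(sender)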
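SearchResult.process_hits now funnels both de-duplication passes (first by fragment id, then by first snippet) through one generic helper that keeps the best-scoring element per key. The same idea, sketched without Python 2's cmp() so it runs anywhere (the dict field names are assumptions for the example, not the patch's hit structure):

    def remove_duplicates(items, keyfn, better):
        # Keep one element per key; replace the stored element only
        # when the new one wins the comparison.
        best = {}
        for item in items:
            key = keyfn(item)
            if key in best and not better(item, best[key]):
                continue
            best[key] = item
        return list(best.values())

    hits = [{'id': 1, 'score': 0.5}, {'id': 1, 'score': 0.9}, {'id': 2, 'score': 0.1}]
    deduped = remove_duplicates(hits, lambda h: h['id'],
                                lambda a, b: a['score'] > b['score'])
    # -> one hit per id; the 0.9-score hit survives for id 1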
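Most of the remaining hunks apply one defensive pattern: a hit coming back from Lucene may reference a database row that was deleted after indexing, so each lookup is wrapped and misses are skipped instead of raising. Reduced to its core (books_from_hits is an illustrative name; the patch inlines this logic in Search.search_books and in the tag search):

    import catalogue.models

    def books_from_hits(searcher, score_docs):
        books = []
        for found in score_docs:
            doc = searcher.doc(found.doc)
            try:
                books.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
            except catalogue.models.Book.DoesNotExist:
                # the index is slightly stale; drop the orphaned hit
                pass
        return books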