doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(int(book.parent.id)))
return doc
def remove_book(self, book_or_id, remove_snippets=True):
    """Remove a book's documents from the search index.

    book_or_id - a catalogue.models.Book instance, or the book's id.
    remove_snippets - when true, the Snippets kept for this book id
                      are removed as well.
    """
    given_book = isinstance(book_or_id, catalogue.models.Book)
    book_id = book_or_id.id if given_book else book_or_id

    # Inclusive range [book_id, book_id] on the "book_id" field.
    exact_id = NumericRangeQuery.newIntRange("book_id", book_id, book_id, True, True)
    self.index.deleteDocuments(exact_id)

    if not remove_snippets:
        return
    Snippets(book_id).remove()
def index_book(self, book, book_info=None, overwrite=True):
return toks
+ @staticmethod
def fuzziness(self, fuzzy):
"""Helper method to sanitize fuzziness"""
if not fuzzy:
phrase.add(term)
return phrase
+ @staticmethod
def make_term_query(self, tokens, field='content', modal=BooleanClause.Occur.SHOULD, fuzzy=False):
"""
Returns term queries joined by boolean query.
from django.core.management.base import BaseCommand
-from search import Index
+from search import Index, Search
+from lucene import IndexReader, IndexSearcher, Term
+from catalogue.models import Book
+
class Command(BaseCommand):
help = 'Optimize Lucene search index'
args = ''
def delete_old(self, index):
    """Remove from the search index every book missing from the database.

    index - an opened Index; documents are read through ``index.index``
            and stale entries are dropped with ``index.remove_book``.

    Every document carrying the term ``is_book:true`` is inspected; when
    its stored ``book_id`` does not belong to any existing Book row, a
    message is printed and that book id is removed from the index.
    """
    # Function-scope import: TermQuery is only needed by this method.
    from lucene import TermQuery

    # Only the ids are needed, so avoid instantiating full Book objects.
    existing_ids = set(Book.objects.values_list('id', flat=True))

    book_marker = Term('is_book', 'true')
    reader = IndexReader.open(index.index, False)
    try:
        searcher = IndexSearcher(reader)
        try:
            # Number of documents carrying the marker term; used as the
            # hit limit so that every such document is returned.
            num = searcher.docFreq(book_marker)
            if num <= 0:
                # Lucene rejects a search asking for zero hits, and there
                # is nothing to verify anyway.
                return

            # Query the very term that was counted above, so the query
            # and the hit limit always agree.
            docs = searcher.search(TermQuery(book_marker), num)
            for result in docs.scoreDocs:
                stored = searcher.doc(result.doc)
                book_id = int(stored.get('book_id'))
                if book_id not in existing_ids:
                    print("book id %d doesn't exist." % book_id)
                    index.remove_book(book_id)
        finally:
            searcher.close()
    finally:
        # Closed even when creating or closing the searcher fails.
        reader.close()
+
def handle(self, *args, **opts):
index = Index()
index.open()
+
+ self.delete_old(index)
+
try:
index.optimize()
finally: