# Reconstructed post-patch contents of
# apps/search/management/commands/optimizeindex.py, recovered from the blobdiff
# https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/2d850297174fc2800a93ca9d2e0a2bc855907ea6..f2e5eb8d76c57ee0198207e68a198d2b9cfd6fec:/apps/search/management/commands/optimizeindex.py
from django.core.management.base import BaseCommand
from search import Index, Search
from lucene import IndexReader, IndexSearcher, Term
from catalogue.models import Book


class Command(BaseCommand):
    """Prune index entries for books missing from the database, then optimize."""

    help = 'Optimize Lucene search index'
    args = ''

    def delete_old(self, index):
        """Ask ``index`` to remove every indexed book that has no ``Book`` row.

        Looks up all documents whose ``is_book`` field is ``'true'``, reads
        the stored ``book_id`` of each one, and calls
        ``index.remove_book(book_id)`` for ids that are not present among the
        ``Book`` primary keys. Each such id is also reported on stdout.

        :param index: an opened ``search.Index``; its ``index`` attribute is
            handed to ``IndexReader.open`` and its ``remove_book`` method is
            used for the removals.
        """
        # Fetch only the primary keys instead of instantiating every Book.
        existing_ids = set(Book.objects.values_list('id', flat=True))

        reader = IndexReader.open(index.index, False)
        try:
            searcher = IndexSearcher(reader)
            try:
                num = searcher.docFreq(Term('is_book', 'true'))
                if num <= 0:
                    # No book documents at all, so nothing can be stale.
                    # NOTE(review): Lucene is expected to reject a
                    # non-positive hit limit in search() -- confirm for the
                    # PyLucene version in use.
                    return

                query = Search.make_term_query(['true'], 'is_book')
                docs = searcher.search(query, num)
                for result in docs.scoreDocs:
                    stored = searcher.doc(result.doc)
                    book_id = int(stored.get('book_id'))
                    if book_id not in existing_ids:
                        # Single-argument form prints the same text on
                        # Python 2 and Python 3.
                        print("book id %d doesn't exist." % book_id)
                        index.remove_book(book_id)
            finally:
                searcher.close()
        finally:
            # Nested so the reader is released even when creating or closing
            # the searcher fails.
            reader.close()

    def handle(self, *args, **opts):
        """Open the index, prune stale books, optimize it, then clean up."""
        index = Index()
        index.open()
        try:
            # Pruning runs inside the try so a failure here still reaches the
            # cleanup in the finally clause.
            self.delete_old(index)
            index.optimize()
        finally:
            # NOTE(review): the diff this file was recovered from is cut off
            # right after `finally:`; closing the index is assumed to be the
            # missing cleanup line -- confirm against the repository.
            index.close()