X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/d52c76021eaf1621b4e55a6eba1df1c0404bd7dd..b9c23cfd467a8674f9c7efa2abe4ab8e1e8a3c63:/apps/search/management/commands/optimizeindex.py

diff --git a/apps/search/management/commands/optimizeindex.py b/apps/search/management/commands/optimizeindex.py
index a8a4cf9dd..51bf95b4e 100644
--- a/apps/search/management/commands/optimizeindex.py
+++ b/apps/search/management/commands/optimizeindex.py
@@ -1,14 +1,38 @@
 
 from django.core.management.base import BaseCommand
-from search import Index
+from search import Index, Search
+from lucene import IndexReader, IndexSearcher, Term
+from catalogue.models import Book
+
 
 class Command(BaseCommand):
     help = 'Optimize Lucene search index'
     args = ''
 
+    def delete_old(self, index):  # Drop index entries whose book_id has no matching Book row.
+        existing_ids = set([book.id for book in Book.objects.all()])  # ids of every Book currently in the database
+
+        reader = IndexReader.open(index.index, False)  # NOTE(review): False is presumably Lucene's readOnly flag - confirm
+        searcher = IndexSearcher(reader)
+        try:
+            num = searcher.docFreq(Term('is_book', 'true'))  # count of is_book:true docs, used as the hit limit below
+            docs = searcher.search(Search.make_term_query(['true'], 'is_book'), num)  # NOTE(review): num may be 0 on an empty index; verify Lucene accepts that limit
+            for result in docs.scoreDocs:
+                stored = searcher.doc(result.doc)  # fetch the stored document for this hit
+                book_id = int(stored.get('book_id'))  # NOTE(review): assumes every is_book doc stores a numeric book_id - confirm
+                if not book_id in existing_ids:  # indexed book is absent from the database
+                    print "book id %d doesn't exist." % book_id
+                    index.remove_book(book_id)  # removal is delegated to the Index object passed in
+        finally:  # close searcher and reader even if the scan above raises
+            searcher.close()
+            reader.close()
+
     def handle(self, *args, **opts):
         index = Index()
         index.open()
+
+        self.delete_old(index)
+
         try:
             index.optimize()
         finally: