51bf95b4eba4330dd3aa6740a8cc40e104d1938a
[wolnelektury.git] / apps / search / management / commands / optimizeindex.py
1
2 from django.core.management.base import BaseCommand
3 from search import Index, Search
4 from lucene import IndexReader, IndexSearcher, Term
5 from catalogue.models import Book
6
7
class Command(BaseCommand):
    """Management command: drop stale book documents from the Lucene index,
    then optimize the index."""
    help = 'Optimize Lucene search index'
    args = ''

    def delete_old(self, index):
        """Remove indexed books whose ids are no longer present in the database.

        Searches the index for documents with ``is_book == 'true'``, reads
        each document's stored ``book_id`` and calls ``index.remove_book`` for
        every id that has no matching ``Book`` row.

        index -- the search ``Index`` to prune; ``index.index`` is passed to
                 ``IndexReader.open`` and ``index.remove_book`` does the removal.
        """
        # Only the primary keys are needed, so avoid instantiating every Book.
        existing_ids = set(Book.objects.values_list('id', flat=True))

        reader = IndexReader.open(index.index, False)
        try:
            searcher = IndexSearcher(reader)
            try:
                # docFreq gives the number of hits to request from search().
                num = searcher.docFreq(Term('is_book', 'true'))
                if num <= 0:
                    # Nothing indexed as a book, so nothing can be stale.
                    # NOTE(review): Lucene's top-N search is believed to reject
                    # a non-positive hit count -- confirm for the version in use.
                    return

                docs = searcher.search(
                    Search.make_term_query(['true'], 'is_book'), num)
                for result in docs.scoreDocs:
                    stored = searcher.doc(result.doc)
                    book_id = int(stored.get('book_id'))
                    if book_id not in existing_ids:
                        # Parenthesised single argument: prints identically
                        # under the Python 2 print statement.
                        print("book id %d doesn't exist." % book_id)
                        index.remove_book(book_id)
            finally:
                searcher.close()
        finally:
            # Nested so the reader is released even if creating or closing
            # the searcher fails.
            reader.close()

    def handle(self, *args, **opts):
        """Open the index, prune stale books, optimize, and always close."""
        index = Index()
        index.open()

        # Pruning happens inside the try so that a failure in delete_old
        # cannot leave the opened index unclosed.
        try:
            self.delete_old(index)
            index.optimize()
        finally:
            index.close()