- def close(self):
- try:
- self.index.optimize()
- except JavaError, je:
- print "Error during optimize phase, check index: %s" % je
-
- self.index.close()
- self.index = None
-
- def index_tags(self):
- q = NumericRangeQuery.newIntRange("tag_id", 0, Integer.MAX_VALUE, True, True)
- self.index.deleteDocuments(q)
-
- for tag in catalogue.models.Tag.objects.all():
- doc = Document()
- doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(tag.id))
- doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
- doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
- doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED))
- self.index.addDocument(doc)
-
- def remove_book(self, book):
- q = NumericRangeQuery.newIntRange("book_id", book.id, book.id, True, True)
- self.index.deleteDocuments(q)
-
- def index_book(self, book, overwrite=True):
+ def index_tags(self, *tags, **kw):
+ """
+ Re-index tags: every tag when called without positional arguments, otherwise only the given *tags.
+ Matching entries are deleted from the index first, then re-added unless remove_only=True is passed in **kw.
+ Indexed fields: tag_id, tag_name, tag_name_pl (same text as tag_name; presumably stemmed for Polish by the schema - confirm), tag_category, is_pdcounter, uid.
+ """
+ log.debug("Indexing tags")
+ remove_only = kw.get('remove_only', False)  # optional keyword flag, off by default
+ # First, delete the index entries of the affected tags.
+ if tags:
+ tag_qs = []
+ for tag in tags:
+ q_id = self.index.Q(tag_id=tag.id)
+
+ if isinstance(tag, PDCounterAuthor):  # PDCounter objects are filed under fixed pseudo-categories
+ q_cat = self.index.Q(tag_category='pd_author')
+ elif isinstance(tag, PDCounterBook):
+ q_cat = self.index.Q(tag_category='pd_book')
+ else:
+ q_cat = self.index.Q(tag_category=tag.category)
+
+ q_id_cat = self.index.Q(q_id & q_cat)  # id AND category must both match (presumably ids can repeat across tag kinds - confirm)
+ tag_qs.append(q_id_cat)
+ self.delete_query(tag_qs)
+ else: # no tags given: delete by tag_id__any=True (presumably matches every document that has a tag_id - confirm)
+ q = self.index.Q(tag_id__any=True)
+ self.delete_query(q)
+
+ if not remove_only:
+ # Then add them back: either everything, or just the tags that were passed in.
+ if not tags:  # full rebuild: regular tags except category 'set', plus both PDCounter models
+ tags = chain(catalogue.models.Tag.objects.exclude(category='set'), \
+ PDCounterAuthor.objects.all(), \
+ PDCounterBook.objects.all())
+
+ for tag in tags:
+ if isinstance(tag, PDCounterAuthor):
+ doc = {
+ "tag_id": int(tag.id),
+ "tag_name": tag.name,
+ "tag_name_pl": tag.name,
+ "tag_category": 'pd_author',
+ "is_pdcounter": True,
+ "uid": "tag%d_pd_a" % tag.id  # the _pd_a / _pd_b suffixes keep uids distinct from regular "tag%d" uids
+ }
+ elif isinstance(tag, PDCounterBook):
+ doc = {
+ "tag_id": int(tag.id),
+ "tag_name": tag.title,  # book entries take their name from .title rather than .name
+ "tag_name_pl": tag.title,
+ "tag_category": 'pd_book',
+ "is_pdcounter": True,
+ "uid": "tag%d_pd_b" % tag.id
+ }
+ else:
+ doc = {
+ "tag_id": int(tag.id),
+ "tag_name": tag.name,
+ "tag_name_pl": tag.name,
+ "tag_category": tag.category,
+ "is_pdcounter": False,
+ "uid": "tag%d" % tag.id
+ }
+ self.index.add(doc)
+
+ def create_book_doc(self, book):
+ """
+ Return a new dict holding the book's integer 'book_id' and, when book.parent is not None, the parent's integer 'parent_id'.
+ """
+ doc = {
+ 'book_id': int(book.id),
+ }
+ if book.parent is not None:  # the parent_id key is only present for books that have a parent
+ doc["parent_id"] = int(book.parent.id)
+ return doc
+
+ def remove_book(self, book_or_id, remove_snippets=True):
+ """Remove a book from the search index.
+ book_or_id - a catalogue.models.Book instance or a bare book id; remove_snippets - when true (the default), also calls Snippets(book_id).remove()."""
+ if isinstance(book_or_id, catalogue.models.Book):
+ book_id = book_or_id.id
+ else:  # anything that is not a Book instance is used as the id itself
+ book_id = book_or_id
+
+ self.delete_query(self.index.Q(book_id=book_id))  # delete whatever matches this book_id
+
+ if remove_snippets:
+ snippets = Snippets(book_id)
+ snippets.remove()
+
+ def index_book(self, book, book_info=None, overwrite=True):
+ """
+ Indexes the book.
+ Creates a lucene document for extracted metadata
+ and calls self.index_content() to index the contents of the book.
+ """