import re
from django.conf import settings
from librarian import dcparser
-import librarian.meta.types.date
import librarian.meta.types.person
import librarian.meta.types.text
from librarian.parser import WLDocument
doc['parent_id'] = int(book.parent.id)
return doc
- def remove_book(self, book, remove_snippets=True):
+ def remove_book(self, book, remove_snippets=True, legacy=True):
"""Removes a book from search index.
book - Book instance."""
- self.delete_query(self.index.Q(book_id=book.id))
+ if legacy:
+ self.delete_query(self.index.Q(book_id=book.id))
- if remove_snippets:
+ if remove_snippets:
snippets = Snippets(book.id)
snippets.remove()
self.remove_snippets(book)
- def index_book(self, book, book_info=None, overwrite=True):
+ def index_book(self, book, book_info=None, overwrite=True, legacy=True):
"""
Indexes the book.
Creates a lucene document for extracted metadata
if overwrite:
# we don't remove snippets, since they might be still needed by
# threads using not reopened index
- self.remove_book(book, remove_snippets=False)
+ self.remove_book(book, remove_snippets=False, legacy=legacy)
book_doc = self.create_book_doc(book)
meta_fields = self.extract_metadata(book, book_info, dc_only=[
book_doc[n] = f
book_doc['uid'] = "book%s" % book_doc['book_id']
- self.index.add(book_doc)
+ if legacy:
+ self.index.add(book_doc)
del book_doc
book_fields = {
'title': meta_fields['title'],
if tag_name in meta_fields:
book_fields[tag_name] = meta_fields[tag_name]
- self.index_content(book, book_fields=book_fields)
+ self.index_content(book, book_fields=book_fields, legacy=legacy)
master_tags = [
'opowiadanie',
else:
persons = ', '.join(map(str, p))
fields[field.name] = persons
- elif issubclass(type_indicator, librarian.meta.types.date.DateValue):
- dt = getattr(book_info, field.name)
- fields[field.name] = dt
# get published date
pd = None
if master.tag in self.master_tags:
return master
- def index_content(self, book, book_fields):
+ def index_content(self, book, book_fields, legacy=True):
"""
Walks the book XML and extract content from it.
Adds parts for each header tag and for each fragment.
doc = add_part(snippets, header_index=position, header_type=header.tag,
text=''.join(footnote))
self.add_snippet(book, doc)
- self.index.add(doc)
+ if legacy:
+ self.index.add(doc)
footnote = []
# handle fragments and themes.
themes=frag['themes'])
# Add searchable fragment
self.add_snippet(book, doc)
- self.index.add(doc)
+ if legacy:
+ self.index.add(doc)
# Collect content.
header_type=header.tag, text=fix_format(content))
self.add_snippet(book, doc)
- self.index.add(doc)
+ if legacy:
+ self.index.add(doc)
finally:
snippets.close()