From: Marcin Koziej Date: Wed, 29 Aug 2012 13:02:13 +0000 (+0200) Subject: importbooks / tasks for indexing X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/281a6f613c78ffd1fa81951db829a5551fd2e42d?hp=-c importbooks / tasks for indexing --- 281a6f613c78ffd1fa81951db829a5551fd2e42d diff --git a/apps/catalogue/management/commands/importbooks.py b/apps/catalogue/management/commands/importbooks.py index b323edc49..93c68a2f7 100644 --- a/apps/catalogue/management/commands/importbooks.py +++ b/apps/catalogue/management/commands/importbooks.py @@ -49,11 +49,11 @@ class Command(BaseCommand): build_pdf=options.get('build_pdf'), build_mobi=options.get('build_mobi'), search_index=options.get('search_index'), - search_index_reuse=True, search_index_tags=False) + search_index_tags=False) for ebook_format in Book.ebook_formats: if os.path.isfile(file_base + '.' + ebook_format): getattr(book, '%s_file' % ebook_format).save( - '%s.%s' % (book.slug, ebook_format), + '%s.%s' % (book.slug, ebook_format), File(file(file_base + '.' + ebook_format))) if verbose: print "Importing %s.%s" % (file_base, ebook_format) @@ -82,13 +82,15 @@ class Command(BaseCommand): time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(wait_until)), wait_until - time.time()) + index = None if options.get('search_index') and not settings.NO_SEARCH_INDEX: index = Index() - index.open() try: index.index_tags() - finally: - index.close() + index.index.commit() + except Exception, e: + index.index.rollback() + raise e # Start transaction management. transaction.commit_unless_managed() @@ -97,7 +99,7 @@ class Command(BaseCommand): files_imported = 0 files_skipped = 0 - + for dir_name in directories: if not os.path.isdir(dir_name): print self.style.ERROR("%s: Not a directory. Skipping." % dir_name) @@ -128,7 +130,7 @@ class Command(BaseCommand): self.import_book(file_path, options) files_imported += 1 transaction.commit() - + except (Book.AlreadyExists, Picture.AlreadyExists): print self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' % file_path) @@ -160,4 +162,3 @@ class Command(BaseCommand): transaction.commit() transaction.leave_transaction_management() - diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py index a8b342d74..d3df67e6f 100644 --- a/apps/catalogue/models/book.py +++ b/apps/catalogue/models/book.py @@ -269,20 +269,20 @@ class Book(models.Model): paths = map(lambda bm: (None, bm.file.path), bm) return create_zip(paths, "%s_%s" % (self.slug, format_)) - def search_index(self, book_info=None, reuse_index=False, index_tags=True): + def search_index(self, book_info=None, index=None, index_tags=True, commit=True): import search - if reuse_index: - idx = search.ReusableIndex() - else: - idx = search.Index() - - idx.open() + if index is None: + index = search.Index() try: - idx.index_book(self, book_info) + index.index_book(self, book_info) if index_tags: idx.index_tags() - finally: - idx.close() + if commit: + index.index.commit() + except Exception, e: + index.index.rollback() + raise e + @classmethod def from_xml_file(cls, xml_file, **kwargs): @@ -303,7 +303,7 @@ class Book(models.Model): @classmethod def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True, - search_index=True, search_index_tags=True, search_index_reuse=False): + search_index=True, search_index_tags=True): # check for parts before we do anything children = [] @@ -381,8 +381,7 @@ class Book(models.Model): book.build_fb2() if not settings.NO_SEARCH_INDEX and search_index: - book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse) - #index_book.delay(book.id, book_info) + tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags) tasks.fix_tree_tags.delay(book) cls.published.send(sender=book) diff --git a/apps/catalogue/tasks.py b/apps/catalogue/tasks.py index 8cba04fd7..8906a141c 100644 --- a/apps/catalogue/tasks.py +++ b/apps/catalogue/tasks.py @@ -24,10 +24,10 @@ def fix_tree_tags(book): @task -def index_book(book_id, book_info=None): +def index_book(book_id, book_info=None, **kwargs): from catalogue.models import Book try: - return Book.objects.get(id=book_id).search_index(book_info) + return Book.objects.get(id=book_id).search_index(book_info, **kwargs) except Exception, e: print "Exception during index: %s" % e print_exc() diff --git a/apps/search/custom.py b/apps/search/custom.py index 33ce47d32..6cb18fd24 100644 --- a/apps/search/custom.py +++ b/apps/search/custom.py @@ -85,9 +85,9 @@ class CustomSolrInterface(sunburnt.SolrInterface): def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1, max_length_get_url=sunburnt.MAX_LENGTH_GET_URL): self.conn = CustomSolrConnection(url, http_connection, retry_timeout, max_length_get_url) self.schemadoc = schemadoc - if mode == 'r': + if 'w' not in mode: self.writeable = False - elif mode == 'w': + elif 'r' not in mode: self.readable = False self.init_schema() diff --git a/apps/search/index.py b/apps/search/index.py index 4962cae99..da54fe7f0 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -111,7 +111,7 @@ class Index(SolrIndex): Class indexing books. """ def __init__(self): - super(Index, self).__init__() + super(Index, self).__init__(mode='rw') def delete_query(self, *queries): """ @@ -201,6 +201,7 @@ class Index(SolrIndex): "is_pdcounter": False, "uid": "tag%d" % tag.id } + print "ADD 1 %s" % doc self.index.add(doc) def create_book_doc(self, book): @@ -249,6 +250,7 @@ class Index(SolrIndex): book_doc[n] = f book_doc['uid'] = "book%s" % book_doc['book_id'] + print "ADD 2 %s" % book_doc self.index.add(book_doc) del book_doc book_fields = { @@ -460,7 +462,7 @@ class Index(SolrIndex): doc = add_part(snippets, header_index=position, header_type=header.tag, text=u''.join(footnote), is_footnote=True) - + print "ADD 3 %s" % doc self.index.add(doc) #print "@ footnote text: %s" % footnote footnote = [] @@ -496,6 +498,7 @@ class Index(SolrIndex): text=fix_format(frag['text']), themes=frag['themes']) #print '@ FRAG %s' % frag['content'] + print "ADD 4 %s" % doc self.index.add(doc) # Collect content. @@ -510,6 +513,7 @@ class Index(SolrIndex): header_type=header.tag, text=fix_format(content)) #print '@ CONTENT: %s' % fix_format(content) + print "ADD 5 %s" % doc self.index.add(doc) finally: @@ -722,7 +726,7 @@ class Search(SolrIndex): Search facilities. """ def __init__(self, default_field="text"): - super(Search, self).__init__() + super(Search, self).__init__(mode='r') # def get_tokens(self, searched, field='text', cached=None): # """returns tokens analyzed by a proper (for a field) analyzer