importbooks / tasks for indexing
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 29 Aug 2012 13:02:13 +0000 (15:02 +0200)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 29 Aug 2012 13:02:13 +0000 (15:02 +0200)
apps/catalogue/management/commands/importbooks.py
apps/catalogue/models/book.py
apps/catalogue/tasks.py
apps/search/custom.py
apps/search/index.py

diff --git a/apps/catalogue/management/commands/importbooks.py b/apps/catalogue/management/commands/importbooks.py
index b323edc..93c68a2 100644
@@ -49,11 +49,11 @@ class Command(BaseCommand):
                                                     build_pdf=options.get('build_pdf'),
                                                     build_mobi=options.get('build_mobi'),
                                                     search_index=options.get('search_index'),
-                                                    search_index_reuse=True, search_index_tags=False)
+                                                    search_index_tags=False)
         for ebook_format in Book.ebook_formats:
             if os.path.isfile(file_base + '.' + ebook_format):
                 getattr(book, '%s_file' % ebook_format).save(
-                    '%s.%s' % (book.slug, ebook_format), 
+                    '%s.%s' % (book.slug, ebook_format),
                     File(file(file_base + '.' + ebook_format)))
                 if verbose:
                     print "Importing %s.%s" % (file_base, ebook_format)
@@ -82,13 +82,15 @@ class Command(BaseCommand):
                     time.strftime('%Y-%m-%d %H:%M:%S',
                     time.localtime(wait_until)), wait_until - time.time())
 
+        index = None
         if options.get('search_index') and not settings.NO_SEARCH_INDEX:
             index = Index()
-            index.open()
             try:
                 index.index_tags()
-            finally:
-                index.close()
+                index.index.commit()
+            except Exception, e:
+                index.index.rollback()
+                raise e
 
         # Start transaction management.
         transaction.commit_unless_managed()
@@ -97,7 +99,7 @@ class Command(BaseCommand):
 
         files_imported = 0
         files_skipped = 0
-        
+
         for dir_name in directories:
             if not os.path.isdir(dir_name):
                 print self.style.ERROR("%s: Not a directory. Skipping." % dir_name)
@@ -128,7 +130,7 @@ class Command(BaseCommand):
                             self.import_book(file_path, options)
                         files_imported += 1
                         transaction.commit()
-                        
+
                     except (Book.AlreadyExists, Picture.AlreadyExists):
                         print self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                             file_path)
@@ -160,4 +162,3 @@ class Command(BaseCommand):
 
         transaction.commit()
         transaction.leave_transaction_management()
-
diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py
index a8b342d..d3df67e 100644
@@ -269,20 +269,20 @@ class Book(models.Model):
         paths = map(lambda bm: (None, bm.file.path), bm)
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
-    def search_index(self, book_info=None, reuse_index=False, index_tags=True):
+    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
         import search
-        if reuse_index:
-            idx = search.ReusableIndex()
-        else:
-            idx = search.Index()
-            
-        idx.open()
+        if index is None:
+            index = search.Index()
         try:
-            idx.index_book(self, book_info)
+            index.index_book(self, book_info)
             if index_tags:
-                idx.index_tags()
+                index.index_tags()
-        finally:
-            idx.close()
+            if commit:
+                index.index.commit()
+        except Exception, e:
+            index.index.rollback()
+            raise e
+
 
     @classmethod
     def from_xml_file(cls, xml_file, **kwargs):
@@ -303,7 +303,7 @@ class Book(models.Model):
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True,
-            search_index=True, search_index_tags=True, search_index_reuse=False):
+            search_index=True, search_index_tags=True):
 
         # check for parts before we do anything
         children = []
@@ -381,8 +381,7 @@ class Book(models.Model):
             book.build_fb2()
 
         if not settings.NO_SEARCH_INDEX and search_index:
-            book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
-            #index_book.delay(book.id, book_info)
+            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
 
         tasks.fix_tree_tags.delay(book)
         cls.published.send(sender=book)
diff --git a/apps/catalogue/tasks.py b/apps/catalogue/tasks.py
index 8cba04f..8906a14 100644
@@ -24,10 +24,10 @@ def fix_tree_tags(book):
 
 
 @task
-def index_book(book_id, book_info=None):
+def index_book(book_id, book_info=None, **kwargs):
     from catalogue.models import Book
     try:
-        return Book.objects.get(id=book_id).search_index(book_info)
+        return Book.objects.get(id=book_id).search_index(book_info, **kwargs)
     except Exception, e:
         print "Exception during index: %s" % e
         print_exc()
diff --git a/apps/search/custom.py b/apps/search/custom.py
index 33ce47d..6cb18fd 100644
@@ -85,9 +85,9 @@ class CustomSolrInterface(sunburnt.SolrInterface):
     def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1, max_length_get_url=sunburnt.MAX_LENGTH_GET_URL):
         self.conn = CustomSolrConnection(url, http_connection, retry_timeout, max_length_get_url)
         self.schemadoc = schemadoc
-        if mode == 'r':
+        if 'w' not in mode:
             self.writeable = False
-        elif mode == 'w':
+        elif 'r' not in mode:
             self.readable = False
         self.init_schema()
 
diff --git a/apps/search/index.py b/apps/search/index.py
index 4962cae..da54fe7 100644
@@ -111,7 +111,7 @@ class Index(SolrIndex):
     Class indexing books.
     """
     def __init__(self):
-        super(Index, self).__init__()
+        super(Index, self).__init__(mode='rw')
 
     def delete_query(self, *queries):
         """
@@ -201,6 +201,7 @@ class Index(SolrIndex):
                         "is_pdcounter": False,
                         "uid": "tag%d" % tag.id
                         }
+                print "ADD 1 %s" % doc
                 self.index.add(doc)
 
     def create_book_doc(self, book):
@@ -249,6 +250,7 @@ class Index(SolrIndex):
             book_doc[n] = f
 
         book_doc['uid'] = "book%s" % book_doc['book_id']
+        print "ADD 2 %s" % book_doc
         self.index.add(book_doc)
         del book_doc
         book_fields = {
@@ -460,7 +462,7 @@ class Index(SolrIndex):
                         doc = add_part(snippets, header_index=position, header_type=header.tag,
                                        text=u''.join(footnote),
                                        is_footnote=True)
-
+                        print "ADD 3 %s" % doc
                         self.index.add(doc)
                         #print "@ footnote text: %s" % footnote
                         footnote = []
@@ -496,6 +498,7 @@ class Index(SolrIndex):
                                        text=fix_format(frag['text']),
                                        themes=frag['themes'])
                         #print '@ FRAG %s' % frag['content']
+                        print "ADD 4 %s" % doc
                         self.index.add(doc)
 
                         # Collect content.
@@ -510,6 +513,7 @@ class Index(SolrIndex):
                                header_type=header.tag, text=fix_format(content))
                 #print '@ CONTENT: %s' % fix_format(content)
 
+                print "ADD 5 %s" % doc
                 self.index.add(doc)
 
         finally:
@@ -722,7 +726,7 @@ class Search(SolrIndex):
     Search facilities.
     """
     def __init__(self, default_field="text"):
-        super(Search, self).__init__()
+        super(Search, self).__init__(mode='r')
 
     # def get_tokens(self, searched, field='text', cached=None):
     #     """returns tokens analyzed by a proper (for a field) analyzer