Support longer tag names.
[wolnelektury.git] / apps / catalogue / management / commands / importbooks.py
index 3ff0e5c..a15faa0 100644 (file)
@@ -1,12 +1,23 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
 import os
 import sys
+import time
 from optparse import make_option
-
+from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.core.management.color import color_style
 from django.core.files import File
+from catalogue.utils import trim_query_log
+from librarian.picture import ImageStore
+from wolnelektury_core.management.profile import profile
 
 from catalogue.models import Book
+from picture.models import Picture
+
+from search.index import Index
 
 
 class Command(BaseCommand):
@@ -14,19 +25,79 @@ class Command(BaseCommand):
         make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
             help='Verbosity level; 0=minimal output, 1=normal output, 2=all output'),
         make_option('-f', '--force', action='store_true', dest='force', default=False,
-            help='Print status messages to stdout')
+            help='Overwrite works already in the catalogue'),
+        make_option('-D', '--dont-build', dest='dont_build',
+            metavar="FORMAT,...",
+            help="Skip building specified formats"),
+        make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
+            help='Skip indexing imported works for search'),
+        make_option('-w', '--wait-until', dest='wait_until', metavar='TIME',
+            help='Wait until specified time (Y-M-D h:m:s)'),
+        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
+            help='Import pictures'),
     )
     help = 'Imports books from the specified directories.'
     args = 'directory [directory ...]'
 
+    def import_book(self, file_path, options):
+        verbose = options.get('verbose')
+        if options.get('dont_build'):
+            dont_build = options.get('dont_build').lower().split(',')
+        else:
+            dont_build = None
+        file_base, ext = os.path.splitext(file_path)
+        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
+                                  dont_build=dont_build,
+                                  search_index_tags=False)
+        for ebook_format in Book.ebook_formats:
+            if os.path.isfile(file_base + '.' + ebook_format):
+                getattr(book, '%s_file' % ebook_format).save(
+                    '%s.%s' % (book.slug, ebook_format),
+                    File(file(file_base + '.' + ebook_format)),
+                    save=False
+                    )
+                if verbose:
+                    print "Importing %s.%s" % (file_base, ebook_format)
+        book.save()
+
+    def import_picture(self, file_path, options, continue_on_error=True):
+        try:
+            image_store = ImageStore(os.path.dirname(file_path))
+            picture = Picture.from_xml_file(file_path, image_store=image_store, overwrite=options.get('force'))
+        except Exception, ex:
+            if continue_on_error:
+                print "%s: %s" % (file_path, ex)
+                return
+            else:
+                raise ex
+        return picture
+
+    #    @profile
     def handle(self, *directories, **options):
         from django.db import transaction
 
         self.style = color_style()
 
         verbose = options.get('verbose')
-        force = options.get('force')
-        show_traceback = options.get('traceback', False)
+        import_picture = options.get('import_picture')
+
+        wait_until = None
+        if options.get('wait_until'):
+            wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
+            if verbose > 0:
+                print "Will wait until %s; it's %f seconds from now" % (
+                    time.strftime('%Y-%m-%d %H:%M:%S',
+                    time.localtime(wait_until)), wait_until - time.time())
+
+        index = None
+        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
+            index = Index()
+            try:
+                index.index_tags()
+                index.index.commit()
+            except Exception, e:
+                index.index.rollback()
+                raise e
 
         # Start transaction management.
         transaction.commit_unless_managed()
@@ -35,64 +106,68 @@ class Command(BaseCommand):
 
         files_imported = 0
         files_skipped = 0
-        
+
         for dir_name in directories:
             if not os.path.isdir(dir_name):
                 print self.style.ERROR("%s: Not a directory. Skipping." % dir_name)
             else:
-                for file_name in os.listdir(dir_name):
+                # files queue
+                files = sorted(os.listdir(dir_name))
+                postponed = {}
+                while files:
+                    trim_query_log(0)
+                    file_name = files.pop(0)
                     file_path = os.path.join(dir_name, file_name)
                     file_base, ext = os.path.splitext(file_path)
-                    
+
                     # Skip files that are not XML files
                     if not ext == '.xml':
                         continue
-                    
+
                     if verbose > 0:
                         print "Parsing '%s'" % file_path
                     else:
                         sys.stdout.write('.')
                         sys.stdout.flush()
-                    
+
                     # Import book files
                     try:
-                        book = Book.from_xml_file(file_path, overwrite=force)
+                        if import_picture:
+                            self.import_picture(file_path, options)
+                        else:
+                            self.import_book(file_path, options)
+
                         files_imported += 1
-                        
-                        if os.path.isfile(file_base + '.pdf'):
-                            book.pdf_file.save('%s.pdf' % book.slug, File(file(file_base + '.pdf')))
-                            if verbose:
-                                print "Importing %s.pdf" % file_base 
-                        if os.path.isfile(file_base + '.odt'):
-                            book.odt_file.save('%s.odt' % book.slug, File(file(file_base + '.odt')))
-                            if verbose:
-                                print "Importing %s.odt" % file_base
-                        if os.path.isfile(file_base + '.txt'):
-                            book.txt_file.save('%s.txt' % book.slug, File(file(file_base + '.txt')))
-                            if verbose:
-                                print "Importing %s.txt" % file_base
-                        if os.path.isfile(file_base + '.mp3'):
-                            book.mp3_file.save('%s.mp3' % book.slug, File(file(file_base + '.mp3')))
-                            if verbose:
-                                print "Importing %s.txt" % file_base
-                        if os.path.isfile(file_base + '.ogg'):
-                            book.ogg_file.save('%s.ogg' % book.slug, File(file(file_base + '.ogg')))
-                            if verbose:
-                                print "Importing %s.ogg" % file_base
-                            
-                        book.save()
-                    
-                    except Book.AlreadyExists, msg:
-                        print self.style.ERROR('%s: Book already imported. Skipping. To overwrite use --force.' %
+                        transaction.commit()
+
+                    except (Book.AlreadyExists, Picture.AlreadyExists):
+                        print self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                             file_path)
                         files_skipped += 1
-                        
+
+                    except Book.DoesNotExist, e:
+                        if file_name not in postponed or postponed[file_name] < files_imported:
+                            # push it back into the queue, maybe the missing child will show up
+                            if verbose:
+                                print self.style.NOTICE('Waiting for missing children')
+                            files.append(file_name)
+                            postponed[file_name] = files_imported
+                        else:
+                            # we're in a loop, nothing's being imported - some child is really missing
+                            raise e
+
         # Print results
         print
         print "Results: %d files imported, %d skipped, %d total." % (
             files_imported, files_skipped, files_imported + files_skipped)
         print
-                        
+
+        if wait_until:
+            print 'Waiting...'
+            try:
+                time.sleep(wait_until - time.time())
+            except IOError:
+                print "it's already too late"
+
         transaction.commit()
         transaction.leave_transaction_management()
-