From: Marek Stępniowski Date: Thu, 18 Sep 2008 13:30:28 +0000 (+0200) Subject: Added capability to import PDF, ODT and TXT files to importbook management command. X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/f27fd052b9f3419cc4048711565db9984ca1a818?ds=inline Added capability to import PDF, ODT and TXT files to importbook management command. --- diff --git a/apps/catalogue/management/commands/importbooks.py b/apps/catalogue/management/commands/importbooks.py index d1dac54e4..c0a5df1ae 100644 --- a/apps/catalogue/management/commands/importbooks.py +++ b/apps/catalogue/management/commands/importbooks.py @@ -1,4 +1,5 @@ import os +import sys from django.core.management.base import BaseCommand from django.core.management.color import color_style @@ -9,11 +10,10 @@ from catalogue.models import Book class Command(BaseCommand): option_list = BaseCommand.option_list + ( - make_option('--verbosity', action='store', dest='verbosity', default='1', - type='choice', choices=['0', '1', '2'], + make_option('-q', '--quiet', action='store_false', dest='verbose', default=True, help='Verbosity level; 0=minimal output, 1=normal output, 2=all output'), - make_option('--force', action='store_true', dest='force', default=False, - help='Overwrite previously imported files with the same id') + make_option('-f', '--force', action='store_true', dest='force', default=False, + help='Print status messages to stdout') ) help = 'Imports books from the specified directories.' args = 'directory [directory ...]' @@ -23,7 +23,7 @@ class Command(BaseCommand): self.style = color_style() - verbosity = int(options.get('verbosity', 1)) + verbose = options.get('verbose') force = options.get('force') show_traceback = options.get('traceback', False) @@ -32,20 +32,61 @@ class Command(BaseCommand): transaction.enter_transaction_management() transaction.managed(True) + files_imported = 0 + files_skipped = 0 + for dir_name in directories: if not os.path.isdir(dir_name): - print self.style.ERROR("Skipping '%s': not a directory." % dir_name) + print self.style.ERROR("%s: Not a directory. Skipping." % dir_name) + files_skipped += 1 else: for file_name in os.listdir(dir_name): file_path = os.path.join(dir_name, file_name) - if not os.path.splitext(file_name)[1] == '.xml': - print self.style.NOTICE("Skipping '%s': not an XML file." % file_path) + file_base, ext = os.path.splitext(file_path) + + # Skip files that are not XML files + if not ext == '.xml': + print self.style.NOTICE("%s: Not an XML file. Skipping." % file_path) + files_skipped += 1 continue - if verbosity > 0: + + if verbose > 0: print "Parsing '%s'" % file_path + else: + sys.stdout.write('.') + sys.stdout.flush() - Book.from_xml_file(file_path) - + # Import book files + try: + book = Book.from_xml_file(file_path, overwrite=force) + files_imported += 1 + + if os.path.isfile(file_base + '.pdf'): + book.pdf_file.save('%s.pdf' % book.slug, File(file(file_base + '.pdf'))) + if verbose: + print "Importing %s.pdf" % file_base + if os.path.isfile(file_base + '.odt'): + book.odt_file.save('%s.odt' % book.slug, File(file(file_base + '.odt'))) + if verbose: + print "Importing %s.odt" % file_base + if os.path.isfile(file_base + '.txt'): + book.txt_file.save('%s.txt' % book.slug, File(file(file_base + '.txt'))) + if verbose: + print "Importing %s.txt" % file_base + + book.save() + + except Book.AlreadyExists, msg: + print self.style.ERROR('%s: Book already imported. Skipping. To overwrite use --force.' % + file_path) + files_skipped += 1 + + # Print results + print + print "Results: %d files imported, %d skipped, %d total." % ( + files_imported, files_skipped, files_imported + files_skipped) + print + transaction.commit() transaction.leave_transaction_management() diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index b5970e9f1..486172a2c 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -137,8 +137,11 @@ class Book(models.Model): has_html_file.short_description = 'HTML' has_html_file.boolean = True + class AlreadyExists(Exception): + pass + @staticmethod - def from_xml_file(xml_file): + def from_xml_file(xml_file, overwrite=False): from tempfile import NamedTemporaryFile from slughifi import slughifi from markupstring import MarkupString @@ -146,7 +149,11 @@ class Book(models.Model): # Read book metadata book_info = dcparser.parse(xml_file) book_base, book_slug = book_info.url.rsplit('/', 1) - book = Book(title=book_info.title, slug=book_slug) + book, created = Book.objects.get_or_create(slug=book_slug) + if not created and not overwrite: + raise Book.AlreadyExists('Book %s already exists' % book_slug) + + book.title = book_info.title book.save() book_tags = [] @@ -210,7 +217,8 @@ class Book(models.Model): book_themes = set(book_themes) book.tags = list(book.tags) + list(book_themes) - return book.save() + book.save() + return book @permalink def get_absolute_url(self):