Update to the new librarian API for HTML and TXT.
[wolnelektury.git] / src / catalogue / management / commands / importbooks.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 import os
5 import sys
6 from django.conf import settings
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.core.files import File
10 from django.db import transaction
11 from catalogue.models import Book
12
13
class Command(BaseCommand):
    """Management command that imports book XML files from directories.

    Every ``*.xml`` file found directly inside each given directory is passed
    to ``Book.from_xml_file``. Files sharing the XML file's base name and
    ending in one of ``Book.ebook_formats`` are attached to the resulting book.
    """
    help = 'Imports books from the specified directories.'

    def add_arguments(self, parser):
        """Register the command-line options and the positional directories."""
        parser.add_argument(
                '-q', '--quiet', action='store_false', dest='verbose', default=True,
                help='Print a progress dot per file instead of per-file messages.')
        parser.add_argument(
                '-f', '--force', action='store_true', dest='force',
                default=False, help='Overwrite works already in the catalogue')
        parser.add_argument(
                '-D', '--dont-build', dest='dont_build', metavar="FORMAT,...",
                help="Skip building specified formats")
        parser.add_argument(
                '-F', '--not-findable', action='store_false',
                dest='findable', default=True,
                help='Set book as not findable.')
        parser.add_argument('directory', nargs='+')

    def import_book(self, file_path, options):
        """Import a single book XML file and attach its sibling ebook files.

        Args:
            file_path: Path to the book's XML file.
            options: Parsed command options; the keys ``verbose``,
                ``dont_build``, ``force`` and ``findable`` are read.

        Exceptions raised by ``Book.from_xml_file`` are not handled here;
        ``handle`` deals with ``Book.AlreadyExists`` and ``Book.DoesNotExist``.
        """
        verbose = options.get('verbose')

        # --dont-build takes a comma-separated list of format names.
        dont_build_option = options.get('dont_build')
        if dont_build_option:
            dont_build = dont_build_option.lower().split(',')
        else:
            dont_build = None

        file_base, _ext = os.path.splitext(file_path)
        # Gallery images are looked up in an 'img' directory next to the XML.
        gallery_dir = os.path.dirname(os.path.abspath(file_base))
        book = Book.from_xml_file(
            file_path,
            overwrite=options.get('force'),
            dont_build=dont_build,
            findable=options.get('findable'),
            remote_gallery_url='file://' + gallery_dir + '/img/',
        )

        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # Open in binary mode (the Python 2 `file()` builtin is gone in
            # Python 3) and close the handle once the content has been saved.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False,
                )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        # The field files were attached with save=False; persist them at once.
        book.save()

    @transaction.atomic
    def handle(self, **options):
        """Import every XML file from the given directories.

        The whole run is wrapped in one database transaction. A file whose
        import raises ``Book.DoesNotExist`` is moved to the end of the queue
        and retried later; if no other file was imported since it was last
        postponed, the exception is re-raised.
        """
        self.style = color_style()

        verbose = options.get('verbose')

        files_imported = 0
        files_skipped = 0

        for dir_name in options['directory']:
            if not os.path.isdir(dir_name):
                print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                continue

            # Files queue; postponed files are appended back to its end.
            files = sorted(os.listdir(dir_name))
            # Maps a postponed file name to the value of files_imported at the
            # moment it was postponed, so lack of progress can be detected.
            postponed = {}
            while files:
                file_name = files.pop(0)
                file_path = os.path.join(dir_name, file_name)
                _file_base, ext = os.path.splitext(file_path)

                # Skip files that are not XML files
                if ext != '.xml':
                    continue

                if verbose:
                    print("Parsing '%s'" % file_path)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # Import book files
                try:
                    self.import_book(file_path, options)
                    files_imported += 1

                except Book.AlreadyExists:
                    print(self.style.ERROR(
                        '%s: Book already imported. Skipping. To overwrite use --force.' %
                        file_path))
                    files_skipped += 1

                except Book.DoesNotExist:
                    if file_name not in postponed or postponed[file_name] < files_imported:
                        # push it back into the queue, maybe the missing child will show up
                        if verbose:
                            print(self.style.NOTICE('Waiting for missing children'))
                        files.append(file_name)
                        postponed[file_name] = files_imported
                    else:
                        # we're in a loop, nothing's being imported - some child is really missing
                        raise

        # Print results
        print()
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print()