fixes
[wolnelektury.git] / src / catalogue / management / commands / importbooks.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 import os
5 import sys
6 from django.conf import settings
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.core.files import File
10 from django.db import transaction
11 from librarian.picture import ImageStore
12
13 from catalogue.models import Book
14 from picture.models import Picture
15 from search.index import Index
16
17
class Command(BaseCommand):
    """Management command that imports book (or picture) XML files from directories."""

    help = 'Imports books from the specified directories.'

    def add_arguments(self, parser):
        """Register the command-line flags and the positional directory list."""
        parser.add_argument(
            '-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Quiet mode: print a progress dot per file instead of detailed messages')
        parser.add_argument(
            '-f', '--force', action='store_true', dest='force',
            default=False, help='Overwrite works already in the catalogue')
        parser.add_argument(
            '-D', '--dont-build', dest='dont_build', metavar="FORMAT,...",
            help="Skip building specified formats")
        parser.add_argument(
            '-S', '--no-search-index', action='store_false',
            dest='search_index', default=True,
            help='Skip indexing imported works for search')
        parser.add_argument(
            '-F', '--not-findable', action='store_false',
            dest='findable', default=True,
            help='Set book as not findable.')
        parser.add_argument(
            '-p', '--picture', action='store_true', dest='import_picture',
            default=False, help='Import pictures')
        parser.add_argument('directory', nargs='+')

    def import_book(self, file_path, options):
        """Import one book XML file and attach ebook files found next to it.

        The book is created with ``Book.from_xml_file``. Then, for every
        format listed in ``Book.ebook_formats``, a file that shares the XML
        file's base name and has that format as its extension is saved into
        the book's ``<format>_file`` field.

        :param file_path: path to the book's XML file
        :param options: parsed command options; reads ``verbose``,
            ``dont_build``, ``force`` and ``findable``
        """
        verbose = options.get('verbose')

        # --dont-build takes a comma-separated list of format names.
        dont_build = options.get('dont_build')
        dont_build = dont_build.lower().split(',') if dont_build else None

        file_base, _ext = os.path.splitext(file_path)
        gallery_dir = os.path.dirname(os.path.abspath(file_base))
        book = Book.from_xml_file(
            file_path,
            overwrite=options.get('force'),
            dont_build=dont_build,
            search_index_tags=False,
            findable=options.get('findable'),
            remote_gallery_url='file://' + gallery_dir + '/img/',
        )

        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # The Python 2 `file()` builtin does not exist on Python 3; use
            # open() in binary mode and close the handle once it is stored.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False,
                )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        book.save()

    def import_picture(self, file_path, options, continue_on_error=True):
        """Import one picture XML file, using its directory as the image store.

        :param file_path: path to the picture's XML file
        :param options: parsed command options; reads ``force``
        :param continue_on_error: when true, any exception is printed and
            ``None`` is returned; when false, the exception propagates
        :return: the value returned by ``Picture.from_xml_file``, or ``None``
            if the import failed and ``continue_on_error`` is true
        """
        try:
            image_store = ImageStore(os.path.dirname(file_path))
            picture = Picture.from_xml_file(
                file_path, image_store=image_store, overwrite=options.get('force'))
        except Exception as ex:
            if not continue_on_error:
                # Bare raise keeps the original traceback intact.
                raise
            print("%s: %s" % (file_path, ex))
            return None
        return picture

    @transaction.atomic
    def handle(self, **options):
        """Import every ``.xml`` file found directly in the given directories.

        Files are processed in sorted order. A file whose import raises
        ``Book.DoesNotExist`` is pushed to the end of the queue and retried
        later; if nothing new was imported since its previous attempt, the
        exception is re-raised. A summary is printed at the end.
        """
        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                raise

        files_imported = 0
        files_skipped = 0

        for dir_name in options['directory']:
            if not os.path.isdir(dir_name):
                print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                continue

            # files queue
            files = sorted(os.listdir(dir_name))
            # file name -> value of files_imported when it was last postponed
            postponed = {}
            while files:
                file_name = files.pop(0)
                file_path = os.path.join(dir_name, file_name)

                # Skip files that are not XML files
                if os.path.splitext(file_path)[1] != '.xml':
                    continue

                if verbose:
                    print("Parsing '%s'" % file_path)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # Import book files
                try:
                    if import_picture:
                        # NOTE(review): import_picture() runs with its default
                        # continue_on_error=True, so it prints failures itself
                        # and returns None; such files are still counted as
                        # imported below.
                        self.import_picture(file_path, options)
                    else:
                        self.import_book(file_path, options)

                    files_imported += 1

                except (Book.AlreadyExists, Picture.AlreadyExists):
                    print(self.style.ERROR(
                        '%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                        file_path))
                    files_skipped += 1

                except Book.DoesNotExist:
                    if file_name in postponed and postponed[file_name] >= files_imported:
                        # we're in a loop, nothing's being imported - some child is really missing
                        raise
                    # push it back into the queue, maybe the missing child will show up
                    if verbose:
                        print(self.style.NOTICE('Waiting for missing children'))
                    files.append(file_name)
                    postponed[file_name] = files_imported

        # Print results
        print()
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print()