Cite base
[wolnelektury.git] / src / catalogue / management / commands / importbooks.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 import os
5 import sys
6 from django.conf import settings
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.core.files import File
10 from django.db import transaction
11 from librarian.picture import ImageStore
12
13 from catalogue.models import Book
14 from picture.models import Picture
15 from search.index import Index
16
17
class Command(BaseCommand):
    """Management command that imports book or picture XML files.

    Every ``*.xml`` file found directly inside each given directory is
    imported as a book (default) or as a picture (``--picture``). The whole
    run is wrapped in a single database transaction.
    """
    help = 'Imports books from the specified directories.'

    def add_arguments(self, parser):
        """Register the command-line options and the positional directories."""
        parser.add_argument(
            '-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Suppress informational per-file messages '
                 '(a progress dot is printed for each parsed file instead)')
        parser.add_argument(
            '-f', '--force', action='store_true', dest='force',
            default=False, help='Overwrite works already in the catalogue')
        parser.add_argument(
            '-D', '--dont-build', dest='dont_build', metavar="FORMAT,...",
            help="Skip building specified formats")
        parser.add_argument(
            '-S', '--no-search-index', action='store_false',
            dest='search_index', default=True,
            help='Skip indexing imported works for search')
        parser.add_argument(
            '-p', '--picture', action='store_true', dest='import_picture',
            default=False, help='Import pictures')
        parser.add_argument('directory', nargs='+')

    def import_book(self, file_path, options):
        """Import a single book XML file.

        For every format listed in ``Book.ebook_formats``, a file named like
        the XML file but with that format as its extension is attached to the
        book's ``<format>_file`` field when it exists on disk.

        Args:
            file_path: path to the book's XML file.
            options: parsed command options; reads ``verbose``,
                ``dont_build`` (comma-separated format names) and ``force``.

        Exceptions raised by ``Book.from_xml_file`` propagate to the caller.
        """
        verbose = options.get('verbose')
        dont_build = options.get('dont_build')
        if dont_build:
            dont_build = dont_build.lower().split(',')
        else:
            dont_build = None

        file_base = os.path.splitext(file_path)[0]
        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
                                  dont_build=dont_build,
                                  search_index_tags=False)

        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # Binary mode: ebook files are not text. The context manager
            # closes the handle once the field has stored the content.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False,  # the model is saved once, after the loop
                )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        book.save()

    def import_picture(self, file_path, options, continue_on_error=True):
        """Import a single picture XML file.

        Images are looked up through an ``ImageStore`` rooted at the
        directory containing ``file_path``.

        Args:
            file_path: path to the picture's XML file.
            options: parsed command options; reads ``force``.
            continue_on_error: when true (default), any exception is printed
                and ``None`` is returned; when false, it is re-raised.

        Returns:
            The value returned by ``Picture.from_xml_file``, or ``None`` when
            the import failed and ``continue_on_error`` is true.
        """
        try:
            image_store = ImageStore(os.path.dirname(file_path))
            picture = Picture.from_xml_file(
                file_path, image_store=image_store,
                overwrite=options.get('force'))
        except Exception as ex:
            if not continue_on_error:
                raise
            print("%s: %s" % (file_path, ex))
            return None
        return picture

    @transaction.atomic
    def handle(self, **options):
        """Run the import over all directories given on the command line.

        Unless disabled by ``--no-search-index`` or
        ``settings.NO_SEARCH_INDEX``, tags are indexed for search first.
        Then each directory's ``.xml`` files are processed in sorted order.
        A book that raises ``Book.DoesNotExist`` is pushed to the end of the
        queue and retried; if it comes up again without any file having been
        imported since it was postponed, the exception is re-raised.
        A summary is printed at the end.
        """
        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                raise

        files_imported = 0
        files_skipped = 0

        for dir_name in options['directory']:
            if not os.path.isdir(dir_name):
                print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                continue

            # files queue
            files = sorted(os.listdir(dir_name))
            # file name -> value of files_imported when it was last postponed
            postponed = {}
            while files:
                file_name = files.pop(0)
                file_path = os.path.join(dir_name, file_name)

                # Skip files that are not XML files
                if os.path.splitext(file_path)[1] != '.xml':
                    continue

                if verbose:
                    print("Parsing '%s'" % file_path)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # Import book files
                try:
                    if import_picture:
                        # import_picture() reports failures itself and
                        # returns None; count those as skipped rather than
                        # as successfully imported.
                        if self.import_picture(file_path, options) is None:
                            files_skipped += 1
                            continue
                    else:
                        self.import_book(file_path, options)

                    files_imported += 1

                except (Book.AlreadyExists, Picture.AlreadyExists):
                    print(self.style.ERROR(
                        '%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                        file_path))
                    files_skipped += 1

                except Book.DoesNotExist:
                    if file_name not in postponed or postponed[file_name] < files_imported:
                        # push it back into the queue, maybe the missing child will show up
                        if verbose:
                            print(self.style.NOTICE('Waiting for missing children'))
                        files.append(file_name)
                        postponed[file_name] = files_imported
                    else:
                        # we're in a loop, nothing's being imported - some child is really missing
                        raise

        # Print results
        print()
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print()