Generally working version.
[wolnelektury.git] / src / catalogue / management / commands / importbooks.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os
6 import sys
7 from optparse import make_option
8 from django.conf import settings
9 from django.core.management.base import BaseCommand
10 from django.core.management.color import color_style
11 from django.core.files import File
12 from librarian.picture import ImageStore
13 from wolnelektury.management.profile import profile
14
15 from catalogue.models import Book
16 from picture.models import Picture
17
18 from search.index import Index
19
20
class Command(BaseCommand):
    """Import book (or picture) XML files found in the given directories.

    Every ``*.xml`` file directly inside each directory argument is passed
    to ``Book.from_xml_file`` (or to ``Picture.from_xml_file`` when
    ``--picture`` is given).  The directory loop runs inside a single
    ``transaction.atomic()`` block.
    """
    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Less output: print a dot per parsed file instead of its path'),
        make_option('-f', '--force', action='store_true', dest='force', default=False,
            help='Overwrite works already in the catalogue'),
        make_option('-D', '--dont-build', dest='dont_build',
            metavar="FORMAT,...",
            help="Skip building specified formats"),
        make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
            help='Skip indexing imported works for search'),
        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
            help='Import pictures'),
    )
    help = 'Imports books from the specified directories.'
    args = 'directory [directory ...]'

    def import_book(self, file_path, options):
        """Import one book XML file and attach ebook files lying next to it.

        For each format in ``Book.ebook_formats``, a file named like the XML
        file but with that format as its extension is stored in the book's
        ``<format>_file`` field, if such a file exists.

        :param file_path: path of the book XML file
        :param options: command options; reads ``verbose``, ``dont_build``
            (comma-separated format names) and ``force``
        """
        verbose = options.get('verbose')
        dont_build = options.get('dont_build')
        if dont_build:
            dont_build = dont_build.lower().split(',')
        else:
            dont_build = None

        file_base = os.path.splitext(file_path)[0]
        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
                                  dont_build=dont_build,
                                  search_index_tags=False)
        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # Ebook files are binary; the context manager makes sure the
            # handle is closed once the field has stored the content.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False
                    )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        # The file fields were saved with save=False, so store the model once.
        book.save()

    def import_picture(self, file_path, options, continue_on_error=True):
        """Import one picture XML file.

        Images are looked up through an ``ImageStore`` rooted at the
        directory containing the XML file.

        :param file_path: path of the picture XML file
        :param options: command options; reads ``force``
        :param continue_on_error: when true, any exception is printed as
            ``"<file_path>: <error>"`` and ``None`` is returned; when false
            the exception propagates
        :return: the imported picture, or ``None`` if the import failed and
            ``continue_on_error`` is true
        """
        try:
            image_store = ImageStore(os.path.dirname(file_path))
            picture = Picture.from_xml_file(file_path, image_store=image_store, overwrite=options.get('force'))
        except Exception as ex:
            if not continue_on_error:
                # Bare raise keeps the original traceback.
                raise
            print("%s: %s" % (file_path, ex))
            return None
        return picture

    #    @profile
    def handle(self, *directories, **options):
        """Import all XML files from ``directories`` and print a summary.

        A file whose import raises ``Book.DoesNotExist`` is pushed to the
        end of the queue and retried later; if nothing new has been imported
        since it was last postponed, the exception is re-raised.
        Files that already exist in the catalogue, and pictures whose import
        failed, are counted as skipped.
        """
        from django.db import transaction

        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                raise

        files_imported = 0
        files_skipped = 0

        # Start transaction management.
        with transaction.atomic():
            for dir_name in directories:
                if not os.path.isdir(dir_name):
                    print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                    continue

                # files queue
                files = sorted(os.listdir(dir_name))
                # file name -> value of files_imported when it was postponed
                postponed = {}
                while files:
                    file_name = files.pop(0)
                    file_path = os.path.join(dir_name, file_name)

                    # Skip files that are not XML files
                    if os.path.splitext(file_path)[1] != '.xml':
                        continue

                    if verbose:
                        print("Parsing '%s'" % file_path)
                    else:
                        sys.stdout.write('.')
                        sys.stdout.flush()

                    # Import book files
                    try:
                        if import_picture:
                            # import_picture reports failures itself and
                            # signals them by returning None.
                            imported = self.import_picture(file_path, options) is not None
                        else:
                            self.import_book(file_path, options)
                            imported = True

                    except (Book.AlreadyExists, Picture.AlreadyExists):
                        print(self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                            file_path))
                        files_skipped += 1

                    except Book.DoesNotExist:
                        if file_name not in postponed or postponed[file_name] < files_imported:
                            # push it back into the queue, maybe the missing child will show up
                            if verbose:
                                print(self.style.NOTICE('Waiting for missing children'))
                            files.append(file_name)
                            postponed[file_name] = files_imported
                        else:
                            # we're in a loop, nothing's being imported - some child is really missing
                            raise

                    else:
                        if imported:
                            files_imported += 1
                        else:
                            files_skipped += 1

        # Print results
        print("")
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print("")