Minor fix in OAI-PMH.
[wolnelektury.git] / apps / catalogue / management / commands / importbooks.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os
6 import sys
7 import time
8 from optparse import make_option
9 from django.conf import settings
10 from django.core.management.base import BaseCommand
11 from django.core.management.color import color_style
12 from django.core.files import File
13 from catalogue.utils import trim_query_log
14 from librarian.picture import ImageStore
15 from wolnelektury_core.management.profile import profile
16
17 from catalogue.models import Book
18 from picture.models import Picture
19
20 from search.index import Index
21
22
class Command(BaseCommand):
    """Management command importing book (or picture) XML files from directories.

    Every ``*.xml`` file found directly inside each given directory is
    imported as a book, or as a picture when ``--picture`` is passed.
    """
    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Print a dot per parsed file instead of progress messages'),
        make_option('-f', '--force', action='store_true', dest='force', default=False,
            help='Overwrite works already in the catalogue'),
        make_option('-D', '--dont-build', dest='dont_build',
            metavar="FORMAT,...",
            help="Skip building specified formats"),
        make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
            help='Skip indexing imported works for search'),
        make_option('-w', '--wait-until', dest='wait_until', metavar='TIME',
            help='Wait until specified time (Y-M-D h:m:s)'),
        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
            help='Import pictures'),
    )
    help = 'Imports books from the specified directories.'
    args = 'directory [directory ...]'

    def import_book(self, file_path, options):
        """Import one book XML file and attach ebook files lying next to it.

        For every format in ``Book.ebook_formats``, a file named like the XML
        file but with that format's extension is attached to the book if it
        exists on disk.

        :param file_path: path to the book's XML file
        :param options: parsed command options; reads ``verbose``,
            ``dont_build`` (comma-separated format names) and ``force``
        """
        verbose = options.get('verbose')
        dont_build = options.get('dont_build')
        if dont_build:
            dont_build = dont_build.lower().split(',')
        else:
            dont_build = None
        file_base = os.path.splitext(file_path)[0]
        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
                                  dont_build=dont_build,
                                  search_index_tags=False)
        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # Ebook files are binary; open them as such and make sure the
            # handle is closed once the content has been handed to the field.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False
                    )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        book.save()

    def import_picture(self, file_path, options, continue_on_error=True):
        """Import one picture XML file, using its directory as the image store.

        :param file_path: path to the picture's XML file
        :param options: parsed command options; reads ``force``
        :param continue_on_error: when true, an import error is printed and
            ``None`` is returned; when false, the error is re-raised
        :returns: whatever ``Picture.from_xml_file`` returned, or ``None``
            if the import failed and ``continue_on_error`` is true
        """
        try:
            image_store = ImageStore(os.path.dirname(file_path))
            picture = Picture.from_xml_file(file_path, image_store=image_store, overwrite=options.get('force'))
        except Exception as ex:
            if not continue_on_error:
                # Bare raise keeps the original traceback.
                raise
            print("%s: %s" % (file_path, ex))
            return None
        return picture

    #    @profile
    def handle(self, *directories, **options):
        """Import all XML files from ``directories`` and print a summary.

        Each successfully imported file is committed on its own. A file whose
        import raises ``Book.DoesNotExist`` is pushed to the end of the queue
        and retried, as long as something else has been imported since it was
        last postponed; otherwise the exception is re-raised. If
        ``--wait-until`` was given, the command sleeps until that time before
        the final commit.

        :param directories: directories to scan (not recursively)
        :param options: parsed command options
        """
        from django.db import transaction

        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        wait_until = None
        if options.get('wait_until'):
            wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
            if verbose:
                print("Will wait until %s; it's %f seconds from now" % (
                    time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(wait_until)),
                    wait_until - time.time()))

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                raise

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        files_imported = 0
        files_skipped = 0

        try:
            for dir_name in directories:
                if not os.path.isdir(dir_name):
                    print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                    continue

                # files queue
                files = sorted(os.listdir(dir_name))
                # file name -> value of files_imported when it was last postponed
                postponed = {}
                while files:
                    trim_query_log(0)
                    file_name = files.pop(0)
                    file_path = os.path.join(dir_name, file_name)

                    # Skip files that are not XML files
                    if os.path.splitext(file_path)[1] != '.xml':
                        continue

                    if verbose:
                        print("Parsing '%s'" % file_path)
                    else:
                        sys.stdout.write('.')
                        sys.stdout.flush()

                    # Import book files
                    try:
                        if import_picture:
                            # import_picture() reports errors itself and
                            # returns None for a picture it could not import.
                            # NOTE(review): assumes Picture.from_xml_file never
                            # returns None on success - confirm.
                            imported = self.import_picture(file_path, options) is not None
                        else:
                            self.import_book(file_path, options)
                            imported = True

                        if imported:
                            files_imported += 1
                            transaction.commit()
                        else:
                            files_skipped += 1

                    except (Book.AlreadyExists, Picture.AlreadyExists):
                        print(self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                            file_path))
                        files_skipped += 1

                    except Book.DoesNotExist:
                        if file_name not in postponed or postponed[file_name] < files_imported:
                            # push it back into the queue, maybe the missing child will show up
                            if verbose:
                                print(self.style.NOTICE('Waiting for missing children'))
                            files.append(file_name)
                            postponed[file_name] = files_imported
                        else:
                            # we're in a loop, nothing's being imported - some child is really missing
                            raise

            # Print results
            print("")
            print("Results: %d files imported, %d skipped, %d total." % (
                files_imported, files_skipped, files_imported + files_skipped))
            print("")

            if wait_until:
                print('Waiting...')
                remaining = wait_until - time.time()
                if remaining < 0:
                    print("it's already too late")
                else:
                    time.sleep(remaining)

            transaction.commit()
        except BaseException:
            # Discard whatever the failing import left uncommitted so that
            # transaction management can be left in a clean state.
            transaction.rollback()
            raise
        finally:
            transaction.leave_transaction_management()