some fixes to oaipmh: namespaces
[wolnelektury.git] / apps / catalogue / management / commands / importbooks.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os
6 import sys
7 import time
8 from optparse import make_option
9 from django.conf import settings
10 from django.core.management.base import BaseCommand
11 from django.core.management.color import color_style
12 from django.core.files import File
13
14 from catalogue.models import Book
15 from picture.models import Picture
16
17 from search import Index
18
19 class Command(BaseCommand):
20     option_list = BaseCommand.option_list + (
21         make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
22             help='Verbosity level; 0=minimal output, 1=normal output, 2=all output'),
23         make_option('-f', '--force', action='store_true', dest='force', default=False,
24             help='Overwrite works already in the catalogue'),
25         make_option('-D', '--dont-build', dest='dont_build',
26             metavar="FORMAT,...",
27             help="Skip building specified formats"),
28         make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
29             help='Skip indexing imported works for search'),
30         make_option('-w', '--wait-until', dest='wait_until', metavar='TIME',
31             help='Wait until specified time (Y-M-D h:m:s)'),
32         make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
33             help='Import pictures'),
34     )
35     help = 'Imports books from the specified directories.'
36     args = 'directory [directory ...]'
37
38     def import_book(self, file_path, options):
39         verbose = options.get('verbose')
40         if options.get('dont_build'):
41             dont_build = options.get('dont_build').lower().split(',')
42         else:
43             dont_build = None
44         file_base, ext = os.path.splitext(file_path)
45         book = Book.from_xml_file(file_path, overwrite=options.get('force'),
46                                   dont_build=dont_build,
47                                   search_index=options.get('search_index'),
48                                   search_index_reuse=True,
49                                   search_index_tags=False)
50         for ebook_format in Book.ebook_formats:
51             if os.path.isfile(file_base + '.' + ebook_format):
52                 getattr(book, '%s_file' % ebook_format).save(
53                     '%s.%s' % (book.slug, ebook_format), 
54                     File(file(file_base + '.' + ebook_format)),
55                     save=False
56                     )
57                 if verbose:
58                     print "Importing %s.%s" % (file_base, ebook_format)
59         book.save()
60
61     def import_picture(self, file_path, options):
62         picture = Picture.from_xml_file(file_path, overwrite=options.get('force'))
63         return picture
64
65     def handle(self, *directories, **options):
66         from django.db import transaction
67
68         self.style = color_style()
69
70         verbose = options.get('verbose')
71         force = options.get('force')
72         show_traceback = options.get('traceback', False)
73         import_picture = options.get('import_picture')
74
75         wait_until = None
76         if options.get('wait_until'):
77             wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
78             if verbose > 0:
79                 print "Will wait until %s; it's %f seconds from now" % (
80                     time.strftime('%Y-%m-%d %H:%M:%S',
81                     time.localtime(wait_until)), wait_until - time.time())
82
83         if options.get('search_index') and not settings.NO_SEARCH_INDEX:
84             index = Index()
85             index.open()
86             try:
87                 index.index_tags()
88             finally:
89                 index.close()
90
91         # Start transaction management.
92         transaction.commit_unless_managed()
93         transaction.enter_transaction_management()
94         transaction.managed(True)
95
96         files_imported = 0
97         files_skipped = 0
98         
99         for dir_name in directories:
100             if not os.path.isdir(dir_name):
101                 print self.style.ERROR("%s: Not a directory. Skipping." % dir_name)
102             else:
103                 # files queue
104                 files = sorted(os.listdir(dir_name))
105                 postponed = {}
106                 while files:
107                     file_name = files.pop(0)
108                     file_path = os.path.join(dir_name, file_name)
109                     file_base, ext = os.path.splitext(file_path)
110
111                     # Skip files that are not XML files
112                     if not ext == '.xml':
113                         continue
114
115                     if verbose > 0:
116                         print "Parsing '%s'" % file_path
117                     else:
118                         sys.stdout.write('.')
119                         sys.stdout.flush()
120
121                     # Import book files
122                     try:
123                         if import_picture:
124                             self.import_picture(file_path, options)
125                         else:
126                             self.import_book(file_path, options)
127                         files_imported += 1
128                         transaction.commit()
129                         
130                     except (Book.AlreadyExists, Picture.AlreadyExists):
131                         print self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
132                             file_path)
133                         files_skipped += 1
134
135                     except Book.DoesNotExist, e:
136                         if file_name not in postponed or postponed[file_name] < files_imported:
137                             # push it back into the queue, maybe the missing child will show up
138                             if verbose:
139                                 print self.style.NOTICE('Waiting for missing children')
140                             files.append(file_name)
141                             postponed[file_name] = files_imported
142                         else:
143                             # we're in a loop, nothing's being imported - some child is really missing
144                             raise e
145
146         # Print results
147         print
148         print "Results: %d files imported, %d skipped, %d total." % (
149             files_imported, files_skipped, files_imported + files_skipped)
150         print
151
152         if wait_until:
153             print 'Waiting...'
154             try:
155                 time.sleep(wait_until - time.time())
156             except IOError:
157                 print "it's already too late"
158
159         transaction.commit()
160         transaction.leave_transaction_management()
161