remove gazeta links - lektury.gazeta.pl is dead
[wolnelektury.git] / src / catalogue / management / commands / importbooks.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os
6 import sys
7 from optparse import make_option
8 from django.conf import settings
9 from django.core.management.base import BaseCommand
10 from django.core.management.color import color_style
11 from django.core.files import File
12 from django.db import transaction
13 from librarian.picture import ImageStore
14 # from wolnelektury.management.profile import profile
15
16 from catalogue.models import Book
17 from picture.models import Picture
18
19 from search.index import Index
20
21
class Command(BaseCommand):
    """Import book (or, with ``--picture``, picture) XML files from directories.

    Every ``*.xml`` file found directly inside each given directory is passed
    to ``Book.from_xml_file`` or ``Picture.from_xml_file``. For books, ebook
    files lying next to the XML (same base name, extension taken from
    ``Book.ebook_formats``) are attached to the imported book.
    """
    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
                    help='Quiet mode: print a progress dot per file instead of per-file messages'),
        make_option('-f', '--force', action='store_true', dest='force', default=False,
                    help='Overwrite works already in the catalogue'),
        make_option('-D', '--dont-build', dest='dont_build',
                    metavar="FORMAT,...",
                    help="Skip building specified formats"),
        make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
                    help='Skip indexing imported works for search'),
        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
                    help='Import pictures'),
    )
    help = 'Imports books from the specified directories.'
    args = 'directory [directory ...]'

    def import_book(self, file_path, options):
        """Import one book XML file and attach ebook files found next to it.

        ``options`` is the command's option dict; the keys read here are
        ``verbose``, ``dont_build`` (comma-separated format names) and
        ``force`` (passed on as ``overwrite``).

        Exceptions raised by ``Book.from_xml_file`` propagate to the caller.
        """
        verbose = options.get('verbose')
        dont_build = options.get('dont_build')
        if dont_build:
            dont_build = dont_build.lower().split(',')
        else:
            dont_build = None

        file_base = os.path.splitext(file_path)[0]
        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
                                  dont_build=dont_build,
                                  search_index_tags=False)

        for ebook_format in Book.ebook_formats:
            ebook_path = '%s.%s' % (file_base, ebook_format)
            if not os.path.isfile(ebook_path):
                continue
            # Ebook files are binary data: open them as such, and make sure
            # the handle is closed once the content has been handed to storage.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False)
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        # The field saves above used save=False; persist the model once here.
        book.save()

    def import_picture(self, file_path, options, continue_on_error=True):
        """Import one picture XML file, using its directory as the image store.

        Returns the imported picture. When ``continue_on_error`` is true, any
        exception is reported on stdout and ``None`` is returned instead;
        otherwise the exception is re-raised.
        """
        try:
            image_store = ImageStore(os.path.dirname(file_path))
            picture = Picture.from_xml_file(file_path, image_store=image_store, overwrite=options.get('force'))
        except Exception as ex:
            if not continue_on_error:
                # Bare raise keeps the original traceback.
                raise
            print("%s: %s" % (file_path, ex))
            return None
        return picture

    # @profile
    @transaction.atomic
    def handle(self, *directories, **options):
        """Import all ``*.xml`` files from ``directories`` and print a summary.

        Unless search indexing is disabled (``--no-search-index`` or
        ``settings.NO_SEARCH_INDEX``), tags are indexed before the import.

        A file whose import raises ``Book.DoesNotExist`` is pushed back to the
        end of the queue, in case the missing book is imported later from the
        same directory. If a postponed file comes up again and nothing has
        been imported in the meantime, the exception is re-raised.
        """
        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                raise

        files_imported = 0
        files_skipped = 0

        for dir_name in directories:
            if not os.path.isdir(dir_name):
                print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                continue

            # files queue
            files = sorted(os.listdir(dir_name))
            # file name -> value of files_imported when it was last postponed
            postponed = {}
            while files:
                file_name = files.pop(0)
                file_path = os.path.join(dir_name, file_name)

                # Skip files that are not XML files
                if os.path.splitext(file_path)[1] != '.xml':
                    continue

                if verbose:
                    print("Parsing '%s'" % file_path)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # Import book files
                try:
                    if import_picture:
                        if self.import_picture(file_path, options) is None:
                            # import_picture already reported the error;
                            # do not count the file as imported.
                            files_skipped += 1
                            continue
                    else:
                        self.import_book(file_path, options)

                    files_imported += 1

                except (Book.AlreadyExists, Picture.AlreadyExists):
                    print(self.style.ERROR(
                        '%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                        file_path))
                    files_skipped += 1

                except Book.DoesNotExist:
                    if file_name in postponed and postponed[file_name] >= files_imported:
                        # we're in a loop, nothing's being imported - some child is really missing
                        raise
                    # push it back into the queue, maybe the missing child will show up
                    if verbose:
                        print(self.style.NOTICE('Waiting for missing children'))
                    files.append(file_name)
                    postponed[file_name] = files_imported

        # Print results
        print("")
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print("")