profiling
[wolnelektury.git] / apps / catalogue / management / commands / importbooks.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os
6 import sys
7 import time
8 from optparse import make_option
9 from django.conf import settings
10 from django.core.management.base import BaseCommand
11 from django.core.management.color import color_style
12 from django.core.files import File
13
14 from wolnelektury_core.management.profile import profile
15 import objgraph
16 import gc
17
18 from catalogue.models import Book
19 from picture.models import Picture
20
21 from search import Index
22
class Command(BaseCommand):
    """Import book (or picture) XML files from the given directories.

    Every ``*.xml`` file found directly in each directory is imported in
    sorted order and committed in its own transaction step.  A book whose
    import raises ``Book.DoesNotExist`` is pushed to the back of the queue
    and retried for as long as other files keep getting imported.
    """
    option_list = BaseCommand.option_list + (
        # NOTE: this is a flag, not a level -- it only turns `verbose` off.
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Print only a progress dot per file instead of per-file messages'),
        make_option('-f', '--force', action='store_true', dest='force', default=False,
            help='Overwrite works already in the catalogue'),
        make_option('-D', '--dont-build', dest='dont_build',
            metavar="FORMAT,...",
            help="Skip building specified formats"),
        make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
            help='Skip indexing imported works for search'),
        make_option('-w', '--wait-until', dest='wait_until', metavar='TIME',
            help='Wait until specified time (Y-M-D h:m:s)'),
        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
            help='Import pictures'),
    )
    help = 'Imports books from the specified directories.'
    args = 'directory [directory ...]'

    def import_book(self, file_path, options):
        """Import one book XML file and attach ebook files lying next to it.

        For each format in ``Book.ebook_formats``, a file sharing the XML
        file's base name (``<base>.<format>``) is stored on the book's
        ``<format>_file`` field if it exists on disk.

        :param file_path: path to the book's XML file
        :param options: command options dict; reads ``verbose``, ``force``
            and ``dont_build`` (comma-separated list of formats)
        :returns: the imported ``Book``
        """
        verbose = options.get('verbose')
        dont_build = options.get('dont_build')
        if dont_build:
            dont_build = dont_build.lower().split(',')
        else:
            dont_build = None

        file_base, ext = os.path.splitext(file_path)
        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
                                  dont_build=dont_build,
                                  search_index_tags=False)
        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # Ebook files are binary; open them as such and make sure the
            # handle is closed once the field has stored the content.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False
                    )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        book.save()
        return book

    def import_picture(self, file_path, options):
        """Import one picture XML file and return the resulting ``Picture``.

        :param file_path: path to the picture's XML file
        :param options: command options dict; only ``force`` is read
        """
        picture = Picture.from_xml_file(file_path, overwrite=options.get('force'))
        return picture

    # To profile a run, decorate handle() with @profile
    # (imported from wolnelektury_core.management.profile).
    def handle(self, *directories, **options):
        """Import all XML files from ``directories``.

        :param directories: directories to scan (non-recursively) for
            ``*.xml`` files; anything that is not a directory is reported
            and skipped
        :param options: parsed command-line options (see ``option_list``)
        :raises Book.DoesNotExist: when a postponed file still fails and no
            other file has been imported since it was last postponed
        """
        from collections import deque
        from django.db import transaction

        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        wait_until = None
        if options.get('wait_until'):
            wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
            if verbose:
                print("Will wait until %s; it's %f seconds from now" % (
                    time.strftime('%Y-%m-%d %H:%M:%S',
                    time.localtime(wait_until)), wait_until - time.time()))

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Bare raise keeps the original traceback.
                raise

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        files_imported = 0
        files_skipped = 0

        try:
            for dir_name in directories:
                if not os.path.isdir(dir_name):
                    print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                    continue

                # Queue of files still to be processed; postponed files are
                # appended to its end.
                files = deque(sorted(os.listdir(dir_name)))
                # file name -> value of files_imported when it was postponed
                postponed = {}
                while files:
                    file_name = files.popleft()
                    file_path = os.path.join(dir_name, file_name)
                    file_base, ext = os.path.splitext(file_path)

                    # Skip files that are not XML files
                    if ext != '.xml':
                        continue

                    if verbose:
                        print("Parsing '%s'" % file_path)
                    else:
                        sys.stdout.write('.')
                        sys.stdout.flush()

                    # Import book files
                    try:
                        if import_picture:
                            self.import_picture(file_path, options)
                        else:
                            # Memory instrumentation: objgraph reports the
                            # growth in object counts around each import.
                            objgraph.show_growth()
                            self.import_book(file_path, options)
                            objgraph.show_growth()
                            print("--------------------")

                        files_imported += 1
                        transaction.commit()

                    except (Book.AlreadyExists, Picture.AlreadyExists):
                        print(self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                            file_path))
                        files_skipped += 1

                    except Book.DoesNotExist:
                        if file_name not in postponed or postponed[file_name] < files_imported:
                            # push it back into the queue, maybe the missing child will show up
                            if verbose:
                                print(self.style.NOTICE('Waiting for missing children'))
                            files.append(file_name)
                            postponed[file_name] = files_imported
                        else:
                            # we're in a loop, nothing's being imported - some child is really missing
                            raise

            # Print results
            print("")
            print("Results: %d files imported, %d skipped, %d total." % (
                files_imported, files_skipped, files_imported + files_skipped))
            print("")

            if wait_until:
                print('Waiting...')
                # Check explicitly instead of relying on sleep() rejecting
                # a negative argument, which differs between platforms.
                remaining = wait_until - time.time()
                if remaining > 0:
                    time.sleep(remaining)
                else:
                    print("it's already too late")

            transaction.commit()
        except BaseException:
            # Discard the partially imported file so that transaction
            # management can be left cleanly, then let the error propagate.
            transaction.rollback()
            raise
        finally:
            transaction.leave_transaction_management()