So the apparent memory leak seems to be just Django's connection.queries debug logging.
[wolnelektury.git] / apps / catalogue / management / commands / importbooks.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os
6 import sys
7 import time
8 from optparse import make_option
9 from django.conf import settings
10 from django.core.management.base import BaseCommand
11 from django.core.management.color import color_style
12 from django.core.files import File
13 from catalogue.utils import trim_query_log
14
15 from wolnelektury_core.management.profile import profile
16 import objgraph
17 import gc
18
19 from catalogue.models import Book
20 from picture.models import Picture
21
22 from search import Index
23
class Command(BaseCommand):
    """Import book (or picture) XML files found in the given directories.

    Every ``*.xml`` file in each directory is handed to
    ``Book.from_xml_file`` (or ``Picture.from_xml_file`` when ``--picture``
    is given). The transaction is committed after each successful import.
    """

    option_list = BaseCommand.option_list + (
        # NOTE: this is a boolean flag (store_false), not a numeric level.
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Print less progress output (a dot per file instead of per-file messages)'),
        make_option('-f', '--force', action='store_true', dest='force', default=False,
            help='Overwrite works already in the catalogue'),
        make_option('-D', '--dont-build', dest='dont_build',
            metavar="FORMAT,...",
            help="Skip building specified formats"),
        make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
            help='Skip indexing imported works for search'),
        make_option('-w', '--wait-until', dest='wait_until', metavar='TIME',
            help='Wait until specified time (Y-M-D h:m:s)'),
        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
            help='Import pictures'),
    )
    help = 'Imports books from the specified directories.'
    args = 'directory [directory ...]'

    def import_book(self, file_path, options):
        """Import one book XML file and attach ebook files lying next to it.

        For every format in ``Book.ebook_formats``, a file with the same base
        name as ``file_path`` and that format as its extension is stored on
        the book's ``<format>_file`` field. The book is saved once at the end.

        :param file_path: path to the book's XML file
        :param options: parsed command options; reads ``verbose``, ``force``
            and ``dont_build`` (a comma-separated list of formats)
        """
        verbose = options.get('verbose')
        dont_build = options.get('dont_build')
        if dont_build:
            dont_build = dont_build.lower().split(',')
        else:
            dont_build = None

        file_base = os.path.splitext(file_path)[0]
        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
                                  dont_build=dont_build,
                                  search_index_tags=False)
        for ebook_format in Book.ebook_formats:
            ebook_path = file_base + '.' + ebook_format
            if not os.path.isfile(ebook_path):
                continue
            # Open in binary mode and close the handle as soon as the field
            # has stored the content, so handles do not pile up across a
            # long import run.
            with open(ebook_path, 'rb') as ebook_file:
                getattr(book, '%s_file' % ebook_format).save(
                    '%s.%s' % (book.slug, ebook_format),
                    File(ebook_file),
                    save=False
                    )
            if verbose:
                print("Importing %s.%s" % (file_base, ebook_format))
        book.save()

    def import_picture(self, file_path, options):
        """Import one picture XML file and return the resulting ``Picture``.

        :param file_path: path to the picture's XML file
        :param options: parsed command options; reads ``force``
        """
        return Picture.from_xml_file(file_path, overwrite=options.get('force'))

    def handle(self, *directories, **options):
        """Import all ``*.xml`` files from ``directories``.

        A file whose import raises ``Book.DoesNotExist`` is pushed to the end
        of the queue and retried later; if nothing else has been imported
        since it was last postponed, the exception is re-raised. Files that
        raise ``AlreadyExists`` are reported and counted as skipped.

        With ``--wait-until`` the command sleeps until the given time before
        the final commit.
        """
        from django.db import transaction

        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        wait_until = None
        if options.get('wait_until'):
            wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
            if verbose:
                print("Will wait until %s; it's %f seconds from now" % (
                    time.strftime('%Y-%m-%d %H:%M:%S',
                    time.localtime(wait_until)), wait_until - time.time()))

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Bare raise keeps the original traceback.
                raise

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        files_imported = 0
        files_skipped = 0

        for dir_name in directories:
            if not os.path.isdir(dir_name):
                print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                continue

            # files queue
            files = sorted(os.listdir(dir_name))
            # file name -> value of files_imported when it was last postponed
            postponed = {}
            while files:
                # presumably trims Django's connection.queries debug log so
                # it does not grow over a long run -- confirm in catalogue.utils
                trim_query_log(0)
                file_name = files.pop(0)
                file_path = os.path.join(dir_name, file_name)

                # Skip files that are not XML files
                if os.path.splitext(file_path)[1] != '.xml':
                    continue

                if verbose:
                    print("Parsing '%s'" % file_path)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # Import book files
                try:
                    if import_picture:
                        self.import_picture(file_path, options)
                    else:
                        # NOTE(review): objgraph growth dumps look like
                        # leftover leak diagnostics; they print even with
                        # --quiet. Consider removing or gating them.
                        objgraph.show_growth()
                        self.import_book(file_path, options)
                        objgraph.show_growth()
                        print("--------------------")

                    files_imported += 1
                    transaction.commit()

                except (Book.AlreadyExists, Picture.AlreadyExists):
                    print(self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                        file_path))
                    files_skipped += 1

                except Book.DoesNotExist:
                    if file_name not in postponed or postponed[file_name] < files_imported:
                        # push it back into the queue, maybe the missing child will show up
                        if verbose:
                            print(self.style.NOTICE('Waiting for missing children'))
                        files.append(file_name)
                        postponed[file_name] = files_imported
                    else:
                        # we're in a loop, nothing's being imported - some child is really missing
                        raise

        # Print results
        print("")
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print("")

        if wait_until is not None:
            print('Waiting...')
            # Check explicitly instead of relying on the error that
            # time.sleep() raises for a negative argument.
            remaining = wait_until - time.time()
            if remaining > 0:
                time.sleep(remaining)
            else:
                print("it's already too late")

        transaction.commit()
        transaction.leave_transaction_management()