increase word spacing in WL XML
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Single storage instance shared by the cover field and the per-format
# ebook file fields defined in this module.
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# upload_to callables for the cover image and its thumbnail; both name the
# file after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given ebook path template."""
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
47 class Book(models.Model):
48     """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by variants of the same work: from_text_and_meta() sets it
    # to book_info.variant_of.slug, or to the book's own slug when there is
    # no variant_of.
    # NOTE(review): the verbose name duplicates the one used for `slug`.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the verbose name repeats 'creation date' from created_at;
    # this looks like a copy-paste slip. Changing it affects translations and
    # migrations, so confirm before touching it.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's parts; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() stores book_info.to_dict() here.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    # NOTE(review): unlike `cover`, no storage= is passed here -- confirm
    # that this is intentional.
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # For every entry of `formats`, add_file_fields() (bottom of this module)
    # attaches a "<format>_file" field to this class.
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt from the `parent` links by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta(); `html_built`
    # is not sent anywhere in this module (presumably by the HTML builder --
    # verify against its sender).
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and overwrite is off."""
        pass

    class Meta:
        # Default ordering: by author sort key, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
101
102     def __unicode__(self):
103         return self.title
104
105     def get_initial(self):
106         try:
107             return re.search(r'\w', self.title, re.U).group(0)
108         except AttributeError:
109             return ''
110
111     def authors(self):
112         return self.tags.filter(category='author')
113
114     def tag_unicode(self, category):
115         relations = prefetched_relations(self, category)
116         if relations:
117             return ', '.join(rel.tag.name for rel in relations)
118         else:
119             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
120
121     def author_unicode(self):
122         return self.tag_unicode('author')
123
124     def translator(self):
125         translators = self.extra_info.get('translators')
126         if not translators:
127             return None
128         if len(translators) > 3:
129             translators = translators[:2]
130             others = ' i inni'
131         else:
132             others = ''
133         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
134
135     def save(self, force_insert=False, force_update=False, **kwargs):
136         from sortify import sortify
137
138         self.sort_key = sortify(self.title)[:120]
139         self.title = unicode(self.title)  # ???
140
141         try:
142             author = self.authors().first().sort_key
143         except AttributeError:
144             author = u''
145         self.sort_key_author = author
146
147         ret = super(Book, self).save(force_insert, force_update, **kwargs)
148
149         return ret
150
151     @permalink
152     def get_absolute_url(self):
153         return 'catalogue.views.book_detail', [self.slug]
154
155     @staticmethod
156     @permalink
157     def create_url(slug):
158         return 'catalogue.views.book_detail', [slug]
159
160     def gallery_path(self):
161         return gallery_path(self.slug)
162
163     def gallery_url(self):
164         return gallery_url(self.slug)
165
166     @property
167     def name(self):
168         return self.title
169
170     def language_code(self):
171         return constants.LANGUAGES_3TO2.get(self.language, self.language)
172
173     def language_name(self):
174         return dict(settings.LANGUAGES).get(self.language_code(), "")
175
176     def is_foreign(self):
177         return self.language_code() != settings.LANGUAGE_CODE
178
179     def has_media(self, type_):
180         if type_ in Book.formats:
181             return bool(getattr(self, "%s_file" % type_))
182         else:
183             return self.media.filter(type=type_).exists()
184
185     def get_media(self, type_):
186         if self.has_media(type_):
187             if type_ in Book.formats:
188                 return getattr(self, "%s_file" % type_)
189             else:
190                 return self.media.filter(type=type_)
191         else:
192             return None
193
194     def get_mp3(self):
195         return self.get_media("mp3")
196
197     def get_odt(self):
198         return self.get_media("odt")
199
200     def get_ogg(self):
201         return self.get_media("ogg")
202
203     def get_daisy(self):
204         return self.get_media("daisy")
205
206     def has_description(self):
207         return len(self.description) > 0
208     has_description.short_description = _('description')
209     has_description.boolean = True
210
211     # ugly ugly ugly
212     def has_mp3_file(self):
213         return bool(self.has_media("mp3"))
214     has_mp3_file.short_description = 'MP3'
215     has_mp3_file.boolean = True
216
217     def has_ogg_file(self):
218         return bool(self.has_media("ogg"))
219     has_ogg_file.short_description = 'OGG'
220     has_ogg_file.boolean = True
221
222     def has_daisy_file(self):
223         return bool(self.has_media("daisy"))
224     has_daisy_file.short_description = 'DAISY'
225     has_daisy_file.boolean = True
226
227     def wldocument(self, parse_dublincore=True, inherit=True):
228         from catalogue.import_utils import ORMDocProvider
229         from librarian.parser import WLDocument
230
231         if inherit and self.parent:
232             meta_fallbacks = self.parent.cover_info()
233         else:
234             meta_fallbacks = None
235
236         return WLDocument.from_file(
237             self.xml_file.path,
238             provider=ORMDocProvider(self),
239             parse_dublincore=parse_dublincore,
240             meta_fallbacks=meta_fallbacks)
241
242     @staticmethod
243     def zip_format(format_):
244         def pretty_file_name(book):
245             return "%s/%s.%s" % (
246                 book.extra_info['author'],
247                 book.slug,
248                 format_)
249
250         field_name = "%s_file" % format_
251         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
252         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
253         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
254
255     def zip_audiobooks(self, format_):
256         bm = BookMedia.objects.filter(book=self, type=format_)
257         paths = map(lambda bm: (None, bm.file.path), bm)
258         return create_zip(paths, "%s_%s" % (self.slug, format_))
259
260     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
261         if index is None:
262             from search.index import Index
263             index = Index()
264         try:
265             index.index_book(self, book_info)
266             if index_tags:
267                 index.index_tags()
268             if commit:
269                 index.index.commit()
270         except Exception, e:
271             index.index.rollback()
272             raise e
273
274     def download_pictures(self, remote_gallery_url):
275         gallery_path = self.gallery_path()
276         # delete previous files, so we don't include old files in ebooks
277         if os.path.isdir(gallery_path):
278             for filename in os.listdir(gallery_path):
279                 file_path = os.path.join(gallery_path, filename)
280                 os.unlink(file_path)
281         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
282         if ilustr_elements:
283             makedirs(gallery_path)
284             for ilustr in ilustr_elements:
285                 ilustr_src = ilustr.get('src')
286                 ilustr_path = os.path.join(gallery_path, ilustr_src)
287                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
288
289     @classmethod
290     def from_xml_file(cls, xml_file, **kwargs):
291         from django.core.files import File
292         from librarian import dcparser
293
294         # use librarian to parse meta-data
295         book_info = dcparser.parse(xml_file)
296
297         if not isinstance(xml_file, File):
298             xml_file = File(open(xml_file))
299
300         try:
301             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
302         finally:
303             xml_file.close()
304
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        raw_file: file object saved as the book's XML file.
        book_info: metadata object; this method reads its ``url.slug``,
            ``language``, ``title``, ``variant_of``, ``to_dict()`` and, when
            present, ``parts``.
        overwrite: when false, an existing book with the same slug is an error.
        dont_build: names of outputs ('cover' or ebook formats) to skip; it is
            merged with app_settings.DONT_BUILD.
        search_index: when true and settings.NO_SEARCH_INDEX is false, an
            indexing task is queued.
        search_index_tags: passed to the indexing task as ``index_tags``.
        remote_gallery_url: when given, pictures are downloaded from it.

        Returns the saved Book.

        Raises Book.DoesNotExist when a listed part is missing, ValueError when
        the slug has characters outside [a-z0-9-], and Book.AlreadyExists when
        the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so that a cover change can be detected further down.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags from the metadata plus the 'set' tags saved above.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # Current children that the new metadata no longer lists as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        # NOTE(review): this save contradicts the "No saves beyond this
        # point" remark above -- confirm which of the two is intended.
        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
404
405     @classmethod
406     @transaction.atomic
407     def repopulate_ancestors(cls):
408         """Fixes the ancestry cache."""
409         # TODO: table names
410         cursor = connection.cursor()
411         if connection.vendor == 'postgres':
412             cursor.execute("TRUNCATE catalogue_book_ancestor")
413             cursor.execute("""
414                 WITH RECURSIVE ancestry AS (
415                     SELECT book.id, book.parent_id
416                     FROM catalogue_book AS book
417                     WHERE book.parent_id IS NOT NULL
418                     UNION
419                     SELECT ancestor.id, book.parent_id
420                     FROM ancestry AS ancestor, catalogue_book AS book
421                     WHERE ancestor.parent_id = book.id
422                         AND book.parent_id IS NOT NULL
423                     )
424                 INSERT INTO catalogue_book_ancestor
425                     (from_book_id, to_book_id)
426                     SELECT id, parent_id
427                     FROM ancestry
428                     ORDER BY id;
429                 """)
430         else:
431             cursor.execute("DELETE FROM catalogue_book_ancestor")
432             for b in cls.objects.exclude(parent=None):
433                 parent = b.parent
434                 while parent is not None:
435                     b.ancestor.add(parent)
436                     parent = parent.parent
437
438     def flush_includes(self, languages=True):
439         if not languages:
440             return
441         if languages is True:
442             languages = [lc for (lc, _ln) in settings.LANGUAGES]
443         flush_ssi_includes([
444             template % (self.pk, lang)
445             for template in [
446                 '/katalog/b/%d/mini.%s.html',
447                 '/katalog/b/%d/mini_nolink.%s.html',
448                 '/katalog/b/%d/short.%s.html',
449                 '/katalog/b/%d/wide.%s.html',
450                 '/api/include/book/%d.%s.json',
451                 '/api/include/book/%d.%s.xml',
452                 ]
453             for lang in languages
454             ])
455
456     def cover_info(self, inherit=True):
457         """Returns a dictionary to serve as fallback for BookInfo.
458
459         For now, the only thing inherited is the cover image.
460         """
461         need = False
462         info = {}
463         for field in ('cover_url', 'cover_by', 'cover_source'):
464             val = self.extra_info.get(field)
465             if val:
466                 info[field] = val
467             else:
468                 need = True
469         if inherit and need and self.parent is not None:
470             parent_info = self.parent.cover_info()
471             parent_info.update(info)
472             info = parent_info
473         return info
474
475     def related_themes(self):
476         return Tag.objects.usage_for_queryset(
477             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
478             counts=True).filter(category='theme')
479
480     def parent_cover_changed(self):
481         """Called when parent book's cover image is changed."""
482         if not self.cover_info(inherit=False):
483             if 'cover' not in app_settings.DONT_BUILD:
484                 self.cover.build_delay()
485                 self.cover_thumb.build_delay()
486             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
487                 if format_ not in app_settings.DONT_BUILD:
488                     getattr(self, '%s_file' % format_).build_delay()
489             for child in self.children.all():
490                 child.parent_cover_changed()
491
492     def other_versions(self):
493         """Find other versions (i.e. in other languages) of the book."""
494         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
495
496     def parents(self):
497         books = []
498         parent = self.parent
499         while parent is not None:
500             books.insert(0, parent)
501             parent = parent.parent
502         return books
503
504     def pretty_title(self, html_links=False):
505         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
506         books = self.parents() + [self]
507         names.extend([(b.title, b.get_absolute_url()) for b in books])
508
509         if html_links:
510             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
511         else:
512             names = [tag[0] for tag in names]
513         return ', '.join(names)
514
515     def publisher(self):
516         publisher = self.extra_info['publisher']
517         if isinstance(publisher, basestring):
518             return publisher
519         elif isinstance(publisher, list):
520             return ', '.join(publisher)
521
522     @classmethod
523     def tagged_top_level(cls, tags):
524         """ Returns top-level books tagged with `tags`.
525
526         It only returns those books which don't have ancestors which are
527         also tagged with those tags.
528
529         """
530         objects = cls.tagged.with_all(tags)
531         return objects.exclude(ancestor__in=objects)
532
533     @classmethod
534     def book_list(cls, book_filter=None):
535         """Generates a hierarchical listing of all books.
536
537         Books are optionally filtered with a test function.
538
539         """
540
541         books_by_parent = {}
542         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
543         if book_filter:
544             books = books.filter(book_filter).distinct()
545
546             book_ids = set(b['pk'] for b in books.values("pk").iterator())
547             for book in books.iterator():
548                 parent = book.parent_id
549                 if parent not in book_ids:
550                     parent = None
551                 books_by_parent.setdefault(parent, []).append(book)
552         else:
553             for book in books.iterator():
554                 books_by_parent.setdefault(book.parent_id, []).append(book)
555
556         orphans = []
557         books_by_author = OrderedDict()
558         for tag in Tag.objects.filter(category='author').iterator():
559             books_by_author[tag] = []
560
561         for book in books_by_parent.get(None, ()):
562             authors = list(book.authors().only('pk'))
563             if authors:
564                 for author in authors:
565                     books_by_author[author].append(book)
566             else:
567                 orphans.append(book)
568
569         return books_by_author, orphans, books_by_parent
570
    # Maps audience codes found in extra_info['audiences'] to a
    # (sort rank, Polish label) pair; used by audiences_pl(), which gives
    # unknown codes rank 99.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
580
581     def audiences_pl(self):
582         audiences = self.extra_info.get('audiences', [])
583         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
584         return [a[1] for a in audiences]
585
586     def stage_note(self):
587         stage = self.extra_info.get('stage')
588         if stage and stage < '0.4':
589             return (_('This work needs modernisation'),
590                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
591         else:
592             return None, None
593
594     def choose_fragment(self):
595         fragments = self.fragments.order_by()
596         fragments_count = fragments.count()
597         if not fragments_count and self.children.exists():
598             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
599             fragments_count = fragments.count()
600         if fragments_count:
601             return fragments[randint(0, fragments_count - 1)]
602         elif self.parent:
603             return self.parent.choose_fragment()
604         else:
605             return None
606
607     def update_popularity(self):
608         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
609         try:
610             pop = self.popularity
611             pop.count = count
612             pop.save()
613         except BookPopularity.DoesNotExist:
614             BookPopularity.objects.create(book=self, count=count)
615
616     def ridero_link(self):
617         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
618
619
def add_file_fields():
    """Attach a "<format>_file" EbookField to Book for every entry of Book.formats."""
    for format_ in Book.formats:
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        # This weird globals() assignment makes Django migrations comfortable:
        # each callable gets its own name and is published under it as a
        # module-level global.
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)

add_file_fields()
638
639
class BookPopularity(models.Model):
    """Popularity counter of a book, reachable from it as ``book.popularity``."""
    # One row per book; created on demand by Book.update_popularity().
    book = models.OneToOneField(Book, related_name='popularity')
    # Value written by Book.update_popularity().
    count = models.IntegerField(default=0)