Audiobooks: narrators as authors, and top-level players.

[wolnelektury.git] / src / catalogue / models / book.py
diff --git a/src/catalogue/models/book.py b/src/catalogue/models/book.py

index 411476a..0400656 100644 (file)
--- a/src/catalogue/models/book.py
+++ b/src/catalogue/models/book.py
@@ -7,6 +7,8 @@ from datetime import date, timedelta
  from random import randint
  import os.path
  import re
+from slugify import slugify
+from sortify import sortify
  from urllib.request import urlretrieve
  from django.apps import apps
  from django.conf import settings
@@ -43,6 +45,7 @@ class Book(models.Model):
      common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
      language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
      description = models.TextField('opis', blank=True)
+    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
      abstract = models.TextField('abstrakt', blank=True)
      toc = models.TextField('spis treści', blank=True)
      created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
@@ -62,6 +65,7 @@ class Book(models.Model):
      # files generated during publication
      xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
      html_file = fields.HtmlField(storage=bofh_storage)
+    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
      fb2_file = fields.Fb2Field(storage=bofh_storage)
      txt_file = fields.TxtField(storage=bofh_storage)
      epub_file = fields.EpubField(storage=bofh_storage)
@@ -79,7 +83,7 @@ class Book(models.Model):
          'okładka dla Ebookpoint')
  
      ebook_formats = constants.EBOOK_FORMATS
-    formats = ebook_formats + ['html', 'xml']
+    formats = ebook_formats + ['html', 'xml', 'html_nonotes']
  
      parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
      ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
@@ -91,6 +95,9 @@ class Book(models.Model):
      tagged = managers.ModelTaggedItemManager(Tag)
      tags = managers.TagDescriptor(Tag)
      tag_relations = GenericRelation(Tag.intermediary_table_model)
+    translators = models.ManyToManyField(Tag, blank=True)
+    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
+    has_audio = models.BooleanField(default=False)
  
      html_built = django.dispatch.Signal()
      published = django.dispatch.Signal()
@@ -154,12 +161,6 @@ class Book(models.Model):
      def genre_unicode(self):
          return self.tag_unicode('genre')
  
-    def translators(self):
-        translators = self.get_extra_info_json().get('translators') or []
-        return [
-            '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
-        ]
-
      def translator(self):
          translators = self.get_extra_info_json().get('translators')
          if not translators:
@@ -269,17 +270,6 @@ class Book(models.Model):
              return sibling.get_first_text()
          return self.parent.get_next_text(inside=False)
  
-    def get_child_audiobook(self):
-        BookMedia = apps.get_model('catalogue', 'BookMedia')
-        if not BookMedia.objects.filter(book__ancestor=self).exists():
-            return None
-        for child in self.children.order_by('parent_number').all():
-            if child.has_mp3_file():
-                return child
-            child_sub = child.get_child_audiobook()
-            if child_sub is not None:
-                return child_sub
-
      def get_siblings(self):
          if not self.parent:
              return []
@@ -331,15 +321,15 @@ class Book(models.Model):
              total += app_settings.GET_MP3_LENGTH(media.file.path)
          return int(total)
  
+    def get_time(self):
+        return round(self.xml_file.size / 1000 * 40)
+    
      def has_media(self, type_):
          if type_ in Book.formats:
              return bool(getattr(self, "%s_file" % type_))
          else:
              return self.media.filter(type=type_).exists()
  
-    def has_audio(self):
-        return self.has_media('mp3')
-
      def get_media(self, type_):
          if self.has_media(type_):
              if type_ in Book.formats:
@@ -377,6 +367,9 @@ class Book(models.Model):
      def html_url(self):
          return self.media_url('html')
  
+    def html_nonotes_url(self):
+        return self.media_url('html_nonotes')
+
      def pdf_url(self):
          return self.media_url('pdf')
  
@@ -416,8 +409,69 @@ class Book(models.Model):
      has_daisy_file.boolean = True
  
      def has_sync_file(self):
-        return self.has_media("sync")
+        return settings.FEATURE_SYNCHRO and self.has_media("sync")
+
+    def build_sync_file(self):
+        from lxml import html
+        from django.core.files.base import ContentFile
+        with self.html_file.open('rb') as f:
+            h = html.fragment_fromstring(f.read().decode('utf-8'))
+
+        durations = [
+            m['mp3'].duration
+            for m in self.get_audiobooks()[0]
+        ]
+        if settings.MOCK_DURATIONS:
+            durations = settings.MOCK_DURATIONS
+
+        sync = []
+        ts = None
+        sid = 1
+        dirty = False
+        for elem in h.iter():
+            if elem.get('data-audio-ts'):
+                part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
+                ts = str(round(sum(durations[:part - 1]) + ts, 3))
+                # check if inside verse
+                p = elem.getparent()
+                while p is not None:
+                    # Workaround for missing ids.
+                    if 'verse' in p.get('class', ''):
+                        if not p.get('id'):
+                            p.set('id', f'syn{sid}')
+                            dirty = True
+                            sid += 1
+                        sync.append((ts, p.get('id')))
+                        ts = None
+                        break
+                    p = p.getparent()
+            elif ts:
+                cls = elem.get('class', '')
+                # Workaround for missing ids.
+                if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
+                    if not elem.get('id'):
+                        elem.set('id', f'syn{sid}')
+                        dirty = True
+                        sid += 1
+                    sync.append((ts, elem.get('id')))
+                    ts = None
+        if dirty:
+            htext = html.tostring(h, encoding='utf-8')
+            with open(self.html_file.path, 'wb') as f:
+                f.write(htext)
+        try:
+            bm = self.media.get(type='sync')
+        except:
+            bm = BookMedia(book=self, type='sync')
+        sync = (
+            '27\n' + '\n'.join(
+                f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
+            )).encode('latin1')
+        bm.file.save(
+            None, ContentFile(sync)
+            )
  
+    
      def get_sync(self):
          with self.get_media('sync').first().file.open('r') as f:
              sync = f.read().split('\n')
@@ -441,7 +495,7 @@ class Book(models.Model):
      def media_audio_epub(self):
          return self.get_media('audio.epub')
  
-    def get_audiobooks(self):
+    def get_audiobooks(self, with_children=False, processing=False):
          ogg_files = {}
          for m in self.media.filter(type='ogg').order_by().iterator():
              ogg_files[m.name] = m
@@ -467,13 +521,27 @@ class Book(models.Model):
                  media['ogg'] = ogg
              audiobooks.append(media)
  
-        projects = sorted(projects)
-        total_duration = '%d:%02d' % (
-            total_duration // 60,
-            total_duration % 60
-        )
+        if with_children:
+            for child in self.get_children():
+                ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
+                    with_children=True, processing=True)
+                audiobooks.append({'part': child})
+                audiobooks += ch_audiobooks
+                projects.update(ch_projects)
+                total_duration += ch_duration
+
+        if not processing:
+            projects = sorted(projects)
+            total_duration = '%d:%02d' % (
+                total_duration // 60,
+                total_duration % 60
+            )
+
          return audiobooks, projects, total_duration
  
+    def get_audiobooks_with_children(self):
+        return self.get_audiobooks(with_children=True)
+    
      def wldocument(self, parse_dublincore=True, inherit=True):
          from catalogue.import_utils import ORMDocProvider
          from librarian.parser import WLDocument
@@ -594,7 +662,7 @@ class Book(models.Model):
  
      @classmethod
      def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
-                           remote_gallery_url=None, days=0, findable=True):
+                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
          from catalogue import tasks
  
          if dont_build is None:
@@ -637,25 +705,31 @@ class Book(models.Model):
          book.findable = findable
          book.language = book_info.language
          book.title = book_info.title
+        book.license = book_info.license or ''
          if book_info.variant_of:
              book.common_slug = book_info.variant_of.slug
          else:
              book.common_slug = book.slug
-        book.extra_info = json.dumps(book_info.to_dict())
+        extra = book_info.to_dict()
+        if logo:
+            extra['logo'] = logo
+        if logo_mono:
+            extra['logo_mono'] = logo_mono
+        if logo_alt:
+            extra['logo_alt'] = logo_alt
+        book.extra_info = json.dumps(extra)
          book.load_abstract()
          book.load_toc()
          book.save()
  
          meta_tags = Tag.tags_from_info(book_info)
  
-        for tag in meta_tags:
-            if not tag.for_books:
-                tag.for_books = True
-                tag.save()
-
-        book.tags = set(meta_tags + book_shelves)
+        just_tags = [t for (t, rel) in meta_tags if not rel]
+        book.tags = set(just_tags + book_shelves)
          book.save()  # update sort_key_author
  
+        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])
+
          cover_changed = old_cover != book.cover_info()
          obsolete_children = set(b for b in book.children.all()
                                  if b not in children)
@@ -701,6 +775,7 @@ class Book(models.Model):
          for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
              if format_ not in dont_build:
                  getattr(book, '%s_file' % format_).build_delay()
+        book.html_nonotes_file.build_delay()
  
          if not settings.NO_SEARCH_INDEX and search_index and findable:
              tasks.index_book.delay(book.id)
@@ -729,8 +804,12 @@ class Book(models.Model):
              else:
                  entity, entity_created = Entity.objects.get_or_create(uri=uri)
                  if entity_created:
-                    entity.populate()
-                    entity.save()
+                    try:
+                        entity.populate()
+                    except:
+                        pass
+                    else:
+                        entity.save()
                  ref, ref_created = entity.reference_set.get_or_create(book=self)
                  refs[uri] = ref
                  if not ref_created:
@@ -755,6 +834,42 @@ class Book(models.Model):
      def references(self):
          return self.reference_set.all().select_related('entity')
  
+    def update_has_audio(self):
+        self.has_audio = False
+        if self.media.filter(type='mp3').exists():
+            self.has_audio = True
+        if self.descendant.filter(has_audio=True).exists():
+            self.has_audio = True
+        self.save(update_fields=['has_audio'])
+        if self.parent is not None:
+            self.parent.update_has_audio()
+
+    def update_narrators(self):
+        narrator_names = set()
+        for bm in self.media.filter(type='mp3'):
+            narrator_names.update(set(
+                a.strip() for a in re.split(r',|\si\s', bm.artist)
+            ))
+        narrators = []
+
+        for name in narrator_names:
+            if not name: continue
+            slug = slugify(name)
+            try:
+                t = Tag.objects.get(category='author', slug=slug)
+            except Tag.DoesNotExist:
+                sort_key = sortify(
+                    ' '.join(name.rsplit(' ', 1)[::-1]).lower()
+                )
+                t = Tag.objects.create(
+                    category='author',
+                    name_pl=name,
+                    slug=slug,
+                    sort_key=sort_key,
+                )
+            narrators.append(t)
+        self.narrators.set(narrators)
+
      @classmethod
      @transaction.atomic
      def repopulate_ancestors(cls):