use ints for stats
[wolnelektury.git] / src / catalogue / models / book.py
index 4080cc2..d7c6f01 100644 (file)
@@ -7,6 +7,8 @@ from datetime import date, timedelta
 from random import randint
 import os.path
 import re
 from random import randint
 import os.path
 import re
+from slugify import slugify
+from sortify import sortify
 from urllib.request import urlretrieve
 from django.apps import apps
 from django.conf import settings
 from urllib.request import urlretrieve
 from django.apps import apps
 from django.conf import settings
@@ -92,9 +94,13 @@ class Book(models.Model):
     objects = models.Manager()
     tagged = managers.ModelTaggedItemManager(Tag)
     tags = managers.TagDescriptor(Tag)
     objects = models.Manager()
     tagged = managers.ModelTaggedItemManager(Tag)
     tags = managers.TagDescriptor(Tag)
-    tag_relations = GenericRelation(Tag.intermediary_table_model)
+    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
     translators = models.ManyToManyField(Tag, blank=True)
     translators = models.ManyToManyField(Tag, blank=True)
-
+    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
+    has_audio = models.BooleanField(default=False)
+    read_time = models.IntegerField(blank=True, null=True)
+    pages = models.IntegerField(blank=True, null=True)
+    
     html_built = django.dispatch.Signal()
     published = django.dispatch.Signal()
 
     html_built = django.dispatch.Signal()
     published = django.dispatch.Signal()
 
@@ -183,6 +189,10 @@ class Book(models.Model):
     def isbn_mobi(self):
         return self.get_extra_info_json().get('isbn_mobi')
 
     def isbn_mobi(self):
         return self.get_extra_info_json().get('isbn_mobi')
 
+    @property
+    def redakcja(self):
+        return self.get_extra_info_json().get('about')
+    
     def is_accessible_to(self, user):
         if not self.preview:
             return True
     def is_accessible_to(self, user):
         if not self.preview:
             return True
@@ -266,17 +276,6 @@ class Book(models.Model):
             return sibling.get_first_text()
         return self.parent.get_next_text(inside=False)
 
             return sibling.get_first_text()
         return self.parent.get_next_text(inside=False)
 
-    def get_child_audiobook(self):
-        BookMedia = apps.get_model('catalogue', 'BookMedia')
-        if not BookMedia.objects.filter(book__ancestor=self).exists():
-            return None
-        for child in self.children.order_by('parent_number').all():
-            if child.has_mp3_file():
-                return child
-            child_sub = child.get_child_audiobook()
-            if child_sub is not None:
-                return child_sub
-
     def get_siblings(self):
         if not self.parent:
             return []
     def get_siblings(self):
         if not self.parent:
             return []
@@ -337,9 +336,6 @@ class Book(models.Model):
         else:
             return self.media.filter(type=type_).exists()
 
         else:
             return self.media.filter(type=type_).exists()
 
-    def has_audio(self):
-        return self.has_media('mp3')
-
     def get_media(self, type_):
         if self.has_media(type_):
             if type_ in Book.formats:
     def get_media(self, type_):
         if self.has_media(type_):
             if type_ in Book.formats:
@@ -421,7 +417,69 @@ class Book(models.Model):
     def has_sync_file(self):
         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 
     def has_sync_file(self):
         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 
+    def build_sync_file(self):
+        from lxml import html
+        from django.core.files.base import ContentFile
+        with self.html_file.open('rb') as f:
+            h = html.fragment_fromstring(f.read().decode('utf-8'))
+
+        durations = [
+            m['mp3'].duration
+            for m in self.get_audiobooks()[0]
+        ]
+        if settings.MOCK_DURATIONS:
+            durations = settings.MOCK_DURATIONS
+
+        sync = []
+        ts = None
+        sid = 1
+        dirty = False
+        for elem in h.iter():
+            if elem.get('data-audio-ts'):
+                part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
+                ts = str(round(sum(durations[:part - 1]) + ts, 3))
+                # check if inside verse
+                p = elem.getparent()
+                while p is not None:
+                    # Workaround for missing ids.
+                    if 'verse' in p.get('class', ''):
+                        if not p.get('id'):
+                            p.set('id', f'syn{sid}')
+                            dirty = True
+                            sid += 1
+                        sync.append((ts, p.get('id')))
+                        ts = None
+                        break
+                    p = p.getparent()
+            elif ts:
+                cls = elem.get('class', '')
+                # Workaround for missing ids.
+                if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
+                    if not elem.get('id'):
+                        elem.set('id', f'syn{sid}')
+                        dirty = True
+                        sid += 1
+                    sync.append((ts, elem.get('id')))
+                    ts = None
+        if dirty:
+            htext = html.tostring(h, encoding='utf-8')
+            with open(self.html_file.path, 'wb') as f:
+                f.write(htext)
+        try:
+            bm = self.media.get(type='sync')
+        except:
+            bm = BookMedia(book=self, type='sync')
+        sync = (
+            '27\n' + '\n'.join(
+                f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
+            )).encode('latin1')
+        bm.file.save(
+            None, ContentFile(sync)
+            )
+
     def get_sync(self):
     def get_sync(self):
+        if not self.has_sync_file():
+            return []
         with self.get_media('sync').first().file.open('r') as f:
             sync = f.read().split('\n')
         offset = float(sync[0])
         with self.get_media('sync').first().file.open('r') as f:
             sync = f.read().split('\n')
         offset = float(sync[0])
@@ -431,8 +489,22 @@ class Book(models.Model):
                 continue
             start, end, elid = line.split()
             items.append([elid, float(start) + offset])
                 continue
             start, end, elid = line.split()
             items.append([elid, float(start) + offset])
-        return json.dumps(items)
-    
+        return items
+
+    def sync_ts(self, ts):
+        elid = None
+        for cur_id, t in self.get_sync():
+            if ts >= t:
+                elid = cur_id
+            else:
+                break
+        return elid
+
+    def sync_elid(self, elid):
+        for cur_id, t in self.get_sync():
+            if cur_id == elid:
+                return t
+
     def has_audio_epub_file(self):
         return self.has_media("audio.epub")
 
     def has_audio_epub_file(self):
         return self.has_media("audio.epub")
 
@@ -444,7 +516,7 @@ class Book(models.Model):
     def media_audio_epub(self):
         return self.get_media('audio.epub')
 
     def media_audio_epub(self):
         return self.get_media('audio.epub')
 
-    def get_audiobooks(self):
+    def get_audiobooks(self, with_children=False, processing=False):
         ogg_files = {}
         for m in self.media.filter(type='ogg').order_by().iterator():
             ogg_files[m.name] = m
         ogg_files = {}
         for m in self.media.filter(type='ogg').order_by().iterator():
             ogg_files[m.name] = m
@@ -470,13 +542,27 @@ class Book(models.Model):
                 media['ogg'] = ogg
             audiobooks.append(media)
 
                 media['ogg'] = ogg
             audiobooks.append(media)
 
-        projects = sorted(projects)
-        total_duration = '%d:%02d' % (
-            total_duration // 60,
-            total_duration % 60
-        )
+        if with_children:
+            for child in self.get_children():
+                ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
+                    with_children=True, processing=True)
+                audiobooks.append({'part': child})
+                audiobooks += ch_audiobooks
+                projects.update(ch_projects)
+                total_duration += ch_duration
+
+        if not processing:
+            projects = sorted(projects)
+            total_duration = '%d:%02d' % (
+                total_duration // 60,
+                total_duration % 60
+            )
+
         return audiobooks, projects, total_duration
 
         return audiobooks, projects, total_duration
 
+    def get_audiobooks_with_children(self):
+        return self.get_audiobooks(with_children=True)
+    
     def wldocument(self, parse_dublincore=True, inherit=True):
         from catalogue.import_utils import ORMDocProvider
         from librarian.parser import WLDocument
     def wldocument(self, parse_dublincore=True, inherit=True):
         from catalogue.import_utils import ORMDocProvider
         from librarian.parser import WLDocument
@@ -640,7 +726,7 @@ class Book(models.Model):
         book.findable = findable
         book.language = book_info.language
         book.title = book_info.title
         book.findable = findable
         book.language = book_info.language
         book.title = book_info.title
-        book.license = book_info.license
+        book.license = book_info.license or ''
         if book_info.variant_of:
             book.common_slug = book_info.variant_of.slug
         else:
         if book_info.variant_of:
             book.common_slug = book_info.variant_of.slug
         else:
@@ -657,6 +743,8 @@ class Book(models.Model):
         book.load_toc()
         book.save()
 
         book.load_toc()
         book.save()
 
+        book.update_stats()
+        
         meta_tags = Tag.tags_from_info(book_info)
 
         just_tags = [t for (t, rel) in meta_tags if not rel]
         meta_tags = Tag.tags_from_info(book_info)
 
         just_tags = [t for (t, rel) in meta_tags if not rel]
@@ -724,6 +812,16 @@ class Book(models.Model):
         cls.published.send(sender=cls, instance=book)
         return book
 
         cls.published.send(sender=cls, instance=book)
         return book
 
+    def update_stats(self):
+        stats = self.wldocument2().get_statistics()['total']
+        self.pages = round(
+            stats['verses_with_fn'] / 30 +
+            stats['chars_out_verse_with_fn'] / 1800)
+        self.read_time = round(self.get_time())
+        self.save(update_fields=['pages', 'read_time'])
+        if self.parent is not None:
+            self.parent.update_stats()
+
     def update_references(self):
         Entity = apps.get_model('references', 'Entity')
         doc = self.wldocument2()
     def update_references(self):
         Entity = apps.get_model('references', 'Entity')
         doc = self.wldocument2()
@@ -769,6 +867,42 @@ class Book(models.Model):
     def references(self):
         return self.reference_set.all().select_related('entity')
 
     def references(self):
         return self.reference_set.all().select_related('entity')
 
+    def update_has_audio(self):
+        self.has_audio = False
+        if self.media.filter(type='mp3').exists():
+            self.has_audio = True
+        if self.descendant.filter(has_audio=True).exists():
+            self.has_audio = True
+        self.save(update_fields=['has_audio'])
+        if self.parent is not None:
+            self.parent.update_has_audio()
+
+    def update_narrators(self):
+        narrator_names = set()
+        for bm in self.media.filter(type='mp3'):
+            narrator_names.update(set(
+                a.strip() for a in re.split(r',|\si\s', bm.artist)
+            ))
+        narrators = []
+
+        for name in narrator_names:
+            if not name: continue
+            slug = slugify(name)
+            try:
+                t = Tag.objects.get(category='author', slug=slug)
+            except Tag.DoesNotExist:
+                sort_key = sortify(
+                    ' '.join(name.rsplit(' ', 1)[::-1]).lower()
+                )
+                t = Tag.objects.create(
+                    category='author',
+                    name_pl=name,
+                    slug=slug,
+                    sort_key=sort_key,
+                )
+            narrators.append(t)
+        self.narrators.set(narrators)
+
     @classmethod
     @transaction.atomic
     def repopulate_ancestors(cls):
     @classmethod
     @transaction.atomic
     def repopulate_ancestors(cls):
@@ -885,6 +1019,12 @@ class Book(models.Model):
         elif isinstance(publisher, list):
             return ', '.join(publisher)
 
         elif isinstance(publisher, list):
             return ', '.join(publisher)
 
+    def get_recommended(self, limit=4):
+        books_qs = type(self).objects.filter(findable=True)
+        books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
+        books = type(self).tagged.related_to(self, books_qs)[:limit]
+        return books
+
     @classmethod
     def tagged_top_level(cls, tags):
         """ Returns top-level books tagged with `tags`.
     @classmethod
     def tagged_top_level(cls, tags):
         """ Returns top-level books tagged with `tags`.
@@ -993,7 +1133,7 @@ class Book(models.Model):
             return None
 
     def update_popularity(self):
             return None
 
     def update_popularity(self):
-        count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
+        count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
         try:
             pop = self.popularity
             pop.count = count
         try:
             pop = self.popularity
             pop.count = count
@@ -1004,16 +1144,23 @@ class Book(models.Model):
     def ridero_link(self):
         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 
     def ridero_link(self):
         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 
-    def like(self, user):
-        from social.utils import likes, get_set, set_sets
-        if not likes(user, self):
-            tag = get_set(user, '')
-            set_sets(user, self, [tag])
+    def elevenreader_link(self):
+        first_text = self.get_first_text()
+        if first_text is None:
+            return None
+        return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
 
 
-    def unlike(self, user):
-        from social.utils import likes, set_sets
-        if likes(user, self):
-            set_sets(user, self, [])
+    def content_warnings(self):
+        warnings_def = {
+            'wulgaryzmy': _('wulgaryzmy'),
+        }
+        warnings = self.get_extra_info_json().get('content_warnings', [])
+        warnings = [
+            warnings_def.get(w, w)
+            for w in warnings
+        ]
+        warnings.sort()
+        return warnings
 
     def full_sort_key(self):
         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
 
     def full_sort_key(self):
         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))