X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/e24e657ebf536f55c1bde66bd41563fae30a98b0..3d8d1e8e463efe111ed2e2a99861ada52759a1d9:/src/catalogue/models/book.py diff --git a/src/catalogue/models/book.py b/src/catalogue/models/book.py index 35d0e8c4e..0400656e2 100644 --- a/src/catalogue/models/book.py +++ b/src/catalogue/models/book.py @@ -1,5 +1,5 @@ -# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # from collections import OrderedDict import json @@ -7,6 +7,8 @@ from datetime import date, timedelta from random import randint import os.path import re +from slugify import slugify +from sortify import sortify from urllib.request import urlretrieve from django.apps import apps from django.conf import settings @@ -43,6 +45,7 @@ class Book(models.Model): common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True) language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE) description = models.TextField('opis', blank=True) + license = models.CharField('licencja', max_length=255, blank=True, db_index=True) abstract = models.TextField('abstrakt', blank=True) toc = models.TextField('spis treści', blank=True) created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True) @@ -62,6 +65,7 @@ class Book(models.Model): # files generated during publication xml_file = fields.XmlField(storage=bofh_storage, with_etag=False) html_file = fields.HtmlField(storage=bofh_storage) + html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage) fb2_file = fields.Fb2Field(storage=bofh_storage) txt_file = fields.TxtField(storage=bofh_storage) epub_file = fields.EpubField(storage=bofh_storage) @@ -79,7 +83,7 @@ class 
def get_time(self):
    """Estimate a duration for the book from the size of its XML file.

    The value is 40 per 1000 bytes of ``self.xml_file.size``, rounded to
    an integer with the built-in ``round``.

    NOTE(review): the unit of the result (presumably seconds of reading
    time) is not visible in this method -- confirm against callers.
    """
    thousands_of_bytes = self.xml_file.size / 1000
    estimate = thousands_of_bytes * 40
    return round(estimate)
def html_nonotes_url(self):
    """Return ``self.media_url('html_nonotes')`` for this book."""
    return self.media_url('html_nonotes')


def has_sync_file(self):
    """Tell whether a 'sync' media file is available for this book.

    The check is gated by ``settings.FEATURE_SYNCHRO``: when that setting
    is falsy its value is returned as-is and the media lookup is skipped.
    """
    return settings.FEATURE_SYNCHRO and self.has_media("sync")


def build_sync_file(self):
    """Build the 'sync' media file linking audio timestamps to HTML ids.

    The book's HTML file is scanned for elements carrying
    ``data-audio-ts`` / ``data-audio-part`` attributes.  Each timestamp is
    made absolute by adding the durations of all preceding mp3 parts, and
    is then attached either to the enclosing element whose class contains
    'verse', or to the next paragraph / verse / h1-h4 element in document
    order.  Target elements lacking an ``id`` get a generated ``syn<N>``
    id, in which case the HTML file is rewritten on disk.

    The stored payload starts with the literal line ``27`` followed by
    one ``start<TAB>end<TAB>id`` line per entry, where ``end`` is the
    start of the following entry; the last entry therefore has no line of
    its own.  NOTE(review): the meaning of the ``27`` header is not
    visible here -- confirm against the reader of this file.

    Raises:
        TypeError / ValueError: if a marked element has a missing or
            non-numeric ``data-audio-part`` / ``data-audio-ts`` value.
    """
    from lxml import html
    from django.core.files.base import ContentFile

    # Resolve the model through the app registry so this method does not
    # depend on a module-level ``BookMedia`` name being available.
    BookMedia = apps.get_model('catalogue', 'BookMedia')

    with self.html_file.open('rb') as f:
        h = html.fragment_fromstring(f.read().decode('utf-8'))

    durations = [
        m['mp3'].duration
        for m in self.get_audiobooks()[0]
    ]
    if settings.MOCK_DURATIONS:
        durations = settings.MOCK_DURATIONS

    entries = []   # (absolute timestamp as str, element id), document order
    ts = None      # timestamp seen but not yet attached to an element
    sid = 1        # counter for generated ``syn<N>`` ids
    dirty = False  # True once the HTML tree has been modified
    for elem in h.iter():
        if elem.get('data-audio-ts'):
            part = int(elem.get('data-audio-part'))
            offset = float(elem.get('data-audio-ts'))
            # Parts are 1-based: add the durations of all earlier parts.
            ts = str(round(sum(durations[:part - 1]) + offset, 3))
            # Check if inside a verse; if so, the verse is the target.
            p = elem.getparent()
            while p is not None:
                if 'verse' in p.get('class', ''):
                    # Workaround for missing ids.
                    if not p.get('id'):
                        p.set('id', f'syn{sid}')
                        dirty = True
                        sid += 1
                    entries.append((ts, p.get('id')))
                    ts = None
                    break
                p = p.getparent()
        elif ts:
            cls = elem.get('class', '')
            if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
                # Workaround for missing ids.
                if not elem.get('id'):
                    elem.set('id', f'syn{sid}')
                    dirty = True
                    sid += 1
                entries.append((ts, elem.get('id')))
                ts = None

    if dirty:
        # Persist the generated ids so the sync file refers to real anchors.
        htext = html.tostring(h, encoding='utf-8')
        with open(self.html_file.path, 'wb') as f:
            f.write(htext)

    # Reuse an existing sync media object when there is one; only a missing
    # object leads to creating a new one (no blanket exception handling).
    bm = self.media.filter(type='sync').first()
    if bm is None:
        bm = BookMedia(book=self, type='sync')

    payload = (
        '27\n' + '\n'.join(
            f'{start}\t{next_start}\t{elem_id}'
            for (start, elem_id), (next_start, _) in zip(entries, entries[1:])
        )).encode('latin1')
    bm.file.save(
        None, ContentFile(payload)
    )


def get_audiobooks_with_children(self):
    """Return ``self.get_audiobooks(with_children=True)``."""
    return self.get_audiobooks(with_children=True)
from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True, - remote_gallery_url=None, days=0, findable=True): + remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None): from catalogue import tasks if dont_build is None: @@ -637,25 +705,31 @@ class Book(models.Model): book.findable = findable book.language = book_info.language book.title = book_info.title + book.license = book_info.license or '' if book_info.variant_of: book.common_slug = book_info.variant_of.slug else: book.common_slug = book.slug - book.extra_info = json.dumps(book_info.to_dict()) + extra = book_info.to_dict() + if logo: + extra['logo'] = logo + if logo_mono: + extra['logo_mono'] = logo_mono + if logo_alt: + extra['logo_alt'] = logo_alt + book.extra_info = json.dumps(extra) book.load_abstract() book.load_toc() book.save() meta_tags = Tag.tags_from_info(book_info) - for tag in meta_tags: - if not tag.for_books: - tag.for_books = True - tag.save() - - book.tags = set(meta_tags + book_shelves) + just_tags = [t for (t, rel) in meta_tags if not rel] + book.tags = set(just_tags + book_shelves) book.save() # update sort_key_author + book.translators.set([t for (t, rel) in meta_tags if rel == 'translator']) + cover_changed = old_cover != book.cover_info() obsolete_children = set(b for b in book.children.all() if b not in children) @@ -701,6 +775,7 @@ class Book(models.Model): for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN: if format_ not in dont_build: getattr(book, '%s_file' % format_).build_delay() + book.html_nonotes_file.build_delay() if not settings.NO_SEARCH_INDEX and search_index and findable: tasks.index_book.delay(book.id) @@ -729,8 +804,12 @@ class Book(models.Model): else: entity, entity_created = Entity.objects.get_or_create(uri=uri) if entity_created: - entity.populate() - entity.save() + try: + entity.populate() + except: + pass + else: + entity.save() ref, ref_created = 
def update_has_audio(self):
    """Recompute the ``has_audio`` flag for this book and its parents.

    The flag is set when the book has any media of type 'mp3' or when any
    of its descendants is already flagged.  Only the ``has_audio`` field
    is saved; the same update is then run on ``self.parent``, if any.
    """
    # ``or`` short-circuits: the descendant query is only issued when the
    # book has no mp3 media of its own.
    self.has_audio = (
        self.media.filter(type='mp3').exists()
        or self.descendant.filter(has_audio=True).exists()
    )
    self.save(update_fields=['has_audio'])
    if self.parent is not None:
        self.parent.update_has_audio()


def update_narrators(self):
    """Rebuild ``self.narrators`` from the artists of the book's mp3 media.

    Each media ``artist`` string is split on commas and on the standalone
    word "i"; surrounding whitespace is stripped and empty names are
    dropped.  Every remaining name is mapped to a ``Tag`` of category
    'author' looked up by its slug, and the tag is created when missing.
    New tags get a sort key built from the lower-cased name with its last
    word moved to the front.
    """
    narrator_names = set()
    for bm in self.media.filter(type='mp3'):
        # Treat a missing artist as empty instead of failing in re.split.
        narrator_names.update(
            a.strip() for a in re.split(r',|\si\s', bm.artist or '')
        )

    narrators = []
    for name in narrator_names:
        if not name:
            continue
        # "First Last" -> "last first", so sorting goes by the last word.
        sort_key = sortify(
            ' '.join(name.rsplit(' ', 1)[::-1]).lower()
        )
        tag, _created = Tag.objects.get_or_create(
            category='author',
            slug=slugify(name),
            defaults={'name_pl': name, 'sort_key': sort_key},
        )
        narrators.append(tag)
    self.narrators.set(narrators)