def build_sync_file(self):
    """Build and store the book's 'sync' media file from audio markers in its HTML.

    The book HTML is scanned for elements carrying ``data-audio-ts`` (and
    ``data-audio-part``) attributes.  Each marker's timestamp is shifted by
    the summed durations of the preceding audio parts and is then attached
    to an element id:

    * if the marker sits inside an element whose class contains ``verse``,
      the nearest such ancestor is used;
    * otherwise the next element in document order whose class contains
      ``paragraph`` or ``verse``, or which is an ``h1``-``h4`` heading.

    Elements lacking an ``id`` get a generated ``syn<N>`` id; when that
    happens the modified HTML is written back to ``self.html_file.path``.

    The result is saved as the ``sync``-type ``BookMedia`` file: a first line
    ``27`` followed by one ``start<TAB>end<TAB>id`` line per pair of
    consecutive sync points.  The last sync point only closes the previous
    range and gets no line of its own.
    """
    from lxml import html
    from django.core.files.base import ContentFile

    with self.html_file.open('rb') as f:
        h = html.fragment_fromstring(f.read().decode('utf-8'))

    # Only query the audiobook files when no mock durations are configured;
    # the real values would be discarded anyway.
    if settings.MOCK_DURATIONS:
        durations = settings.MOCK_DURATIONS
    else:
        durations = [
            m['mp3'].duration
            for m in self.get_audiobooks()[0]
        ]

    sync = []
    pending_ts = None  # Timestamp (as a string) still waiting for a target element.
    sid = 1
    dirty = False

    def ensure_id(node):
        """Return the node's id, generating a ``syn<N>`` one if it is missing."""
        nonlocal sid, dirty
        # Workaround for missing ids.
        if not node.get('id'):
            node.set('id', f'syn{sid}')
            dirty = True
            sid += 1
        return node.get('id')

    for elem in h.iter():
        raw_ts = elem.get('data-audio-ts')
        if raw_ts:
            part = int(elem.get('data-audio-part'))
            # Offset the in-part timestamp by the length of all earlier parts.
            pending_ts = str(round(sum(durations[:part - 1]) + float(raw_ts), 3))
            # Check if the marker is inside a verse; if so, sync to that verse.
            p = elem.getparent()
            while p is not None:
                if 'verse' in (p.get('class') or ''):
                    sync.append((pending_ts, ensure_id(p)))
                    pending_ts = None
                    break
                p = p.getparent()
        elif pending_ts:
            # NOTE(review): h.iter() also yields comments / processing
            # instructions, whose .get() may return None regardless of the
            # default -- hence ``or ''`` rather than a default argument.
            cls = elem.get('class') or ''
            if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
                sync.append((pending_ts, ensure_id(elem)))
                pending_ts = None

    if dirty:
        # Persist the generated ids so the sync file refers to real anchors.
        htext = html.tostring(h, encoding='utf-8')
        with open(self.html_file.path, 'wb') as f:
            f.write(htext)

    # Reuse the existing sync media if there is one (get_sync() reads the
    # first one as well); avoid a bare ``except`` that would hide real errors.
    bm = self.media.filter(type='sync').first()
    if bm is None:
        bm = BookMedia(book=self, type='sync')

    # NOTE(review): the leading '27' line is emitted verbatim; its meaning is
    # not visible here -- presumably a format header expected by the reader.
    sync = (
        '27\n' + '\n'.join(
            f'{start}\t{end}\t{elem_id}'
            for (start, elem_id), (end, _) in zip(sync, sync[1:])
        )).encode('latin1')
    bm.file.save(
        None, ContentFile(sync)
    )