1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
4 from copy import deepcopy
8 from aeneas.executetask import ExecuteTask
9 from aeneas.task import Task
10 from lxml import etree
12 from librarian import OutputFile, get_resource
13 from librarian.html import raw_printable_text
14 from .html import DaisyHtmlBuilder
# get_duration(path): called later in this file with an MP3 path; its result is
# summed and formatted with "%.1f", so it is presumably a duration in seconds
# as a number -- TODO confirm against the body, which is not shown in this listing.
17 def get_duration(path):
# NOTE(review): the lines between the `def` above and this `return` are not
# visible here. This statement may belong to a separate helper (a `format_hms`
# function is called elsewhere in the file) -- confirm before editing.
# It renders hours, minutes and seconds as "HH:MM:SS.mmm"-style text.
# NOTE(review): "%02.3f" gives a minimum field width of 2 for the whole seconds
# field including the fraction, so values below 10 are NOT zero-padded
# (5.5 -> "5.500"); "%06.3f" would yield "05.500".
45 return "%02d:%02d:%02.3f" % (hours, minutes, seconds)
# populate(element, context): fill in a template XML element in place by running
# str.format(**context) over its text, its tail and every attribute value, then
# doing the same for child elements via a recursive call.
#   element -- element whose .text, .tail and .attrib are rewritten
#   context -- mapping passed to str.format as keyword arguments
# NOTE(review): some lines of this function are not visible in this listing;
# presumably they guard against empty text/tail and iterate over the children
# -- confirm against the full source.
48 def populate(element, context):
# Substitute "{NAME}" placeholders in the element's own text.
50 element.text = element.text.format(**context)
# Same for the text that follows the element's closing tag.
52 element.tail = element.tail.format(**context)
# Attribute values are templates as well; keys are left untouched.
53 for k, v in element.attrib.items():
54 element.attrib[k] = v.format(**context)
# Recurse into a child element (the loop header is on a line not shown).
56 populate(child, context)
# Extension for the generated output file; the build below writes a zip archive.
# NOTE(review): the enclosing class header is not visible in this listing --
# presumably this attribute is read by the builder base class; confirm.
60 file_extension = 'daisy.zip'
# build(): assemble a DAISY package as a zip archive from a document and its
# MP3 recordings, and return it as an OutputFile.
#
#   document -- source document; its `meta` (title, publisher, author,
#               created_at, url.slug) and `tree` are read here
#   mp3      -- indexable collection of MP3 paths, one per document part
#               (passed to mutagen.File, get_duration and aeneas)
#   split_on -- tag name compared against the children of the last element
#               under the root in order to split the document into parts
#               (presumably None means "do not split" -- TODO confirm)
#   returns  -- OutputFile.from_filename() of the temporary zip file
#   raises   -- ValueError("Need MP3 files"); the condition is on a line not shown
#
# NOTE(review): this listing omits a number of source lines and has lost its
# indentation; the comments below describe only the statements that are visible.
62 def build(self, document, mp3, split_on=None, **kwargs):
64 raise ValueError("Need MP3 files")
# The temporary file is created with delete=False so that it still exists
# when its name is handed to OutputFile at the end of the method.
66 outfile = tempfile.NamedTemporaryFile(delete=False)
67 zipf = zipfile.ZipFile(outfile, 'w')
# The document slug plus "/" is prepended to every output name built below.
69 directory = document.meta.url.slug + '/'
# --- Splitting into parts ---------------------------------------------------
# Work on a deep copy so the caller's document is not modified.
79 newdoc = deepcopy(document)
80 newdoc.tree.getroot().document = newdoc
# `master` is the last child of the root element.
82 master = newdoc.tree.getroot()[-1]
# Iterate over a snapshot of the children (list(...)), presumably because the
# tree is modified inside the loop -- confirm against the omitted lines.
84 for item in list(master):
85 if item.tag == split_on:
# The printable text of the splitting element becomes the header of a part.
89 headers.append(raw_printable_text(item))
# `i` and `n` are assigned on lines not shown in this listing.
90 if i != n and not (n == 1 and not i):
95 documents.append(newdoc)
# Presumably the no-split branch: one part, titled after the whole document.
97 documents = [document]
98 headers = [document.meta.title]
# NOTE(review): `assert` is removed when Python runs with -O, so this check
# would then be skipped; an explicit exception would be a sturdier validation.
100 assert len(documents) == len(mp3)
# Narrator name comes from the TPE1 tag of the first MP3; empty string if absent.
102 narrator = mutagen.File(mp3[0]).get('TPE1')
103 narrator = narrator.text[0] if narrator else ''
# --- Per-part output: HTML, MP3, sync map and SMIL ----------------------------
106 for i, part in enumerate(documents):
108 html = DaisyHtmlBuilder().build(part)
111 directory + 'book%d.html' % i,
114 durations.append(get_duration(mp3[i]))
117 directory + "book%d.mp3" % i,
# aeneas task configuration for aligning the HTML text with the audio.
# NOTE(review): `\d` inside a non-raw string literal is an invalid escape
# sequence (DeprecationWarning, SyntaxWarning since Python 3.12); the value is
# currently still a backslash followed by "d".
120 config_string = "task_language=pol|is_text_type=unparsed|is_text_unparsed_id_regex=sec\d+$|is_text_unparsed_id_sort=numeric|os_task_file_format=tab"
121 task = Task(config_string=config_string)
# The sync map is written into a temporary directory and read back below.
123 with tempfile.TemporaryDirectory() as temp:
124 syncfile = temp + "/sync"
125 task.audio_file_path_absolute = mp3[i]
126 task.text_file_path_absolute = html.get_filename()
127 task.sync_map_file_path_absolute = syncfile
129 ExecuteTask(task).execute()
130 task.output_sync_map_file()
# Each line of the sync file is split on tabs into start, end and an id.
133 with open(syncfile) as f:
135 start, end, sec = line.strip().split('\t')
# NOTE(review): start/end are strings straight after the split; they are later
# formatted with "%.3f", so presumably they are converted to float on the
# lines not shown here -- confirm.
138 sync.append([start, end, sec])
# `format_hms` is defined elsewhere in the file. The elapsed time is the sum of
# the durations of all preceding parts.
140 hms = format_hms(durations[i])
141 elapsed_hms = format_hms(sum(durations[:i]))
# Placeholder values consumed by populate() for this part's SMIL template.
147 "HHMMSS": hms.split('.')[0],
148 "Sd": "%.1f" % durations[i],
149 "ELAPSED": elapsed_hms,
151 "TITLE": document.meta.title,
152 "PUBLISHER": document.meta.publisher[0],
# assumes created_at is sliceable text laid out like "YYYY-MM..." -- TODO confirm
153 "YEAR": document.meta.created_at[:4],
154 "MONTH": document.meta.created_at[5:7],
155 "AUTHOR": document.meta.author.readable(),
157 "NARRATOR": narrator,
# Load the SMIL template, fill in the placeholders, then append one <par> per
# sync entry to its <seq> element.
160 with open(get_resource('res/daisy/content.smil')) as f:
161 tree = etree.parse(f)
162 populate(tree.getroot(), context)
164 seq = tree.find('//seq')
165 for si, item in enumerate(sync):
# <par> ids are 1-based and zero-padded to six digits (par000001, ...).
166 par = etree.SubElement(seq, 'par', id="par%06d" % (si + 1), endsync="last")
# The text reference points at the fragment id (item[2]) inside this part's HTML.
170 src="book%d.html#%s" % (i, item[2]))
172 audio = etree.SubElement(
175 src="book%d.mp3" % i,
# Clip boundaries: start (item[0]) and end (item[1]) from the sync map.
177 "clip-begin": "npt=%.3fs" % item[0],
178 "clip-end": "npt=%.3fs" % item[1],
183 directory + 'content%d.smil' % i,
186 xml_declaration=True,
# --- Package-level files -------------------------------------------------------
# DTD and entity files taken from res/daisy (presumably copied into the
# archive as-is -- the call using them is only partly visible).
191 for fname in ('smil10.dtd', 'xhtml1-transitional.dtd', 'xhtml-lat1.ent', 'xhtml-special.ent', 'xhtml-symbol.ent'):
193 get_resource('res/daisy/' + fname),
# Totals over all parts, used for the package-level templates below.
196 duration = sum(durations)
197 hms = format_hms(duration)
201 "HHMMSS": hms.split('.')[0],
202 "Sd": "%.1f" % duration,
203 "TITLE": document.meta.title,
204 "PUBLISHER": document.meta.publisher[0],
205 "YEAR": document.meta.created_at[:4],
206 "MONTH": document.meta.created_at[5:7],
207 "AUTHOR": document.meta.author.readable(),
208 "NARRATOR": narrator,
# er_book_info.xml: one <smil> entry per part, added to the root's first child.
211 tree = etree.parse(get_resource('res/daisy/er_book_info.xml'))
212 cont = tree.getroot()[0]
213 for i, dur in enumerate(durations):
# Note: `nr` is 1-based while the referenced file name uses the 0-based index.
214 etree.SubElement(cont, 'smil', nr=str(i+1), Name="content%i.smil" % i, dur="%.1f" % dur)
216 directory + 'er_book_info.xml',
217 etree.tostring(tree, xml_declaration=True))
# master.smil: one <ref> per part header, added to the root's last child.
219 tree = etree.parse(get_resource('res/daisy/master.smil'))
220 populate(tree.getroot(), context)
221 cont = tree.getroot()[-1]
222 for i, header in enumerate(headers):
223 etree.SubElement(cont, 'ref', title=header, src="content%d.smil#seq000001" % i, id="smil_%04d" % i)
225 directory + 'master.smil',
226 etree.tostring(tree, xml_declaration=True))
# ncc.html: headings linking to the first <par> of each part's SMIL file.
228 tree = etree.parse(get_resource('res/daisy/ncc.html'))
229 populate(tree.getroot(), context)
230 cont = tree.getroot()[-1]
231 for i, header in enumerate(headers):
# The conditions choosing between the <h1> (document title) and the <h2>
# (part header) are on lines not shown.
# NOTE(review): both headings are created with id='content' inside this loop,
# so several headers would yield repeated id values in ncc.html -- confirm
# whether unique ids are required.
233 h1 = etree.SubElement(
234 cont, 'h1', id='content', **{"class": "title"})
236 h1, "a", href='content%d.smil#par000001' % i).text = document.meta.title
238 h2 = etree.SubElement(
239 cont, 'h2', id='content', **{"class": "chapter"})
241 h2, "a", href='content%d.smil#par000001' % i).text = header
244 directory + 'ncc.html',
245 etree.tostring(tree, xml_declaration=True))
# Hand the finished temporary file back wrapped as an OutputFile.
248 return OutputFile.from_filename(outfile.name)