1 from copy import deepcopy
5 from aeneas.executetask import ExecuteTask
6 from aeneas.task import Task
9 from librarian import OutputFile, get_resource
10 from librarian.html import raw_printable_text
11 from .html import DaisyHtmlBuilder
# Header of get_duration(path); the body is not part of this excerpt.
# NOTE(review): build() sums the values returned by get_duration and formats
# them with "%.1f", so it presumably returns a duration in seconds as a
# number -- confirm against the full file.
14 def get_duration(path):
# NOTE(review): this return produces an "HH:MM:SS.mmm"-style string, which
# does not match the numeric use of get_duration() in build(). It most likely
# closes a different helper whose header is not visible here (build() calls
# format_hms with a numeric duration) -- confirm.
# NOTE(review): "%02.3f" asks for a minimum width of 2, but the output always
# has at least 5 characters, so seconds below 10 are not zero-padded
# (5.0 renders as "5.000"); "%06.3f" would pad -- confirm the intended format.
42 return "%02d:%02d:%02.3f" % (hours, minutes, seconds)
# Fill str.format-style placeholders in an element and its attributes, in place.
#   element: node whose text, tail and attribute values are formatted.
#   context: mapping expanded into keyword arguments for str.format.
# str.format raises KeyError when a placeholder has no entry in `context`.
# NOTE(review): some lines of this function are missing from this excerpt
# (presumably guards for empty text/tail and the loop that yields `child`)
# -- confirm against the full file.
45 def populate(element, context):
# Substitute placeholders in the node's own text.
47 element.text = element.text.format(**context)
# Same substitution for the node's tail.
49 element.tail = element.tail.format(**context)
# Attribute values are formatted as well; attribute names are left unchanged.
50 for k, v in element.attrib.items():
51 element.attrib[k] = v.format(**context)
# Recursive call so the same substitution is applied to `child`.
53 populate(child, context)
# File extension declared for this builder's output (build() below writes a
# zip archive). The enclosing class header is not visible in this excerpt.
57 file_extension = 'daisy.zip'
# Build a DAISY package as a zip archive from a document and its MP3 files.
#   document: source document; its meta (url.slug, title, publisher,
#             created_at, author) is read below.
#   mp3:      sequence of MP3 file paths, one per produced part
#             (see the length check against `documents`).
#   split_on: tag name; elements with this tag are compared against it when
#             the document is split into parts.
# Returns OutputFile.from_filename() for the temporary archive file.
# NOTE(review): many lines of this method are missing from this excerpt, so
# the comments below describe only what the visible lines show.
59 def build(self, document, mp3, split_on=None, **kwargs):
# Raised when MP3 input is missing (the guarding condition is not visible here).
61 raise ValueError("Need MP3 files")
# delete=False keeps the temporary file on disk after it is closed; its name is
# handed to OutputFile at the end. No cleanup is visible in this excerpt.
63 outfile = tempfile.NamedTemporaryFile(delete=False)
64 zipf = zipfile.ZipFile(outfile, 'w')
# Prefix used for every member path built below (document slug plus '/').
66 directory = document.meta.url.slug + '/'
# Splitting: work on a deep copy so the original document is left untouched.
76 newdoc = deepcopy(document)
77 newdoc.tree.getroot().document = newdoc
# The last child of the root is the container whose items are examined.
79 master = newdoc.tree.getroot()[-1]
# list(...) takes a snapshot of the children before iterating.
# NOTE(review): presumably because items are removed inside the loop -- confirm.
81 for item in list(master):
82 if item.tag == split_on:
# The printable text of the matching element becomes the part's header.
86 headers.append(raw_printable_text(item))
# NOTE(review): `i` and `n` are defined on lines not shown here -- confirm their meaning.
87 if i != n and not (n == 1 and not i):
92 documents.append(newdoc)
# Unsplit case: one part, titled with the document title.
94 documents = [document]
95 headers = [document.meta.title]
# NOTE(review): assert is stripped under `python -O`; if this is input
# validation, an explicit exception would be more robust.
97 assert len(documents) == len(mp3)
# Narrator is read from the 'TPE1' tag of the first MP3; empty string when absent.
99 narrator = mutagen.File(mp3[0]).get('TPE1')
100 narrator = narrator.text[0] if narrator else ''
# Per-part processing: HTML, audio, forced alignment, SMIL.
103 for i, part in enumerate(documents):
105 html = DaisyHtmlBuilder().build(part)
108 directory + 'book%d.html' % i,
111 durations.append(get_duration(mp3[i]))
114 directory + "book%d.mp3" % i,
# aeneas task configuration: language "pol", unparsed text whose fragment ids
# match sec\d+$, sorted numerically, tab-separated sync map output.
# NOTE(review): "\d" inside a non-raw string literal is an invalid escape
# sequence (a warning on current Python versions); a raw string avoids it.
117 config_string = "task_language=pol|is_text_type=unparsed|is_text_unparsed_id_regex=sec\d+$|is_text_unparsed_id_sort=numeric|os_task_file_format=tab"
118 task = Task(config_string=config_string)
# The sync map is written into a temporary directory that is removed on exit.
120 with tempfile.TemporaryDirectory() as temp:
121 syncfile = temp + "/sync"
122 task.audio_file_path_absolute = mp3[i]
123 task.text_file_path_absolute = html.get_filename()
124 task.sync_map_file_path_absolute = syncfile
126 ExecuteTask(task).execute()
127 task.output_sync_map_file()
# Each sync line holds three tab-separated fields: start, end, fragment id.
130 with open(syncfile) as f:
132 start, end, sec = line.strip().split('\t')
# NOTE(review): split() yields strings, while the "%.3f" formatting further
# down needs numbers; presumably start/end are converted on lines not shown
# here -- confirm.
135 sync.append([start, end, sec])
# Duration of this part and total duration of all preceding parts.
137 hms = format_hms(durations[i])
138 elapsed_hms = format_hms(sum(durations[:i]))
# Entries of the per-part template context consumed by populate().
144 "HHMMSS": hms.split('.')[0],
145 "Sd": "%.1f" % durations[i],
146 "ELAPSED": elapsed_hms,
148 "TITLE": document.meta.title,
149 "PUBLISHER": document.meta.publisher[0],
# created_at is sliced as text: [:4] for the year, [5:7] for the month.
150 "YEAR": document.meta.created_at[:4],
151 "MONTH": document.meta.created_at[5:7],
152 "AUTHOR": document.meta.author.readable(),
154 "NARRATOR": narrator,
# Load the SMIL template, fill its placeholders, then add one <par> per sync entry.
157 with open(get_resource('res/daisy/content.smil')) as f:
158 tree = etree.parse(f)
159 populate(tree.getroot(), context)
161 seq = tree.find('//seq')
# par ids are 1-based and zero-padded to six digits.
162 for si, item in enumerate(sync):
163 par = etree.SubElement(seq, 'par', id="par%06d" % (si + 1), endsync="last")
167 src="book%d.html#%s" % (i, item[2]))
169 audio = etree.SubElement(
172 src="book%d.mp3" % i,
174 "clip-begin": "npt=%.3fs" % item[0],
175 "clip-end": "npt=%.3fs" % item[1],
180 directory + 'content%d.smil' % i,
183 xml_declaration=True,
# Static DTD and entity files shipped from the res/daisy resources.
188 for fname in ('smil10.dtd', 'xhtml1-transitional.dtd', 'xhtml-lat1.ent', 'xhtml-special.ent', 'xhtml-symbol.ent'):
190 get_resource('res/daisy/' + fname),
# Book-level context: totals over all parts.
193 duration = sum(durations)
194 hms = format_hms(duration)
198 "HHMMSS": hms.split('.')[0],
199 "Sd": "%.1f" % duration,
200 "TITLE": document.meta.title,
201 "PUBLISHER": document.meta.publisher[0],
202 "YEAR": document.meta.created_at[:4],
203 "MONTH": document.meta.created_at[5:7],
204 "AUTHOR": document.meta.author.readable(),
205 "NARRATOR": narrator,
# er_book_info.xml: one <smil> entry per part; `nr` is 1-based while the file
# name uses the 0-based index, matching the 'content%d.smil' names above.
208 tree = etree.parse(get_resource('res/daisy/er_book_info.xml'))
209 cont = tree.getroot()[0]
210 for i, dur in enumerate(durations):
211 etree.SubElement(cont, 'smil', nr=str(i+1), Name="content%i.smil" % i, dur="%.1f" % dur)
213 directory + 'er_book_info.xml',
214 etree.tostring(tree, xml_declaration=True))
# master.smil: one <ref> per part header, appended to the root's last child.
216 tree = etree.parse(get_resource('res/daisy/master.smil'))
217 populate(tree.getroot(), context)
218 cont = tree.getroot()[-1]
219 for i, header in enumerate(headers):
220 etree.SubElement(cont, 'ref', title=header, src="content%d.smil#seq000001" % i, id="smil_%04d" % i)
222 directory + 'master.smil',
223 etree.tostring(tree, xml_declaration=True))
# ncc.html: navigation headings linking to the first <par> of each part.
225 tree = etree.parse(get_resource('res/daisy/ncc.html'))
226 populate(tree.getroot(), context)
227 cont = tree.getroot()[-1]
228 for i, header in enumerate(headers):
# NOTE(review): both heading variants use id='content'; if more than one
# heading is emitted the ids are not unique in the document -- confirm.
230 h1 = etree.SubElement(
231 cont, 'h1', id='content', **{"class": "title"})
233 h1, "a", href='content%d.smil#par000001' % i).text = document.meta.title
235 h2 = etree.SubElement(
236 cont, 'h2', id='content', **{"class": "chapter"})
238 h2, "a", href='content%d.smil#par000001' % i).text = header
241 directory + 'ncc.html',
242 etree.tostring(tree, xml_declaration=True))
# Hand the temporary archive over by file name.
245 return OutputFile.from_filename(outfile.name)