4 from aeneas.executetask import ExecuteTask
5 from aeneas.task import Task
8 from librarian import OutputFile, get_resource
9 from .html import DaisyHtmlBuilder
# Presumably returns the duration of the audio file at `path`; build() calls this
# once per MP3 file and sums the results, so a numeric value is expected.
# NOTE(review): the function body is not visible in this excerpt — confirm.
12 def get_duration(path):
# NOTE(review): this return formats hours/minutes/seconds as "HH:MM:SS.mmm". It
# most likely belongs to a separate helper (build() calls format_hms) whose
# header is not visible in this excerpt — confirm.
# NOTE(review): "%02.3f" asks for a minimum width of 2, which a float printed
# with 3 decimals always exceeds, so seconds below 10 are not zero-padded
# (5.0 -> "5.000"). "%06.3f" would give "05.000" — confirm which is intended.
40 return "%02d:%02d:%02.3f" % (hours, minutes, seconds)
# Fill str.format() placeholders in an XML element, in place, and recurse.
#
# `element` is an element whose text, tail and attribute values may contain
# "{NAME}" placeholders; `context` is a mapping expanded as keyword arguments to
# str.format(), so a placeholder without a matching key raises KeyError.
43 def populate(element, context):
# NOTE(review): the lines between these statements are not visible in this
# excerpt; presumably text and tail are checked for None before formatting —
# confirm.
45 element.text = element.text.format(**context)
47 element.tail = element.tail.format(**context)
# Substitute placeholders in every attribute value (keys are left unchanged).
48 for k, v in element.attrib.items():
49 element.attrib[k] = v.format(**context)
# Recursive call for `child`; the loop header that binds `child` is not visible
# in this excerpt (presumably iterates over the element's children).
51 populate(child, context)
# Extension for the produced file; build() writes its output with zipfile.
# NOTE(review): presumably read by the enclosing builder class or its base
# (class header not visible in this excerpt) — confirm.
55 file_extension = 'daisy.zip'
# Build a zip archive for `document` from its HTML rendering, the given MP3
# files and SMIL/NCC templates loaded from res/daisy/.
#
# Parameters:
#   document - source document; its meta (url.slug, title, publisher,
#              created_at, author) is read below.
#   mp3      - sequence of MP3 file paths; a ValueError is raised below when the
#              (not visible) check for missing MP3 files fails.
#   kwargs   - not used in the lines visible in this excerpt.
# Returns: OutputFile.from_filename() for the temporary zip file.
# NOTE(review): many lines of this method are not visible in this excerpt;
# comments below describe only what the visible lines show.
57 def build(self, document, mp3, **kwargs):
59 raise ValueError("Need MP3 files")
# delete=False keeps the temporary file on disk after it is closed; its name is
# what the method returns at the end.
# NOTE(review): no close of `zipf`/`outfile` is visible in this excerpt —
# confirm both are closed before the return.
61 outfile = tempfile.NamedTemporaryFile(delete=False)
62 zipf = zipfile.ZipFile(outfile, 'w')
# All archive entry names below are prefixed with "<slug>/".
64 directory = document.meta.url.slug + '/'
66 html = DaisyHtmlBuilder().build(document)
# Presumably the archive name under which the HTML is stored (the enclosing call
# is not visible).
69 directory + 'book.html',
# Collect one duration per MP3 part; parts are named book0.mp3, book1.mp3, ...
73 for i, mp3_file in enumerate(mp3):
74 durations.append(get_duration(mp3_file))
77 directory + "book%d.mp3" % i,
# Total duration of all parts (requires get_duration to return numbers).
79 duration = sum(durations)
# aeneas task configuration: language "pol", unparsed text whose fragments are
# elements with ids matching sec<digits>, sorted numerically, tab-separated
# sync-map output.
# NOTE(review): "\d" inside a non-raw string literal is an invalid escape
# sequence (it still yields backslash-d, but Python warns about it); a raw
# string would avoid the warning.
81 config_string = "task_language=pol|is_text_type=unparsed|is_text_unparsed_id_regex=sec\d+$|is_text_unparsed_id_sort=numeric|os_task_file_format=tab"
82 task = Task(config_string=config_string)
84 # TODO: concatenate all the
# The temporary directory (and everything in it) is removed when the with-block
# exits.
85 with tempfile.TemporaryDirectory() as temp:
# Append the raw bytes of each input file to a single book.mp3; each input is
# read fully into memory. The loop header binding `minput` is not visible
# (presumably iterates over `mp3`).
86 with open(temp + "/book.mp3", "wb") as m:
88 with open(minput, "rb") as minputf:
89 m.write(minputf.read())
# Point the task at the combined audio, the HTML file and the sync-map path.
92 syncfile = temp + "/sync"
93 task.audio_file_path_absolute = temp + "/book.mp3"
94 task.text_file_path_absolute = html.get_filename()
95 task.sync_map_file_path_absolute = syncfile
# Run the task, then write its sync map to `syncfile`.
97 ExecuteTask(task).execute()
98 task.output_sync_map_file()
# Parse the tab-separated sync map: start, end, fragment id per line.
# NOTE(review): start/end are strings here; arithmetic further down needs
# numbers, so presumably they are converted on lines not visible — confirm.
100 with open(syncfile) as f:
102 start, end, sec = line.strip().split('\t')
105 sync.append([start, end, sec])
107 hms = format_hms(duration)
# Narrator comes from the TPE1 tag of the first MP3; empty string when the tag
# is absent.
109 narrator = mutagen.File(mp3[0]).get('TPE1')
110 narrator = narrator.text[0] if narrator else ''
# Entries of the placeholder context handed to populate() (the dict opener and
# some entries are not visible).
# "HHMMSS" is the formatted duration without its fractional part.
116 "HHMMSS": hms.split('.')[0],
117 "Sd": "%.1f" % duration,
119 "TITLE": document.meta.title,
120 "PUBLISHER": document.meta.publisher[0],
# Slices [:4] and [5:7] assume created_at is a "YYYY-MM..." string — TODO
# confirm.
121 "YEAR": document.meta.created_at[:4],
122 "MONTH": document.meta.created_at[5:7],
123 "AUTHOR": document.meta.author.readable(),
125 "NARRATOR": narrator,
# Static DTD/entity resources; presumably added to the archive unchanged (the
# enclosing call is not visible).
128 for fname in ('smil10.dtd', 'xhtml1-transitional.dtd', 'xhtml-lat1.ent', 'xhtml-special.ent', 'xhtml-symbol.ent'):
130 get_resource('res/daisy/' + fname),
# Templates: parse each file and substitute the context placeholders.
133 for fname in ('er_book_info.xml', 'master.smil', 'ncc.html'):
134 with open(get_resource('res/daisy/' + fname)) as f:
135 tree = etree.parse(f)
136 populate(tree.getroot(), context)
# content.smil is populated the same way, then extended with one <par> per sync
# entry under its <seq> element.
145 with open(get_resource('res/daisy/content.smil')) as f:
146 tree = etree.parse(f)
147 populate(tree.getroot(), context)
149 seq = tree.find('//seq')
150 for i, item in enumerate(sync):
# par ids are 1-based and zero-padded to six digits.
151 par = etree.SubElement(seq, 'par', id="par%06d" % (i + 1), endsync="last")
# Links the entry to the HTML fragment whose id is item[2].
155 src="book.html#%s" % item[2])
157 # If we have a split between mp3 parts, err on the larger side.
159 start, end = item[0], item[1]
# Shift `start` by the current part's duration while it lies past that part.
# NOTE(review): `i` is the sync-entry index from the enumerate() above unless it
# is rebound on a line not visible here; it is used to index `durations` and to
# name the MP3 part below — confirm it is the MP3 part index at this point.
160 while start >= durations[i]:
161 start -= durations[i]
# True when more than half of the [start, end] clip lies beyond durations[i].
164 if 2 * (end - durations[i]) > end - start:
# <audio> element referencing the part file with clip bounds in seconds,
# formatted with three decimals.
169 audio = etree.SubElement(
172 src="book%d.mp3" % i,
174 "clip-begin": "npt=%.3fs" % start,
175 "clip-end": "npt=%.3fs" % end,
# Presumably the serialized SMIL tree is stored in the archive under this name
# (the enclosing calls are not visible).
180 directory + 'content.smil',
183 xml_declaration=True,
192 return OutputFile.from_filename(outfile.name)