1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
4 from copy import deepcopy
10 from librarian import OutputFile, get_resource
11 from librarian.html import raw_printable_text
12 from .html import DaisyHtmlBuilder
# NOTE(review): sparse excerpt -- the body of get_duration is not visible in
# this view. The caller further down appends its result to `durations`, sums
# that list and formats entries with "%.1f", so it is expected to return a
# number (presumably seconds -- confirm).
15 def get_duration(path):
# NOTE(review): this return does not use `path` and produces a string, so it
# presumably belongs to a separate helper (format_hms is called further down)
# whose `def` line is not shown here -- confirm against the full file.
# NOTE(review): "%02.3f" sets a minimum total width of 2, which the three
# decimals always exceed, so seconds below 10 are not zero-padded ("5.000").
# "%06.3f" would give "05.000" if a fixed-width HH:MM:SS.mmm is intended.
43 return "%02d:%02d:%02.3f" % (hours, minutes, seconds)
# Fill in template placeholders in place: apply str.format(**context) to the
# text, the tail and every attribute value of `element`, then recurse into
# `child` elements with the same `context`.
# NOTE(review): sparse excerpt -- any guards around the text/tail assignments
# (e.g. for a None text or tail) and the loop header that yields `child` are
# not visible in this view.
# Because str.format is used, literal "{" / "}" in the processed strings are
# treated as replacement fields.
46 def populate(element, context):
48 element.text = element.text.format(**context)
50 element.tail = element.tail.format(**context)
# Only values of existing keys are replaced, so the mapping's key set does not
# change while it is being iterated.
51 for k, v in element.attrib.items():
52 element.attrib[k] = v.format(**context)
54 populate(child, context)
# Presumably the extension given to the archive produced by build() -- the
# enclosing class header and any consumer of this attribute are not visible
# in this view; confirm.
58 file_extension = 'daisy.zip'
# Build a DAISY package for `document` and its `mp3` audio files: a zip
# archive is opened on a temporary file, per-part HTML/SMIL data and the
# book-level er_book_info.xml, master.smil and ncc.html are generated from
# templates under res/daisy/, and an OutputFile for the temporary file is
# returned.
#   document  -- source document; its meta (title, publisher, author,
#                created_at, url.slug) feeds the templates.
#   mp3       -- sequence of audio file paths, one per document part.
#   split_on  -- optional tag name; children of the last root element with
#                this tag are used as split points / chapter headers.
# NOTE(review): this view is a sparse excerpt; statements between the shown
# lines (conditions, initialisations, the calls these arguments belong to)
# are not visible, so the comments below describe only what is shown.
60 def build(self, document, mp3, split_on=None, **kwargs):
# The condition guarding this raise is not visible -- presumably a missing or
# empty `mp3` argument.
62 raise ValueError("Need MP3 files")
# delete=False keeps the file on disk so it can be handed out by name in the
# final OutputFile.from_filename(outfile.name).
# NOTE(review): no close of `zipf` / `outfile` is visible in this excerpt;
# confirm the archive is closed before the return.
64 outfile = tempfile.NamedTemporaryFile(delete=False)
65 zipf = zipfile.ZipFile(outfile, 'w')
# Prefix (document slug plus "/") used for the file names built below.
67 directory = document.meta.url.slug + '/'
# Splitting path: work on a deep copy so the caller's document is untouched,
# and re-attach the copy to its root element.
77 newdoc = deepcopy(document)
78 newdoc.tree.getroot().document = newdoc
# The last child of the root is the element whose children are scanned.
80 master = newdoc.tree.getroot()[-1]
# Iterate over a snapshot of the children (list(...)) -- presumably because
# children are removed in lines not shown here.
82 for item in list(master):
83 if item.tag == split_on:
# The printable text of the matching element becomes that part's header.
87 headers.append(raw_printable_text(item))
88 if i != n and not (n == 1 and not i):
93 documents.append(newdoc)
# Fallback shown here: a single part, titled with the document title.
95 documents = [document]
96 headers = [document.meta.title]
# One audio file is required per document part.
# NOTE(review): `assert` is stripped under `python -O`; an explicit check
# raising ValueError would keep this validation in optimized runs.
98 assert len(documents) == len(mp3)
# Narrator is read from the 'TPE1' tag of the first audio file; empty string
# when the tag is absent.
100 narrator = mutagen.File(mp3[0]).get('TPE1')
101 narrator = narrator.text[0] if narrator else ''
# Per-part processing: HTML, audio duration, text/audio alignment, SMIL.
104 for i, part in enumerate(documents):
106 html = DaisyHtmlBuilder().build(part)
109 directory + 'book%d.html' % i,
112 durations.append(get_duration(mp3[i]))
115 directory + "book%d.mp3" % i,
# Alignment task configuration: Polish text, fragments identified by element
# ids matching sec\d+$ sorted numerically, tab-separated sync map output.
118 config_string = "task_language=pol|is_text_type=unparsed|is_text_unparsed_id_regex=sec\\d+$|is_text_unparsed_id_sort=numeric|os_task_file_format=tab"
119 task = Task(config_string=config_string)
# The sync map is written into a throwaway directory and read back below.
121 with tempfile.TemporaryDirectory() as temp:
122 syncfile = temp + "/sync"
123 task.audio_file_path_absolute = mp3[i]
124 task.text_file_path_absolute = html.get_filename()
125 task.sync_map_file_path_absolute = syncfile
127 ExecuteTask(task).execute()
128 task.output_sync_map_file()
# Each sync map line is "start<TAB>end<TAB>fragment id".
131 with open(syncfile) as f:
133 start, end, sec = line.strip().split('\t')
136 sync.append([start, end, sec])
# Duration of this part, and total duration of all preceding parts.
138 hms = format_hms(durations[i])
139 elapsed_hms = format_hms(sum(durations[:i]))
# Placeholder values for the per-part template; "HHMMSS" drops the fractional
# part of the formatted duration.
145 "HHMMSS": hms.split('.')[0],
146 "Sd": "%.1f" % durations[i],
147 "ELAPSED": elapsed_hms,
149 "TITLE": document.meta.title,
150 "PUBLISHER": document.meta.publisher[0],
151 "YEAR": document.meta.created_at[:4],
152 "MONTH": document.meta.created_at[5:7],
153 "AUTHOR": document.meta.author.readable(),
155 "NARRATOR": narrator,
# Per-part SMIL: fill the template, then append one <par> per sync entry.
158 with open(get_resource('res/daisy/content.smil')) as f:
159 tree = etree.parse(f)
160 populate(tree.getroot(), context)
162 seq = tree.find('//seq')
# par ids are 1-based (par000001, ...); ncc.html links to par000001 below.
163 for si, item in enumerate(sync):
164 par = etree.SubElement(seq, 'par', id="par%06d" % (si + 1), endsync="last")
168 src="book%d.html#%s" % (i, item[2]))
170 audio = etree.SubElement(
173 src="book%d.mp3" % i,
# NOTE(review): "%.3f" needs numeric values, while split('\t') above yields
# strings; a float conversion presumably happens in lines not shown -- confirm.
175 "clip-begin": "npt=%.3fs" % item[0],
176 "clip-end": "npt=%.3fs" % item[1],
181 directory + 'content%d.smil' % i,
184 xml_declaration=True,
# DTD and entity files shipped from res/daisy/.
189 for fname in ('smil10.dtd', 'xhtml1-transitional.dtd', 'xhtml-lat1.ent', 'xhtml-special.ent', 'xhtml-symbol.ent'):
191 get_resource('res/daisy/' + fname),
# Book-level totals and placeholder values for the remaining templates.
194 duration = sum(durations)
195 hms = format_hms(duration)
199 "HHMMSS": hms.split('.')[0],
200 "Sd": "%.1f" % duration,
201 "TITLE": document.meta.title,
202 "PUBLISHER": document.meta.publisher[0],
203 "YEAR": document.meta.created_at[:4],
204 "MONTH": document.meta.created_at[5:7],
205 "AUTHOR": document.meta.author.readable(),
206 "NARRATOR": narrator,
# er_book_info.xml: one <smil> entry per part (nr is 1-based, file names are
# 0-based) under the first child of the root.
209 tree = etree.parse(get_resource('res/daisy/er_book_info.xml'))
210 cont = tree.getroot()[0]
211 for i, dur in enumerate(durations):
212 etree.SubElement(cont, 'smil', nr=str(i+1), Name="content%i.smil" % i, dur="%.1f" % dur)
214 directory + 'er_book_info.xml',
215 etree.tostring(tree, xml_declaration=True))
# master.smil: one <ref> per part header, appended to the last child of root.
217 tree = etree.parse(get_resource('res/daisy/master.smil'))
218 populate(tree.getroot(), context)
219 cont = tree.getroot()[-1]
220 for i, header in enumerate(headers):
221 etree.SubElement(cont, 'ref', title=header, src="content%d.smil#seq000001" % i, id="smil_%04d" % i)
223 directory + 'master.smil',
224 etree.tostring(tree, xml_declaration=True))
# ncc.html: a heading per part linking to the first <par> of its SMIL file;
# the condition choosing between the h1 (document title) and h2 (chapter
# header) forms is not visible in this excerpt.
# NOTE(review): every heading is created with id='content', so ids repeat
# when there is more than one header -- confirm whether unique ids are needed.
226 tree = etree.parse(get_resource('res/daisy/ncc.html'))
227 populate(tree.getroot(), context)
228 cont = tree.getroot()[-1]
229 for i, header in enumerate(headers):
231 h1 = etree.SubElement(
232 cont, 'h1', id='content', **{"class": "title"})
234 h1, "a", href='content%d.smil#par000001' % i).text = document.meta.title
236 h2 = etree.SubElement(
237 cont, 'h2', id='content', **{"class": "chapter"})
239 h2, "a", href='content%d.smil#par000001' % i).text = header
242 directory + 'ncc.html',
243 etree.tostring(tree, xml_declaration=True))
# Hand the temporary archive file to the caller as an OutputFile.
246 return OutputFile.from_filename(outfile.name)