1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 
   4 from copy import deepcopy
 
  10 from librarian import OutputFile, get_resource
 
  11 from librarian.html import raw_printable_text
 
  12 from .html import DaisyHtmlBuilder
 
  15 def get_duration(path):
 
  43     return "%02d:%02d:%02.3f" % (hours, minutes, seconds)    
 
  46 def populate(element, context):
 
  48         element.text = element.text.format(**context)
 
  50         element.tail = element.tail.format(**context)
 
  51     for k, v in element.attrib.items():
 
  52         element.attrib[k] = v.format(**context)
 
  54         populate(child, context)
 
  58     file_extension = 'daisy.zip'
 
  60     def build(self, document, mp3, split_on=None, **kwargs):
 
  62             raise ValueError("Need MP3 files")
 
  64         outfile = tempfile.NamedTemporaryFile(delete=False)
 
  65         zipf = zipfile.ZipFile(outfile, 'w')
 
  67         directory = document.meta.url.slug + '/'
 
  77                 newdoc = deepcopy(document)
 
  78                 newdoc.tree.getroot().document = newdoc
 
  80                 master = newdoc.tree.getroot()[-1]
 
  82                 for item in list(master):
 
  83                     if item.tag == split_on:
 
  87                             headers.append(raw_printable_text(item))
 
  88                     if i != n and not (n == 1 and not i):
 
  93                     documents.append(newdoc)
 
  95             documents = [document]
 
  96             headers = [document.meta.title]
 
  98         assert len(documents) == len(mp3)
 
 100         narrator = mutagen.File(mp3[0]).get('TPE1')
 
 101         narrator = narrator.text[0] if narrator else ''
 
 104         for i, part in enumerate(documents):
 
 106             html = DaisyHtmlBuilder().build(part)
 
 109                 directory + 'book%d.html' % i,
 
 112             durations.append(get_duration(mp3[i]))
 
 115                 directory + "book%d.mp3" % i,
 
 118             config_string = "task_language=pol|is_text_type=unparsed|is_text_unparsed_id_regex=sec\\d+$|is_text_unparsed_id_sort=numeric|os_task_file_format=tab"
 
 119             task = Task(config_string=config_string)
 
 121             with tempfile.TemporaryDirectory() as temp:
 
 122                 syncfile = temp + "/sync"
 
 123                 task.audio_file_path_absolute = mp3[i]
 
 124                 task.text_file_path_absolute = html.get_filename()
 
 125                 task.sync_map_file_path_absolute = syncfile
 
 127                 ExecuteTask(task).execute()
 
 128                 task.output_sync_map_file()
 
 131                 with open(syncfile) as f:
 
 133                         start, end, sec = line.strip().split('\t')
 
 136                         sync.append([start, end, sec])
 
 138             hms = format_hms(durations[i])
 
 139             elapsed_hms = format_hms(sum(durations[:i]))
 
 145                 "HHMMSS": hms.split('.')[0],
 
 146                 "Sd": "%.1f" % durations[i],
 
 147                 "ELAPSED": elapsed_hms,
 
 149                 "TITLE": document.meta.title,
 
 150                 "PUBLISHER": document.meta.publisher[0],
 
 151                 "YEAR": document.meta.created_at[:4],
 
 152                 "MONTH": document.meta.created_at[5:7],
 
 153                 "AUTHOR": document.meta.author.readable(),
 
 155                 "NARRATOR": narrator,
 
 158             with open(get_resource('res/daisy/content.smil')) as f:
 
 159                 tree = etree.parse(f)
 
 160             populate(tree.getroot(), context)
 
 162             seq = tree.find('//seq')
 
 163             for si, item in enumerate(sync):
 
 164                 par = etree.SubElement(seq, 'par', id="par%06d" % (si + 1), endsync="last")
 
 168                     src="book%d.html#%s" % (i, item[2]))
 
 170                 audio = etree.SubElement(
 
 173                     src="book%d.mp3" % i,
 
 175                         "clip-begin": "npt=%.3fs" % item[0],
 
 176                         "clip-end": "npt=%.3fs" % item[1],
 
 181                 directory + 'content%d.smil' % i,
 
 184                     xml_declaration=True,
 
 189         for fname in ('smil10.dtd', 'xhtml1-transitional.dtd', 'xhtml-lat1.ent', 'xhtml-special.ent', 'xhtml-symbol.ent'):
 
 191                 get_resource('res/daisy/' + fname),
 
 194         duration = sum(durations)
 
 195         hms = format_hms(duration)
 
 199             "HHMMSS": hms.split('.')[0],
 
 200             "Sd": "%.1f" % duration,
 
 201             "TITLE": document.meta.title,
 
 202             "PUBLISHER": document.meta.publisher[0],
 
 203             "YEAR": document.meta.created_at[:4],
 
 204             "MONTH": document.meta.created_at[5:7],
 
 205             "AUTHOR": document.meta.author.readable(),
 
 206             "NARRATOR": narrator,
 
 209         tree = etree.parse(get_resource('res/daisy/er_book_info.xml'))
 
 210         cont = tree.getroot()[0]
 
 211         for i, dur in enumerate(durations):
 
 212             etree.SubElement(cont, 'smil', nr=str(i+1), Name="content%i.smil" % i, dur="%.1f" % dur)
 
 214             directory + 'er_book_info.xml',
 
 215             etree.tostring(tree, xml_declaration=True))
 
 217         tree = etree.parse(get_resource('res/daisy/master.smil'))
 
 218         populate(tree.getroot(), context)
 
 219         cont = tree.getroot()[-1]
 
 220         for i, header in enumerate(headers):
 
 221             etree.SubElement(cont, 'ref', title=header, src="content%d.smil#seq000001" % i, id="smil_%04d" % i)
 
 223             directory + 'master.smil',
 
 224             etree.tostring(tree, xml_declaration=True))
 
 226         tree = etree.parse(get_resource('res/daisy/ncc.html'))
 
 227         populate(tree.getroot(), context)
 
 228         cont = tree.getroot()[-1]
 
 229         for i, header in enumerate(headers):
 
 231                 h1 = etree.SubElement(
 
 232                     cont, 'h1', id='content', **{"class": "title"})
 
 234                     h1, "a", href='content%d.smil#par000001' % i).text = document.meta.title
 
 236                 h2 = etree.SubElement(
 
 237                     cont, 'h2', id='content', **{"class": "chapter"})
 
 239                     h2, "a", href='content%d.smil#par000001' % i).text = header
 
 242             directory + 'ncc.html',
 
 243             etree.tostring(tree, xml_declaration=True))
 
 246         return OutputFile.from_filename(outfile.name)