X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/11da1ebe840102327816e08dd1de12fa8b75ca0f..362e390a3a0948ed3c2aa54b966a898e90b231a3:/src/catalogue/api/tojson.py diff --git a/src/catalogue/api/tojson.py b/src/catalogue/api/tojson.py index f28351f68..b803e73da 100644 --- a/src/catalogue/api/tojson.py +++ b/src/catalogue/api/tojson.py @@ -54,7 +54,7 @@ tags = { 'osoba': ('em', True, {'class': 'osoba'}, None, None), 'didaskalia': ('div', True, {'class': 'didaskalia'}, None, None), 'kwestia': ('div', False, {'class': 'kwestia'}, None, None), - 'didask_tekst': ('em', False, {'class': 'didask_tekst'}, None, None), + 'didask_tekst': ('em', True, {'class': 'didask_tekst'}, None, None), 'naglowek_czesc': ('h2', True, None, None, None), 'naglowek_akt': ('h2', True, None, None, None), @@ -82,13 +82,50 @@ tags = { 'begin': ('_ignore', True, {'class': 'reference'}, {'data-uri': 'href'}, False), 'end': ('_ignore', True, {'class': 'reference'}, {'data-uri': 'href'}, False), - 'motyw': ('a', True, {'class': 'theme'}, None, False), + 'motyw': ('_ignore', True, {'class': 'theme'}, None, False), 'pa': ('a', True, {'class': 'footnote footnote-pa'}, None, False), 'pe': ('a', True, {'class': 'footnote footnote-pe'}, None, False), 'pr': ('a', True, {'class': 'footnote footnote-pr'}, None, False), 'pt': ('a', True, {'class': 'footnote footnote-pt'}, None, False), 'ptrad': ('a', True, {'class': 'footnote footnote-ptrad'}, None, False), + + 'werset': ('p', True, {'class': 'werset'}, None, True), + 'br': ('br', False, None, None, None), + 'indeks_dolny': ('em', True, {'class': 'indeks_dolny'}, None, False), + 'mat': ('span', True, {'class': 'mat'}, None, False), + + 'mfenced': ('math_mfenced', True, None, None, False), + 'mfrac': ('math_mfrac', True, None, None, False), + 'mrow': ('math_mrow', True, None, None, False), + 'mi': ('math_mi', True, None, None, False), + 'mn': ('math_mn', True, None, None, False), + 'mo': ('math_mo', True, None, None, False), + 'msup': ('math_msup', True, None, None, False), + + 'list': ('blockquote', False, {'class': 'list'}, None, None), + 'wywiad_pyt': ('blockquote', False, {'class': 'wywiad_pyt'}, None, None), + 'wywiad_odp': ('blockquote', False, {'class': 'wywiad_odp'}, None, None), + 'rownolegle': ('blockquote', False, {'class': 'rownolegle'}, None, None), + 'animacja': ('div', False, {'class': 'animacja'}, None, None), + 'data': ('div', True, {'class': 'data'}, None, None), + 'podpis': ('div', True, {'class': 'podpis'}, None, None), + 'naglowek_listu': ('div', True, {'class': 'naglowek_listu'}, None, None), + 'pozdrowienie': ('div', True, {'class': 'pozdrowienie'}, None, None), + 'adresat': ('div', True, {'class': 'adresat'}, None, None), + 'tytul_oryg': ('div', True, {'class': 'tytul_oryg'}, None, None), + 'miejsce_data': ('div', True, {'class': 'miejsce_data'}, None, None), + 'audio': ('_ignore', False, None, None, None), + 'www': ('a', True, {'class': 'www'}, {'href': '.text'}, False), + + 'tabela': ('table', False, None, None, None), + 'tabelka': ('table', False, None, None, None), + 'wiersz': ('tr', False, None, None, None), + 'kol': ('td', True, None, None, None), + + 'ilustr': ('img', False, None, {'src': 'src'}, False), + 'tab': ('span', False, {'class': 'tab'}, {'szer': 'szer'}, False), + } id_prefixes = { @@ -121,7 +158,7 @@ front2 = set(['autor_utworu']) def norm(text): - text = text.replace('---', '—').replace('--', '–').replace('...', '…').replace(',,', '„').replace('"', '”') + text = text.replace('---', '—').replace('--', '–').replace('...', '…').replace(',,', '„').replace('"', '”').replace('\n', ' ') return text @@ -146,12 +183,18 @@ def toj(elem, S): id_prefix = id_prefixes.get(elem.tag, 'i') S['id'][id_prefix] += 1 output['id'] = id_prefix + str(S['id'][id_prefix]) + if elem.attrib.get('id'): + output['id'] = 'wl-' + elem.attrib.get('id') if attrs: output['attr'] = attrs.copy() if attr_map: output.setdefault('attr', {}) for k, v in attr_map.items(): - output['attr'][k] = elem.attrib[v] + if v == '.text': + val = elem.text + else: + val = elem.attrib[v] + output['attr'][k] = val output['contents'] = contents output = [output] if elem.tag == 'strofa':