+from collections import defaultdict
import json
import re
from sys import argv
'begin': ('_ignore', True, {'class': 'reference'}, {'data-uri': 'href'}, False),
'end': ('_ignore', True, {'class': 'reference'}, {'data-uri': 'href'}, False),
- 'motyw': ('a', True, {'class': 'theme'}, None, False),
+ 'motyw': ('_ignore', True, {'class': 'theme'}, None, False),
'pa': ('a', True, {'class': 'footnote footnote-pa'}, None, False),
'pe': ('a', True, {'class': 'footnote footnote-pe'}, None, False),
'ptrad': ('a', True, {'class': 'footnote footnote-ptrad'}, None, False),
}
+# Lookup from element tag to the prefix used when generating an element's
+# output['id']. The prefix is followed by a per-prefix running counter kept
+# in S['id']; tags missing from this table fall back to the prefix 'i'.
+# An element carrying its own 'id' attribute gets 'wl-' + that value instead.
+id_prefixes = {
+ # 'fn' group: pa/pe/ptrad are rendered with the 'footnote' class;
+ # pr and pt are presumably footnote variants as well -- TODO confirm.
+ 'pa': 'fn',
+ 'pe': 'fn',
+ 'pr': 'fn',
+ 'pt': 'fn',
+ 'ptrad': 'fn',
+ # 'f' group: judging by the tag names these are verse-line (wers*) and
+ # paragraph (akap*) elements -- TODO confirm against the tag table.
+ 'wers': 'f',
+ 'wers_wciety': 'f',
+ 'wers_cd': 'f',
+ 'wers_akap': 'f',
+ 'zastepnik_wersu': 'f',
+ 'wers_do_prawej': 'f',
+ 'wers_srodek': 'f',
+ 'akap': 'f',
+ 'akap_cd': 'f',
+ 'akap_dialog': 'f',
+}
+
#tree = etree.parse(argv[1])
if 'dlugi_cytat' not in S['stack'] and 'poezja_cyt' not in S['stack']:
S['vindex'] += 1
output['visibleNumber'] = S['vindex']
+ id_prefix = id_prefixes.get(elem.tag, 'i')
+ S['id'][id_prefix] += 1
+ output['id'] = id_prefix + str(S['id'][id_prefix])
+ if elem.attrib.get('id'):
+ output['id'] = 'wl-' + elem.attrib.get('id')
if attrs:
output['attr'] = attrs.copy()
if attr_map:
S = {
'index': 0,
'vindex': 0,
+ 'id': defaultdict(lambda: 0),
'stack': [],
'front1': [],
'front2': [],