fixes

[wolnelektury.git] / src / catalogue / api / tojson.py
diff --git a/src/catalogue/api/tojson.py b/src/catalogue/api/tojson.py

index 633d4f6..3ff257a 100644 (file)
--- a/src/catalogue/api/tojson.py
+++ b/src/catalogue/api/tojson.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
  import json
  import re
  from sys import argv
@@ -81,13 +82,36 @@ tags = {
  
      'begin': ('_ignore', True, {'class': 'reference'}, {'data-uri': 'href'}, False),
      'end': ('_ignore', True, {'class': 'reference'}, {'data-uri': 'href'}, False),
-    'motyw': ('a', True, {'class': 'theme'}, None, False),
+    'motyw': ('_ignore', True, {'class': 'theme'}, None, False),
  
      'pa': ('a', True, {'class': 'footnote footnote-pa'}, None, False),
      'pe': ('a', True, {'class': 'footnote footnote-pe'}, None, False),
      'pr': ('a', True, {'class': 'footnote footnote-pr'}, None, False),
      'pt': ('a', True, {'class': 'footnote footnote-pt'}, None, False),
      'ptrad': ('a', True, {'class': 'footnote footnote-ptrad'}, None, False),
+
+    'werset': ('p', True, {'class': 'werset'}, None, True),
+    'br': ('br', False, None, None, None),
+    'indeks_dolny': ('em', True, {'class': 'indeks_dolny'}, None, False),
+    'mat': ('span', True, {'class': 'mat'}, None, False),
+}
+
+id_prefixes = {
+    'pa': 'fn',
+    'pe': 'fn',
+    'pr': 'fn',
+    'pt': 'fn',
+    'ptrad': 'fn',
+    'wers': 'f',
+    'wers_wciety': 'f',
+    'wers_cd': 'f',
+    'wers_akap': 'f',
+    'zastepnik_wersu': 'f',
+    'wers_do_prawej': 'f',
+    'wers_srodek': 'f',
+    'akap': 'f',
+    'akap_cd': 'f',
+    'akap_dialog': 'f',
  }
  
  
@@ -102,7 +126,7 @@ front2 = set(['autor_utworu'])
  
  
  def norm(text):
-    text = text.replace('---', '—').replace('--', '–').replace('...', '…').replace(',,', '„').replace('"', '”')
+    text = text.replace('---', '—').replace('--', '–').replace('...', '…').replace(',,', '„').replace('"', '”').replace('\n', ' ')
      return text
  
  
@@ -124,6 +148,11 @@ def toj(elem, S):
              if 'dlugi_cytat' not in S['stack'] and 'poezja_cyt' not in S['stack']:
                  S['vindex'] += 1
                  output['visibleNumber'] = S['vindex']
+        id_prefix = id_prefixes.get(elem.tag, 'i')
+        S['id'][id_prefix] += 1
+        output['id'] = id_prefix + str(S['id'][id_prefix])
+        if elem.attrib.get('id'):
+            output['id'] = 'wl-' + elem.attrib.get('id')
          if attrs:
              output['attr'] = attrs.copy()
          if attr_map:
@@ -184,6 +213,7 @@ def conv(tree):
      S = {
          'index': 0,
          'vindex': 0,
+        'id': defaultdict(lambda: 0),
          'stack': [],
          'front1': [],
          'front2': [],