Content warnings.
[librarian.git] / src / librarian / elements / base.py
1 # -*- coding: utf-8
2
3 import re
4 from lxml import etree
5 from librarian import dcparser, RDFNS
6 from librarian.util import get_translation
7
8
class WLElement(etree.ElementBase):
    """Base lxml element class with plain-text and HTML build hooks.

    Subclasses tune the output through the class-level settings below;
    ``txt_build`` and ``html_build`` walk the element and feed a builder
    object with margins, text and (for HTML) start/end element events.
    """

    # Margins handed to ``builder.push_margin`` before and after the
    # element's text output (unless a TXT_LEGACY_* attribute is defined,
    # in which case ``builder.push_legacy_margin`` is used instead).
    TXT_TOP_MARGIN = 0
    TXT_BOTTOM_MARGIN = 0
    # Literal text pushed around the element's content in text output.
    TXT_PREFIX = ""
    TXT_SUFFIX = ""

    # When HTML_TAG is truthy, ``html_build`` wraps the content in that
    # element; HTML_ATTR and HTML_CLASS feed ``get_html_attr``.
    HTML_TAG = None
    HTML_ATTR = {}
    HTML_CLASS = None

    # When false, neither the element's text nor its children's tails
    # are pushed to the builder.
    CAN_HAVE_TEXT = True
    # When true, the element's leading text is left-stripped and its
    # final piece of text is right-stripped (see ``_build_inner``).
    STRIP = False

    # (search, replacement) pairs applied in order by ``normalize_text``;
    # the longer dash sequence must stay ahead of the shorter one.
    text_substitutions = [
        (u'---', u'—'),
        (u'--', u'–'),
        (u'...', u'…'),
        (u',,', u'„'),
        (u'"', u'”'),
        # NOTE(review): unlike the entries above this one has no ``u``
        # prefix -- only relevant if Python 2 is still supported; confirm.
        ('\ufeff', ''),
    ]

    @property
    def meta_object(self):
        """Metadata parsed from this element's own RDF child, or None.

        The result (including None) is stored on the instance as
        ``_meta_object`` the first time it is computed.
        """
        if hasattr(self, '_meta_object'):
            return self._meta_object

        rdf = self.find(RDFNS('RDF'))
        if rdf is None:
            self._meta_object = None
        else:
            self._meta_object = dcparser.BookInfo.from_element(rdf)
        return self._meta_object

    @property
    def meta(self):
        """Metadata applying to this element.

        Uses the element's own ``meta_object`` when present, otherwise
        the parent's ``meta``; an element without a parent falls back to
        ``self.document.base_meta`` (``document`` is not defined in this
        class -- presumably attached elsewhere).
        """
        own_meta = self.meta_object
        if own_meta is not None:
            return own_meta

        parent = self.getparent()
        if parent is not None:
            return parent.meta
        return self.document.base_meta

    @property
    def gettext(self):
        """The ``gettext`` attribute of the translation for ``meta.language``."""
        translation = get_translation(self.meta.language)
        return translation.gettext

    def normalize_text(self, text):
        """Return ``text`` with substitutions applied and whitespace collapsed.

        A falsy ``text`` is treated as the empty string.  Every pair in
        ``text_substitutions`` is applied in order, then each run of
        whitespace is replaced with a single space.
        """
        normalized = text if text else ''
        for needle, replacement in self.text_substitutions:
            normalized = normalized.replace(needle, replacement)
        return re.sub(r'\s+', ' ', normalized)

    def _build_inner(self, builder, build_method):
        """Push this element's text and children to ``builder``.

        ``build_method`` is the name of the method invoked on every
        ``WLElement`` child.  Text and tails are normalized first and are
        skipped entirely when ``CAN_HAVE_TEXT`` is false.
        """
        child_total = len(self)
        final_index = child_total - 1

        if self.CAN_HAVE_TEXT and self.text:
            leading = self.normalize_text(self.text)
            if self.STRIP:
                leading = leading.lstrip()
                # With no children this is also the last piece of text.
                if child_total == 0:
                    leading = leading.rstrip()
            builder.push_text(leading)

        for index, child in enumerate(self):
            # Children that are not WLElement instances contribute only
            # their tail text.
            if isinstance(child, WLElement):
                getattr(child, build_method)(builder)
            if not (self.CAN_HAVE_TEXT and child.tail):
                continue
            trailing = self.normalize_text(child.tail)
            if self.STRIP and index == final_index:
                trailing = trailing.rstrip()
            builder.push_text(trailing)

    def _txt_build_inner(self, builder):
        """Build the element's content for text output."""
        self._build_inner(builder, 'txt_build')

    def txt_build(self, builder):
        """Emit the element to a text builder: margin, prefix, content,
        suffix, margin -- in that order.
        """
        if not hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
            builder.push_margin(self.TXT_TOP_MARGIN)
        else:
            builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)

        builder.push_text(self.TXT_PREFIX, True)
        self._txt_build_inner(builder)
        builder.push_text(self.TXT_SUFFIX, True)

        if not hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
            builder.push_margin(self.TXT_BOTTOM_MARGIN)
        else:
            builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)

    def _html_build_inner(self, builder):
        """Build the element's content for HTML output."""
        self._build_inner(builder, 'html_build')

    def get_html_attr(self, builder):
        """Return a new attribute mapping for the element's HTML tag.

        Starts from a copy of ``HTML_ATTR``, adds ``class`` when
        ``HTML_CLASS`` is set, and takes ``id`` from the element's own
        non-empty ``id`` attribute or, failing that, from
        ``_compat_section_id``.  ``builder`` is not used here.
        """
        html_attr = self.HTML_ATTR.copy()
        if self.HTML_CLASS:
            html_attr['class'] = self.HTML_CLASS

        # always copy the id attribute (?)
        own_id = self.attrib.get('id')
        if own_id:
            html_attr['id'] = own_id
        elif '_compat_section_id' in self.attrib:
            html_attr['id'] = self.attrib['_compat_section_id']
        return html_attr

    def html_build(self, builder):
        """Emit the element to an HTML builder.

        The content is wrapped in ``HTML_TAG`` when that is set;
        otherwise only the content is emitted.
        """
        if self.HTML_TAG:
            builder.start_element(self.HTML_TAG, self.get_html_attr(builder))

        self._html_build_inner(builder)

        if self.HTML_TAG:
            builder.end_element()

    def sanitize(self):
        """Call ``sanitize`` on every ``WLElement`` child.

        The base implementation does nothing else.
        """
        # TODO: Remove insanity here.
        for child in self:
            if isinstance(child, WLElement):
                child.sanitize()
124             if isinstance(e, WLElement):
125                 e.sanitize()