Remove DateValue, drop Py<3.6, fix tests.
[librarian.git] / src / librarian / elements / base.py
1 # -*- coding: utf-8
2
3 import re
4 from lxml import etree
5 from librarian import dcparser, RDFNS
6 from librarian.util import get_translation
7
8
class WLElement(etree.ElementBase):
    """Base class for document elements that can render themselves.

    Subclasses configure output through the class-level constants below and
    may override the ``*_build`` hooks. Three output flavours are supported
    by this base class: plain text (``txt_build``), HTML (``html_build``)
    and EPUB (``epub_build``). All of them walk the element tree through
    ``_build_inner`` and talk to a ``builder`` object supplied by the caller.
    """

    # Truthy for header-like elements; used by ``epub_build`` (TOC entry
    # level) and by ``validate`` (headers must sit in an allowed container).
    SECTION_PRECEDENCE = None

    # Plain-text output: margins pushed before/after the element and literal
    # text pushed around its content (see ``txt_build``).
    TXT_TOP_MARGIN = 0
    TXT_BOTTOM_MARGIN = 0
    TXT_PREFIX = ""
    TXT_SUFFIX = ""

    # HTML output: tag to emit (None means "emit children only"), base
    # attributes (copied per element, never mutated) and optional class.
    HTML_TAG = None
    HTML_ATTR = {}
    HTML_CLASS = None

    # EPUB output: same scheme as HTML, plus a flag asking the builder to
    # start a new chunk when the element is a direct child of a Master.
    EPUB_TAG = None
    EPUB_ATTR = {}
    EPUB_CLASS = None
    EPUB_START_CHUNK = False

    # Whether text/tail content is pushed to the builder at all, and whether
    # leading/trailing whitespace of the element's content is stripped.
    CAN_HAVE_TEXT = True
    STRIP = False

    # Ordered (needle, replacement) pairs applied one after another by
    # ``normalize_text``. Order matters: '---' must be handled before '--'.
    text_substitutions = [
        ('---', '—'),
        ('--', '–'),
        # ('...', '…'),  # Temporary turnoff for epub
        (',,', '„'),
        ('"', '”'),
        ('\ufeff', ''),

        ("'", "\u2019"),    # This was enabled for epub.
    ]

    # Splits text into runs of word characters and single non-word
    # characters, so that each word can be hyphenated separately.
    _TOKEN_RE = re.compile(r'\w+|[^\w]', re.UNICODE)
    # Whitespace that follows a single-character word preceded by whitespace.
    _ORPHAN_RE = re.compile(r'(?<=\s\w)\s+')

    @property
    def meta_object(self):
        """Metadata parsed from this element's own RDF child, or None.

        The result (including None) is cached on the instance in
        ``_meta_object`` after the first access.
        """
        if not hasattr(self, '_meta_object'):
            elem = self.find(RDFNS('RDF'))
            if elem is not None:
                self._meta_object = dcparser.BookInfo.from_element(elem)
            else:
                self._meta_object = None
        return self._meta_object

    @property
    def meta(self):
        """Effective metadata for this element.

        Uses the element's own ``meta_object`` when present, otherwise the
        parent's ``meta``; at the root it falls back to
        ``self.document.base_meta``.
        """
        own = self.meta_object
        if own is not None:
            return own
        parent = self.getparent()
        if parent is not None:
            return parent.meta
        # NOTE(review): ``document`` is not defined in this class; it is
        # presumably attached to the root element elsewhere -- confirm.
        return self.document.base_meta

    @property
    def gettext(self):
        """The ``gettext`` callable for the language given by ``self.meta``."""
        return get_translation(self.meta.language).gettext

    def in_context_of(self, setting):
        """Return the value of attribute ``setting`` on the nearest ancestor.

        Walks up the tree: the first ancestor on which ``getattr`` succeeds
        supplies the value. Returns False when the root is reached without
        finding the attribute.
        """
        parent = self.getparent()
        if parent is None:
            return False
        try:
            return getattr(parent, setting)
        except AttributeError:
            return parent.in_context_of(setting)

    def signal(self, signal):
        """Pass ``signal`` up to the parent element, if there is one.

        The base implementation only forwards; at the root it does nothing.
        """
        parent = self.getparent()
        if parent is not None:
            parent.signal(signal)

    def raw_printable_text(self, builder):
        """Return the normalized text content of this element as one string.

        Includes the element's own text, the printable text of child
        elements (except 'pe', 'pa', 'pt', 'pr' and 'motyw', whose content
        is skipped) and the children's tails. Children that are not
        WLElement instances are skipped together with their tails.
        """
        # TODO: subtags, emphasis, etc.
        parts = [self.normalize_text(self.text, builder)]
        for c in self:
            if not isinstance(c, WLElement):
                continue
            if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
                parts.append(c.raw_printable_text(builder))
            parts.append(self.normalize_text(c.tail, builder))
        return ''.join(parts)

    def normalize_text(self, text, builder):
        """Apply typographic normalization to ``text`` and return the result.

        Steps, in order:
        1. ``None``/empty input becomes ``''``.
        2. Every pair from ``text_substitutions`` is applied in sequence.
        3. If ``builder.hyphenator`` exists and is not None, each word is
           passed through ``hyphenator.inserted`` with a soft hyphen.
        4. If ``builder.orphans`` is truthy, whitespace after a
           single-character word is replaced by a no-break space.
        """
        text = text or ''
        for e, s in self.text_substitutions:
            text = text.replace(e, s)
        # FIXME: whitespace collapsing (r'\s+' -> ' ') is temporarily
        # turned off here.

        # TODO: hyphenation below was added for epub.
        hyphenator = getattr(builder, 'hyphenator', None)
        if hyphenator is not None:
            text = ''.join(
                hyphenator.inserted(w, '\u00AD')
                for w in self._TOKEN_RE.findall(text)
            )

        if builder.orphans:
            text = self._ORPHAN_RE.sub('\u00A0', text)

        return text

    def _build_inner(self, builder, build_method):
        """Emit this element's content through ``builder``.

        Pushes the element's own text, then for every child calls the method
        named ``build_method`` on it (only for WLElement children) and pushes
        the child's tail. Text is pushed only when ``CAN_HAVE_TEXT`` is set.
        With ``STRIP`` set, the leading text is left-stripped (and also
        right-stripped if there are no children) and the last child's tail
        is right-stripped.
        """
        child_count = len(self)
        if self.CAN_HAVE_TEXT and self.text:
            text = self.normalize_text(self.text, builder)
            if self.STRIP:
                text = text.lstrip()
                if not child_count:
                    text = text.rstrip()
            builder.push_text(text)
        for i, child in enumerate(self):
            if isinstance(child, WLElement):
                getattr(child, build_method)(builder)
            if self.CAN_HAVE_TEXT and child.tail:
                text = self.normalize_text(child.tail, builder)
                if self.STRIP and i == child_count - 1:
                    text = text.rstrip()
                builder.push_text(text)

    def _txt_build_inner(self, builder):
        """Emit the element's content using the children's ``txt_build``."""
        self._build_inner(builder, 'txt_build')

    def txt_build(self, builder):
        """Render this element as plain text.

        Pushes the top margin, ``TXT_PREFIX``, the content, ``TXT_SUFFIX``
        and the bottom margin. When the class defines
        ``TXT_LEGACY_TOP_MARGIN`` / ``TXT_LEGACY_BOTTOM_MARGIN`` the
        corresponding margin goes through ``builder.push_legacy_margin``
        instead of ``builder.push_margin``.
        """
        if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
            builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)
        else:
            builder.push_margin(self.TXT_TOP_MARGIN)
        builder.push_text(self.TXT_PREFIX, True)
        self._txt_build_inner(builder)
        builder.push_text(self.TXT_SUFFIX, True)
        if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
            builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)
        else:
            builder.push_margin(self.TXT_BOTTOM_MARGIN)

    def _html_build_inner(self, builder):
        """Emit the element's content using the children's ``html_build``."""
        self._build_inner(builder, 'html_build')

    def get_html_attr(self, builder):
        """Return a fresh dict of attributes for the HTML tag.

        Starts from a copy of ``HTML_ATTR``, adds ``class`` when
        ``HTML_CLASS`` is set, and adds ``id`` from the element's non-empty
        ``id`` attribute or, failing that, from ``_compat_section_id``.
        """
        attr = self.HTML_ATTR.copy()
        if self.HTML_CLASS:
            attr['class'] = self.HTML_CLASS
        # always copy the id attribute (?)
        if self.attrib.get('id'):
            attr['id'] = self.attrib['id']
        elif '_compat_section_id' in self.attrib:
            attr['id'] = self.attrib['_compat_section_id']
        return attr

    def html_build(self, builder):
        """Render this element as HTML.

        Wraps the content in ``HTML_TAG`` when one is set; otherwise only
        the content is emitted.
        """
        if self.HTML_TAG:
            builder.start_element(
                self.HTML_TAG,
                self.get_html_attr(builder),
            )

        self._html_build_inner(builder)
        if self.HTML_TAG:
            builder.end_element()

    def _epub_build_inner(self, builder):
        """Emit the element's content using the children's ``epub_build``."""
        self._build_inner(builder, 'epub_build')

    def get_epub_attr(self, builder):
        """Return a fresh dict of attributes for the EPUB tag.

        Starts from a copy of ``EPUB_ATTR`` and adds ``class`` when
        ``EPUB_CLASS`` is set.
        """
        attr = self.EPUB_ATTR.copy()
        if self.EPUB_CLASS:
            attr['class'] = self.EPUB_CLASS
        return attr

    def epub_build(self, builder):
        """Render this element for EPUB output.

        Optionally starts a new chunk, registers a table-of-contents entry
        for section headers that are not inside a ``NO_TOC`` context, and
        wraps the content in ``EPUB_TAG`` when one is set.
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular import between element modules).
        from librarian.elements.masters import Master

        # TEMPORARY: force text output without stripping for this instance,
        # overriding the class-level CAN_HAVE_TEXT / STRIP settings.
        self.CAN_HAVE_TEXT = True
        self.STRIP = False

        start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)

        if start_chunk:
            builder.start_chunk()

        fragment = None
        if self.SECTION_PRECEDENCE and not self.in_context_of('NO_TOC'):
            if not start_chunk:
                # A header that does not open its own chunk needs an anchor
                # so the TOC entry can point at it.
                fragment = 'sub%d' % builder.assign_section_number()
                self.attrib['id'] = fragment

            builder.add_toc_entry(
                fragment,
                self.raw_printable_text(builder),
                self.SECTION_PRECEDENCE
            )

        if self.EPUB_TAG:
            attr = self.get_epub_attr(builder)
            if fragment:
                attr['id'] = fragment
            builder.start_element(
                self.EPUB_TAG,
                attr
            )

        self._epub_build_inner(builder)
        if self.EPUB_TAG:
            builder.end_element()

    def validate(self):
        """Check structural constraints on this element and its subtree.

        An element with a truthy ``SECTION_PRECEDENCE`` (a header) must be a
        direct child of a Master, DlugiCytat, PoezjaCyt or Footnote.

        Raises:
            AssertionError: if a header is found in any other parent,
                including a header that has no parent at all.
        """
        from librarian.elements.masters import Master
        from librarian.elements.blocks import DlugiCytat, PoezjaCyt
        from librarian.elements.footnotes import Footnote

        if self.SECTION_PRECEDENCE:
            parent = self.getparent()
            if not isinstance(parent, (Master, DlugiCytat, PoezjaCyt, Footnote)):
                # Raised explicitly (not via ``assert``) so the check also
                # runs under ``python -O``; a missing parent must not turn
                # the diagnostic into an AttributeError.
                parent_tag = parent.tag if parent is not None else None
                raise AssertionError(
                    'Header {} inside a <{}> instead of a master.'.format(
                        etree.tostring(self, encoding='unicode'), parent_tag))

        for c in self:
            if isinstance(c, WLElement):
                c.validate()

    def sanitize(self):
        """Recursively call ``sanitize`` on all WLElement children.

        The base implementation performs no changes itself.
        """
        # TODO: Remove insanity here.
        for e in self:
            if isinstance(e, WLElement):
                e.sanitize()