3e0f898918ec02f026f1986c1c314cfa18840911
[librarian.git] / src / librarian / elements / base.py
1 # -*- coding: utf-8
2
3 import re
4 from lxml import etree
5 from librarian import dcparser, RDFNS
6 from librarian.html import raw_printable_text
7 from librarian.util import get_translation
8
9
class WLElement(etree.ElementBase):
    """Common lxml element class for WL document elements.

    Provides shared text normalization (``normalize_text``), metadata lookup
    (``meta``), and the ``txt_build`` / ``html_build`` / ``epub_build`` entry
    points. The class attributes below are the knobs those methods read.
    """
    # When truthy, ``epub_build`` adds a TOC entry with this value as the
    # precedence, and ``validate`` restricts where the element may appear.
    SECTION_PRECEDENCE = None

    # Plain-text output: margins pushed before/after the element and literal
    # prefix/suffix text pushed around its content (see ``txt_build``).
    TXT_TOP_MARGIN = 0
    TXT_BOTTOM_MARGIN = 0
    TXT_PREFIX = ""
    TXT_SUFFIX = ""

    # HTML output: wrapping tag (no wrapper when falsy), base attributes, and
    # the value of the ``class`` attribute (see ``get_html_attr``).
    HTML_TAG = None
    HTML_ATTR = {}
    HTML_CLASS = None

    # EPUB output: same roles as the HTML_* attributes above.
    EPUB_TAG = None
    EPUB_ATTR = {}
    EPUB_CLASS = None
    # When truthy and the parent is a Master, ``epub_build`` starts a new
    # chunk before emitting this element.
    EPUB_START_CHUNK = False

    # ``_build_inner`` emits text and child tails only if CAN_HAVE_TEXT is
    # truthy; with STRIP it trims whitespace at the element's outer edges.
    CAN_HAVE_TEXT = True
    STRIP = False

    # Ordered (needle, replacement) pairs applied by ``normalize_text``.
    # Order matters: '---' must be replaced before '--'.
    text_substitutions = [
        (u'---', u'—'),
        (u'--', u'–'),
        # (u'...', u'…'),  # Temporarily turned off for epub.
        (u',,', u'„'),
        (u'"', u'”'),
        # Drop U+FEFF (byte order mark / zero-width no-break space).
        ('\ufeff', ''),

        ("'", "\u2019"),    # This was enabled for epub.
    ]
40
41     @property
42     def meta_object(self):
43         if not hasattr(self, '_meta_object'):
44             elem = self.find(RDFNS('RDF'))
45             if elem is not None:
46                 self._meta_object = dcparser.BookInfo.from_element(elem)
47             else:
48                 self._meta_object = None
49         return self._meta_object
50
51     @property
52     def meta(self):
53         if self.meta_object is not None:
54             return self.meta_object
55         else:
56             if self.getparent() is not None:
57                 return self.getparent().meta
58             else:
59                 return self.document.base_meta
60
61     @property
62     def gettext(self):
63         return get_translation(self.meta.language).gettext
64
65     def in_context_of(self, setting):
66         parent = self.getparent()
67         if parent is None:
68             return False
69         try:
70             return getattr(parent, setting)
71         except AttributeError:
72             return parent.in_context_of(setting)
73
74     def signal(self, signal):
75         parent = self.getparent()
76         if parent is not None:
77             parent.signal(signal)
78     
79     def raw_printable_text(self):
80         # TODO: podtagi, wyroznienia, etc
81         t = ''
82         t += self.normalize_text(self.text)
83         for c in self:
84             if not isinstance(c, WLElement):
85                 continue
86             if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
87                 t += c.raw_printable_text()
88             t += self.normalize_text(c.tail)
89         return t
90     
91     def normalize_text(self, text):
92         text = text or ''
93         for e, s in self.text_substitutions:
94             text = text.replace(e, s)
95             # FIXME: TEmporary turnoff
96 #        text = re.sub(r'\s+', ' ', text)
97 ### TODO: Added now for epub
98         text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
99
100         return text
101
102     def _build_inner(self, builder, build_method):
103         child_count = len(self)
104         if self.CAN_HAVE_TEXT and self.text:
105             text = self.normalize_text(self.text)
106             if self.STRIP:
107                 text = text.lstrip()
108                 if not child_count:
109                     text = text.rstrip()
110             builder.push_text(text)
111         for i, child in enumerate(self):
112             if isinstance(child, WLElement):
113                 getattr(child, build_method)(builder)
114             if self.CAN_HAVE_TEXT and child.tail:
115                 text = self.normalize_text(child.tail)
116                 if self.STRIP and i == child_count - 1:
117                     text = text.rstrip()
118                 builder.push_text(text)
119
    def _txt_build_inner(self, builder):
        """Emit this element's content for plain-text output.

        Delegates to ``_build_inner``, which calls ``txt_build`` on children.
        """
        self._build_inner(builder, 'txt_build')
122
123     def txt_build(self, builder):
124         if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
125             builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)
126         else:
127             builder.push_margin(self.TXT_TOP_MARGIN)
128         builder.push_text(self.TXT_PREFIX, True)
129         self._txt_build_inner(builder)
130         builder.push_text(self.TXT_SUFFIX, True)
131         if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
132             builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)
133         else:
134             builder.push_margin(self.TXT_BOTTOM_MARGIN)
135
    def _html_build_inner(self, builder):
        """Emit this element's content for HTML output.

        Delegates to ``_build_inner``, which calls ``html_build`` on children.
        """
        self._build_inner(builder, 'html_build')
138
139     def get_html_attr(self, builder):
140         attr = self.HTML_ATTR.copy()
141         if self.HTML_CLASS:
142             attr['class'] = self.HTML_CLASS
143         # always copy the id attribute (?)
144         if self.attrib.get('id'):
145             attr['id'] = self.attrib['id']
146         elif '_compat_section_id' in self.attrib:
147             attr['id'] = self.attrib['_compat_section_id']
148         return attr
149
150     def html_build(self, builder):
151         if self.HTML_TAG:
152             builder.start_element(
153                 self.HTML_TAG,
154                 self.get_html_attr(builder),
155             )
156
157         self._html_build_inner(builder)
158         if self.HTML_TAG:
159             builder.end_element()
160
    def _epub_build_inner(self, builder):
        """Emit this element's content for EPUB output.

        Delegates to ``_build_inner``, which calls ``epub_build`` on children.
        """
        self._build_inner(builder, 'epub_build')
163
164     def get_epub_attr(self, builder):
165         attr = self.EPUB_ATTR.copy()
166         if self.EPUB_CLASS:
167             attr['class'] = self.EPUB_CLASS
168         return attr
169
170     def epub_build(self, builder):
171         from librarian.elements.masters import Master
172
173         # TEMPORARY
174         self.CAN_HAVE_TEXT = True
175         self.STRIP = False
176        
177         start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
178
179         if start_chunk:
180             builder.start_chunk()
181
182         fragment = None
183         if self.SECTION_PRECEDENCE and not self.in_context_of('NO_TOC'):
184             if not start_chunk:
185                 fragment = 'sub%d' % builder.assign_section_number()
186                 self.attrib['id'] = fragment
187
188             builder.add_toc_entry(
189                 fragment,
190                 self.raw_printable_text(),
191                 self.SECTION_PRECEDENCE
192             )
193             
194         if self.EPUB_TAG:
195             attr = self.get_epub_attr(builder)
196             if fragment:
197                 attr['id'] = fragment
198             builder.start_element(
199                 self.EPUB_TAG,
200                 attr
201             )
202
203         self._epub_build_inner(builder)
204         if self.EPUB_TAG:
205             builder.end_element()
206
207     def validate(self):
208         from librarian.elements.masters import Master
209         from librarian.elements.blocks import DlugiCytat, PoezjaCyt
210         from librarian.elements.footnotes import Footnote
211
212         if self.SECTION_PRECEDENCE:
213             assert isinstance(self.getparent(), (Master, DlugiCytat, PoezjaCyt, Footnote)), \
214                     'Header {} inside a <{}> instead of a master.'.format(
215                             etree.tostring(self), self.getparent().tag)
216
217         for c in self:
218             if isinstance(c, WLElement):
219                 c.validate()
220
221
222     def sanitize(self):
223         # TODO: Remove insanity here.
224         for e in self:
225             if isinstance(e, WLElement):
226                 e.sanitize()