1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
11 from lxml import etree
12 from librarian.parser import WLDocument
13 from librarian import XHTMLNS, ParseError
14 from librarian import functions
16 from lxml.etree import XMLSyntaxError, XSLTApplyError
# Module-level side effect executed at import time. Presumably registers the
# entity-substitution helper used by the XSLT stylesheets -- confirm in
# librarian.functions.
18 functions.reg_substitute_entities()
# Entries mapping a stylesheet name to an XSLT path. get_stylesheet() indexes
# this mapping as STYLESHEETS and joins the value onto this module's
# directory. The opening and closing lines of the mapping are not visible in
# this view.
21 'legacy': 'xslt/book2html.xslt',
22 'full': 'xslt/wl2html_full.xslt',
23 'partial': 'xslt/wl2html_partial.xslt'
def get_stylesheet(name):
    """Return the filesystem path of the stylesheet registered as *name*.

    The relative path stored in ``STYLESHEETS[name]`` is joined onto the
    directory containing this module. An unregistered name raises
    ``KeyError`` from the mapping lookup.
    """
    module_dir = os.path.dirname(__file__)
    relative_path = STYLESHEETS[name]
    return os.path.join(module_dir, relative_path)
def html_has_content(text):
    """Select the ``p`` and ``h1`` elements found anywhere in *text*.

    Both un-namespaced elements and elements in the XHTML namespace are
    matched. The raw XPath result is returned, so callers can use its
    truthiness to decide whether the document has any content.
    """
    namespace = {'ns': str(XHTMLNS)}
    expression = '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % namespace
    find_content = etree.ETXPath(expression)
    return find_content(text)
# transform: run a WLDocument through one of the registered XSLT stylesheets
# and either write the result to `output_filename` or hand it back.
# Several lines of this function (the `try:` opener, the branch conditions and
# the tail of the exception handlers) are not visible in this view; the notes
# below describe only what the visible lines show.
# NOTE(review): `options={}` is a mutable default argument. The visible code
# only unpacks it (`**options`) and never mutates it, but `options=None` with
# an in-body default would be the safer spelling.
32 def transform(input, output_filename=None, is_file=True, \
33 parse_dublincore=True, stylesheet='legacy', options={}, flags=None):
34 """Transforms file input_filename in XML to output_filename in XHTML.
36 If output_filename is None, returns an XML,
37 otherwise returns True if file has been written,False if it hasn't.
38 File won't be written if it has no content.
42 style_filename = get_stylesheet(stylesheet)
# The stylesheet is parsed into an lxml tree on every call.
43 style = etree.parse(style_filename)
# Two alternative constructions of the document are visible: from a file and
# from a string. The condition choosing between them is not visible
# (presumably `is_file` -- confirm).
46 document = WLDocument.from_file(input, True, \
47 parse_dublincore=parse_dublincore)
49 document = WLDocument.from_string(input, True, \
50 parse_dublincore=parse_dublincore)
# Marks the root element with attribute `flag`="yes"; the loop supplying
# `flag` is not visible (presumably iterates `flags` when it is not None).
54 document.edoc.getroot().set(flag, 'yes')
56 document.clean_ed_note()
# `options` is forwarded as keyword arguments to the document's transform.
58 result = document.transform(style, **options)
59 del document # no longer needed large object :)
# Anchors and the table of contents appear to be added only when the result
# contains p/h1 elements (see html_has_content).
61 if html_has_content(result):
62 add_anchors(result.getroot())
63 add_table_of_contents(result.getroot())
65 if output_filename is not None:
# Serialised as UTF-8 HTML, pretty-printed, without an XML declaration.
66 result.write(output_filename, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8')
71 if output_filename is not None:
# NOTE(review): the '%s' placeholder is never interpolated, so the message
# contains a literal "%s"; probably meant `... % stylesheet`.
76 raise ValueError("'%s' is not a valid stylesheet.")
# NOTE(review): `except ..., e` is Python 2-only syntax; `except ... as e`
# is accepted by Python 2.6+ and Python 3. The handler body is not visible.
77 except (XMLSyntaxError, XSLTApplyError), e:
# Fragment: an ordered record of (event, element) pairs collected for one
# theme fragment, plus serialisation of those events back to markup text.
# Several lines of the class are not visible in this view (attribute
# initialisation in __init__, the branch conditions in closed_events, and the
# header of the serialising method that __unicode__ calls as to_string).
80 class Fragment(object):
# `id` shadows the builtin of the same name; it is part of the constructor's
# keyword interface (extract_fragments calls Fragment(id=..., themes=...)).
81 def __init__(self, id, themes):
82 super(Fragment, self).__init__()
# Record one (event, element) pair at the end of the event list.
87 def append(self, event, element):
88 self.events.append((event, element))
# Returns the recorded events followed by the contents of `stack`. The
# visible lines push an ('end', element) entry onto `stack` and report a
# close with no matching open; the conditions guarding them are not visible.
90 def closed_events(self):
92 for event, element in self.events:
94 stack.append(('end', element))
# NOTE(review): Python 2 print statement; writes the diagnostic to stdout.
99 print 'CLOSED NON-OPEN TAG:', element
102 return self.events + stack
# Serialisation loop: builds a list of string pieces from the closed event
# stream and joins them. Which branch handles which event is not visible.
106 for event, element in self.closed_events():
# NOTE(review): attribute names/values are interpolated without any escaping,
# so a value containing '"', '<' or '&' would yield malformed markup.
108 result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
110 result.append(element.text)
112 result.append(u'</%s>' % element.tag)
114 result.append(element.tail)
# Here `element` is appended as-is; extract_fragments records 'text' events
# whose payload is a plain string (an element tail), presumably this branch.
116 result.append(element)
118 return ''.join(result)
# Python 2 unicode() hook; delegates to to_string().
120 def __unicode__(self):
121 return self.to_string()
# extract_fragments: stream-parse `input_filename` with lxml iterparse
# ('start' and 'end' events) and collect Fragment objects delimited by
# elements whose class is 'theme-begin' / 'theme-end', keyed by their 'fid'
# attribute. Returns the tuple (closed_fragments, open_fragments).
# Several lines (initialisation of open_fragments, try/except and else
# branches) are not visible in this view.
124 def extract_fragments(input_filename):
125 """Extracts theme fragments from input_filename."""
127 closed_fragments = {}
129 for event, element in etree.iterparse(input_filename, events=('start', 'end')):
130 # Process begin and end elements
131 if element.get('class', '') in ('theme-begin', 'theme-end'):
132 if not event == 'end': continue # Process elements only once, on end event
135 if element.get('class', '') == 'theme-begin':
# The marker's text content is stored as the fragment's themes.
136 fragment = Fragment(id=element.get('fid'), themes=element.text)
# Collect the marker's ancestors up to (not including) the element with
# id="book-text", then record them as 'start' events on the new fragment.
# NOTE(review): getparent() returns None at the tree root, so if no ancestor
# carries id="book-text" the `.get` calls below raise AttributeError.
139 if element.getparent().get('id', None) != 'book-text':
140 parents = [element.getparent()]
141 while parents[-1].getparent().get('id', None) != 'book-text':
142 parents.append(parents[-1].getparent())
145 for parent in parents:
146 fragment.append('start', parent)
148 open_fragments[fragment.id] = fragment
150 # Close existing fragment
153 fragment = open_fragments[element.get('fid')]
# NOTE(review): Python 2 print statement; reports an end marker whose fid has
# no open fragment (the surrounding try/except is not visible).
155 print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
# Move the fragment from the open map to the closed map.
157 closed_fragments[fragment.id] = fragment
158 del open_fragments[fragment.id]
160 # Append element tail to lost_text (we don't want to lose any text)
162 for fragment_id in open_fragments:
163 open_fragments[fragment_id].append('text', element.tail)
166 # Process all elements except begin and end
168 # Omit annotation tags
# Elements with a non-empty 'name' attribute or class 'annotation' are not
# recorded themselves; only their tail text is forwarded on the 'end' event.
169 if len(element.get('name', '')) or element.get('class', '') == 'annotation':
170 if event == 'end' and element.tail:
171 for fragment_id in open_fragments:
172 open_fragments[fragment_id].append('text', element.tail)
# Every open fragment receives its own shallow copy of the element for this
# event. The `copy` module import is not visible in this view.
174 for fragment_id in open_fragments:
175 open_fragments[fragment_id].append(event, copy.copy(element))
177 return closed_fragments, open_fragments
# add_anchor: insert helper <a> elements as the first children of `element`:
# a link (class "anchor", href "#<prefix>") and/or a target (class "target",
# name "<prefix>"). The conditions guarding each part are not visible in this
# view (presumably `with_link` / `with_target` -- confirm).
180 def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
# The fallback assigned when link_text is None is not visible.
182 if link_text is None:
184 anchor = etree.Element('a', href='#%s' % prefix)
185 anchor.set('class', 'anchor')
# `unicode` is the Python 2 builtin; link_text may be a number (add_anchors
# passes the running counter).
186 anchor.text = unicode(link_text)
# The element's leading text is handed to the anchor's tail so that it still
# follows the inserted anchor in document order.
188 anchor.tail = element.text
190 element.insert(0, anchor)
193 anchor_target = etree.Element('a', name='%s' % prefix)
194 anchor_target.set('class', 'target')
# The target carries a single space as its text.
195 anchor_target.text = u' '
197 anchor_target.tail = element.text
199 element.insert(0, anchor_target)
# any_ancestor: walk the ancestors of `element` (lxml iterancestors). The
# loop body and return statements are not visible in this view; callers use
# the result as a boolean, so it presumably reports whether `test(ancestor)`
# holds for any ancestor -- confirm.
202 def any_ancestor(element, test):
203 for ancestor in element.iterancestors():
# add_anchors: walk every descendant of `root` and attach numbered anchors
# ("f<counter>") to verse and paragraph elements. Initialisation and
# incrementing of `counter` are not visible in this view.
209 def add_anchors(root):
211 for element in root.iterdescendants():
# Elements nested inside notes, mottos, dedications, the element with
# id="nota_red", or a blockquote are tested here; the action taken on a match
# is not visible (presumably they are skipped -- confirm).
212 if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
213 or e.get('id') == 'nota_red'
214 or e.tag == 'blockquote'):
# Verses: only the first and every fifth get an anchor.
# The class checks are substring tests on the raw class attribute.
217 if element.tag == 'p' and 'verse' in element.get('class', ''):
218 if counter == 1 or counter % 5 == 0:
219 add_anchor(element, "f%d" % counter, link_text=counter)
# Paragraphs: an anchor is added with no visible counter condition.
221 elif 'paragraph' in element.get('class', ''):
222 add_anchor(element, "f%d" % counter, link_text=counter)
# add_table_of_contents: collect h2/h3 headings under `root`, give each a
# target anchor "s<counter>", and build a <div> holding a nested ordered list
# of links to them. Initialisation of `sections`/`counter` and the insertion
# of the finished `toc` element are not visible in this view.
226 def add_table_of_contents(root):
229 for element in root.iterdescendants():
230 if element.tag in ('h2', 'h3'):
# Headings inside the element with id="footnotes" or class "person-list" are
# tested here; the action on a match is not visible (presumably skipped).
231 if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)):
# Each section is a tuple (counter, tag, heading text, subsections). An h3
# that follows an h2 entry is nested into that entry's subsection list.
# Heading text is taken from the element's direct text nodes only.
234 if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
235 sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), []))
237 sections.append((counter, element.tag, ''.join(element.xpath('text()')), []))
# Only the target is inserted into the heading itself; the link lives in the
# table of contents.
238 add_anchor(element, "s%d" % counter, with_link=False)
241 toc = etree.Element('div')
243 toc_header = etree.SubElement(toc, 'h2')
# Runtime string: Polish for "Table of contents".
244 toc_header.text = u'Spis treści'
245 toc_list = etree.SubElement(toc, 'ol')
247 for n, section, text, subsections in sections:
248 section_element = etree.SubElement(toc_list, 'li')
249 add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)
252 subsection_list = etree.SubElement(section_element, 'ol')
# NOTE(review): this inner loop rebinds `n` and `text` from the outer loop;
# harmless only as long as the outer values are not used after this point.
253 for n, subsection, text, _ in subsections:
254 subsection_element = etree.SubElement(subsection_list, 'li')
255 add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)