1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
11 from lxml import etree
12 from librarian.parser import WLDocument
13 from librarian import XHTMLNS, ParseError
14 from librarian import functions
16 from lxml.etree import XMLSyntaxError, XSLTApplyError
# NOTE(review): presumably registers an extension function needed by the
# XSLT stylesheets below -- confirm against librarian.functions.
functions.reg_substitute_entities()

# Maps a stylesheet name accepted by get_stylesheet() to the path of its XSLT
# file, relative to the directory containing this module.
STYLESHEETS = {
    'legacy': 'xslt/book2html.xslt',
    'full': 'xslt/wl2html_full.xslt',
    'partial': 'xslt/wl2html_partial.xslt',
}
def get_stylesheet(name):
    """Return the path of the XSLT stylesheet registered under *name*.

    The relative path stored in STYLESHEETS is joined onto the directory
    that contains this module.  An unknown *name* raises KeyError.
    """
    module_dir = os.path.dirname(__file__)
    relative_path = STYLESHEETS[name]
    return os.path.join(module_dir, relative_path)
def html_has_content(text):
    """Evaluate the "has content" XPath against *text* and return its result.

    The expression selects every ``p`` and ``h1`` element, both without a
    namespace and in the XHTMLNS namespace.  Callers rely on the truthiness
    of the returned value.
    """
    namespace = str(XHTMLNS)
    expression = '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': namespace}
    find_content = etree.ETXPath(expression)
    return find_content(text)
def transform(input, output_filename=None, is_file=True,
              parse_dublincore=True, stylesheet='legacy', options=None):
    """Transform a WL XML document into XHTML.

    Parameters:
        input: path of the XML file when *is_file* is true, otherwise the
            XML document itself as a string.
        output_filename: file to write the XHTML to, or None to get the
            result back instead of writing it.
        is_file: chooses between WLDocument.from_file and
            WLDocument.from_string for loading *input*.
        parse_dublincore: passed through to the WLDocument loader.
        stylesheet: stylesheet name, resolved with get_stylesheet().
        options: optional dict of keyword arguments forwarded to
            document.transform().

    Returns:
        With *output_filename* set: True if the file has been written,
        False if it has not.  The file is not written when the result has
        no content.
        With *output_filename* None: the transformation result, or a
        placeholder string when the result has no content.

    Raises:
        ValueError: *stylesheet* is not a known stylesheet name.
        ParseError: lxml reported an XML syntax or XSLT application error.
    """
    if options is None:
        options = {}

    # Only the stylesheet lookup is guarded, so that a KeyError raised
    # anywhere else is not misreported as an unknown stylesheet.
    try:
        style_filename = get_stylesheet(stylesheet)
    except KeyError:
        raise ValueError("'%s' is not a valid stylesheet." % stylesheet)

    try:
        style = etree.parse(style_filename)

        if is_file:
            document = WLDocument.from_file(
                input, True, parse_dublincore=parse_dublincore)
        else:
            document = WLDocument.from_string(
                input, True, parse_dublincore=parse_dublincore)

        result = document.transform(style, **options)
        del document  # no longer needed large object :)

        if not html_has_content(result):
            if output_filename is not None:
                return False
            # NOTE(review): the value returned here is not visible in the
            # reviewed listing; "<empty />" is assumed -- confirm.
            return "<empty />"

        root = result.getroot()
        add_anchors(root)
        add_table_of_contents(root)

        if output_filename is None:
            return result

        result.write(output_filename, xml_declaration=False,
                     pretty_print=True, encoding='utf-8')
        return True
    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)
class Fragment(object):
    """An ordered list of parse events making up one theme fragment.

    Each event is an ``(event, element)`` pair.  For 'start' and 'end'
    events *element* must offer ``tag``, ``attrib``, ``text`` and ``tail``;
    for any other event name *element* is emitted verbatim by to_string(),
    so it has to be a string.
    """

    def __init__(self, id, themes):
        super(Fragment, self).__init__()
        self.id = id
        self.themes = themes
        self.events = []

    def append(self, event, element):
        """Record one ``(event, element)`` pair at the end of the fragment."""
        self.events.append((event, element))

    def closed_events(self):
        """Return the events followed by 'end' events for still-open tags.

        Every 'start' pushes a pending 'end' for the same element and every
        'end' pops one.  Whatever is still pending afterwards is appended
        innermost first.  An 'end' with nothing pending is reported on
        standard output and otherwise left in place.  self.events itself is
        not modified.
        """
        import sys

        pending = []
        for event, element in self.events:
            if event == 'start':
                pending.append(('end', element))
            elif event == 'end':
                try:
                    pending.pop()
                except IndexError:
                    sys.stdout.write('CLOSED NON-OPEN TAG: %s\n' % (element,))

        pending.reverse()
        return self.events + pending

    def to_string(self):
        """Serialise closed_events() to markup and return it as one string.

        Tag names, attribute values and text are written as they are; no
        escaping is applied.
        """
        result = []
        for event, element in self.closed_events():
            if event == 'start':
                attributes = ' '.join(
                    '%s="%s"' % (k, v) for k, v in element.attrib.items())
                result.append(u'<%s %s>' % (element.tag, attributes))
                if element.text:
                    result.append(element.text)
            elif event == 'end':
                result.append(u'</%s>' % element.tag)
                if element.tail:
                    result.append(element.tail)
            else:
                # Any other event carries a plain string.
                result.append(element)

        return ''.join(result)

    def __unicode__(self):
        return self.to_string()
def extract_fragments(input_filename):
    """Extract theme fragments from input_filename.

    The file is read with etree.iterparse.  An element whose class is
    'theme-begin' opens a fragment identified by its 'fid' attribute, and a
    'theme-end' element with the same 'fid' closes it.  While a fragment is
    open, every parse event is recorded in it.

    Returns a ``(closed_fragments, open_fragments)`` pair of dicts keyed by
    fragment id; open_fragments holds the fragments that were never closed.
    """
    import sys

    open_fragments = {}
    closed_fragments = {}

    for event, element in etree.iterparse(input_filename, events=('start', 'end')):
        element_class = element.get('class', '')

        # Process begin and end markers
        if element_class in ('theme-begin', 'theme-end'):
            # Process markers only once, on their end event
            if event != 'end':
                continue

            if element_class == 'theme-begin':
                # Open a new fragment
                fragment = Fragment(id=element.get('fid'), themes=element.text)

                # Start the fragment with the tags that enclose the marker,
                # outermost first, up to but excluding the 'book-text'
                # element.  Without such an ancestor all ancestors are used.
                parents = []
                for ancestor in element.iterancestors():
                    if ancestor.get('id', None) == 'book-text':
                        break
                    parents.append(ancestor)
                parents.reverse()
                for parent in parents:
                    fragment.append('start', parent)

                open_fragments[fragment.id] = fragment
            else:
                # Close an existing fragment
                fid = element.get('fid')
                try:
                    fragment = open_fragments.pop(fid)
                except KeyError:
                    sys.stdout.write('%s:closed not open fragment #%s\n'
                                     % (input_filename, fid))
                else:
                    closed_fragments[fragment.id] = fragment

            # Text following the marker belongs to every fragment that is
            # still open (we don't want to lose any text)
            if element.tail:
                for fragment in open_fragments.values():
                    fragment.append('text', element.tail)

        # Process all elements except begin and end markers
        else:
            # Omit annotation tags, keeping only the text that follows them
            if len(element.get('name', '')) or element_class == 'annotation':
                if event == 'end' and element.tail:
                    for fragment in open_fragments.values():
                        fragment.append('text', element.tail)
            else:
                for fragment in open_fragments.values():
                    fragment.append(event, copy.copy(element))

    return closed_fragments, open_fragments
def _prepend_anchor(element, anchor):
    """Insert anchor as the first child of element.

    Text that element starts with is moved to the anchor's tail so that it
    still follows the anchor.
    """
    if element.text:
        anchor.tail = element.text
        element.text = u''
    element.insert(0, anchor)


def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
    """Prepend anchor elements named after prefix to element.

    Parameters:
        element: element that receives the new ``a`` children.
        prefix: anchor name; the link points at ``#prefix`` and the target
            gets ``name="prefix"``.
        with_link: add an ``a`` of class 'anchor' linking to the target.
        with_target: add an ``a`` of class 'target'.
        link_text: text of the link; defaults to prefix.

    When both are requested the target ends up before the link.
    """
    if with_link:
        if link_text is None:
            link_text = prefix
        anchor = etree.Element('a', href='#%s' % prefix)
        anchor.set('class', 'anchor')
        anchor.text = unicode(link_text)
        _prepend_anchor(element, anchor)

    if with_target:
        anchor_target = etree.Element('a', name='%s' % prefix)
        anchor_target.set('class', 'target')
        anchor_target.text = u' '
        _prepend_anchor(element, anchor_target)
def any_ancestor(element, test):
    """Return True if test(ancestor) is truthy for any ancestor of element.

    Ancestors are taken from element.iterancestors() in the order it yields
    them, and the search stops at the first match.  Returns False when there
    is no match or no ancestor at all.
    """
    return any(test(ancestor) for ancestor in element.iterancestors())
def add_anchors(root):
    """Number the verses and paragraphs below root and anchor them.

    Elements with an ancestor of class 'note', 'motto', 'motto_podpis' or
    'dedication', with id 'nota_red', or with tag 'blockquote' are skipped
    and not counted.  Of the rest, every ``p`` whose class contains 'verse'
    and every element whose class contains 'paragraph' takes the next
    number.  Paragraphs are always anchored, verses only for number 1 and
    for multiples of 5.  Anchors are named ``f<number>``.
    """
    def in_skipped_context(ancestor):
        return (ancestor.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
                or ancestor.get('id') == 'nota_red'
                or ancestor.tag == 'blockquote')

    counter = 1
    for element in root.iterdescendants():
        if any_ancestor(element, in_skipped_context):
            continue

        element_class = element.get('class', '')
        if element.tag == 'p' and 'verse' in element_class:
            if counter == 1 or counter % 5 == 0:
                add_anchor(element, "f%d" % counter, link_text=counter)
            counter += 1
        elif 'paragraph' in element_class:
            add_anchor(element, "f%d" % counter, link_text=counter)
            counter += 1
def add_table_of_contents(root):
    """Build a table of contents from the h2/h3 headings below root.

    Headings with an ancestor of id 'footnotes' or class 'person-list' are
    ignored.  Every other heading gets a target anchor ``s<number>``.  An
    ``h3`` that follows an ``h2`` section is listed as its subsection; any
    other heading becomes a top-level section.  The table is a ``div``
    holding an ``h2`` title and a nested ``ol`` of links.
    """
    def outside_main_text(ancestor):
        return (ancestor.get('id') in ('footnotes',)
                or ancestor.get('class') in ('person-list',))

    # Entries are (number, tag, title, subsections) tuples.
    sections = []
    counter = 1
    for element in root.iterdescendants():
        if element.tag in ('h2', 'h3'):
            if any_ancestor(element, outside_main_text):
                continue

            # The title is read before add_anchor() modifies the heading.
            entry = (counter, element.tag, ''.join(element.xpath('text()')), [])
            if element.tag == 'h3' and sections and sections[-1][1] == 'h2':
                sections[-1][3].append(entry)
            else:
                sections.append(entry)
            add_anchor(element, "s%d" % counter, with_link=False)
            counter += 1

    toc = etree.Element('div')
    # NOTE(review): the id given to the container is not visible in the
    # reviewed listing; 'toc' is assumed -- confirm.
    toc.set('id', 'toc')
    toc_header = etree.SubElement(toc, 'h2')
    toc_header.text = u'Spis treści'
    toc_list = etree.SubElement(toc, 'ol')

    for number, _tag, title, subsections in sections:
        section_element = etree.SubElement(toc_list, 'li')
        add_anchor(section_element, "s%d" % number, with_target=False, link_text=title)

        if subsections:
            subsection_list = etree.SubElement(section_element, 'ol')
            for sub_number, _sub_tag, sub_title, _ in subsections:
                subsection_element = etree.SubElement(subsection_list, 'li')
                add_anchor(subsection_element, "s%d" % sub_number,
                           with_target=False, link_text=sub_title)

    # NOTE(review): where the table is attached is not visible in the
    # reviewed listing; insertion as root's first child is assumed -- confirm.
    root.insert(0, toc)