1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
11 from lxml import etree
12 from librarian.parser import WLDocument
13 from librarian import XHTMLNS, ParseError
14 from librarian import functions
16 from lxml.etree import XMLSyntaxError, XSLTApplyError
18 functions.reg_substitute_entities()
# Maps the stylesheet names accepted by get_stylesheet() to XSLT files.
# The paths are relative to the directory that contains this module.
STYLESHEETS = {
    'legacy': 'xslt/book2html.xslt',
    'full': 'xslt/wl2html_full.xslt',
    'partial': 'xslt/wl2html_partial.xslt',
}
def get_stylesheet(name):
    """Return the path of the XSLT file registered under `name`.

    The path is built by joining the directory that contains this module
    with the relative location stored in STYLESHEETS.

    Raises KeyError if `name` is not a key of STYLESHEETS.
    """
    module_dir = os.path.dirname(__file__)
    relative_path = STYLESHEETS[name]
    return os.path.join(module_dir, relative_path)
def html_has_content(text):
    """Look for paragraphs or top-level headings in a transformed document.

    Evaluates an XPath query against `text` that selects every `p` and `h1`
    element, both without a namespace and in the XHTML namespace, and
    returns the query result. The result is falsy when nothing matched.
    """
    namespace = str(XHTMLNS)
    query = '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': namespace}
    find_content = etree.ETXPath(query)
    return find_content(text)
def transform(input, output_filename=None, is_file=True,
              parse_dublincore=True, stylesheet='legacy', options=None):
    """Transforms a document in XML to XHTML.

    Arguments:
    input -- name of the source file when is_file is true, otherwise the
        source document itself as a string
    output_filename -- file to write the XHTML to, or None to get the result
        back instead
    is_file -- selects between WLDocument.from_file and
        WLDocument.from_string for loading `input`
    parse_dublincore -- forwarded to the WLDocument constructor used
    stylesheet -- key of STYLESHEETS naming the XSLT to apply
    options -- dict of keyword arguments forwarded to the document's
        transform() call; None means no extra arguments

    If output_filename is None, returns an XML,
    otherwise returns True if file has been written, False if it hasn't.
    File won't be written if it has no content.

    Raises ValueError when `stylesheet` is not a known stylesheet name and
    ParseError when lxml reports an XML syntax or XSLT application error.
    """
    if options is None:
        options = {}

    try:
        style_filename = get_stylesheet(stylesheet)
        style = etree.parse(style_filename)

        if is_file:
            document = WLDocument.from_file(input, True,
                                            parse_dublincore=parse_dublincore)
        else:
            document = WLDocument.from_string(input, True,
                                              parse_dublincore=parse_dublincore)

        result = document.transform(style, **options)
        del document  # no longer needed large object :)

        if not html_has_content(result):
            if output_filename is not None:
                return False
            # NOTE(review): placeholder handed back for a document without
            # content when no output file was requested -- confirm the exact
            # value against callers.
            return "<empty />"

        add_anchors(result.getroot())
        add_table_of_contents(result.getroot())

        if output_filename is None:
            return result
        result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8')
        return True
    except KeyError:
        # get_stylesheet() signals an unknown name with KeyError; the name is
        # interpolated so the message actually says which one was rejected.
        raise ValueError("'%s' is not a valid stylesheet." % (stylesheet,))
    except (XMLSyntaxError, XSLTApplyError) as e:
        # NOTE(review): lxml errors are presumably re-raised as the project's
        # ParseError -- confirm.
        raise ParseError(e)
class Fragment(object):
    """A themed fragment of a document, recorded as a list of events.

    Each event is an `(event, element)` pair. For 'start' and 'end' events
    the second item is an element object exposing `tag`, `attrib`, `text`
    and `tail`; for any other event (e.g. 'text') it is a plain string.
    """

    def __init__(self, id, themes):
        super(Fragment, self).__init__()
        self.id = id
        self.themes = themes
        self.events = []

    def append(self, event, element):
        """Record one more event at the end of the fragment."""
        self.events.append((event, element))

    def closed_events(self):
        """Return the events plus the 'end' events that close open elements.

        An 'end' event with no matching 'start' is reported on standard
        output and otherwise ignored.
        """
        stack = []
        for event, element in self.events:
            if event == 'start':
                stack.append(('end', element))
            elif event == 'end':
                try:
                    stack.pop()
                except IndexError:
                    print('CLOSED NON-OPEN TAG: %s' % (element,))

        # The element opened last has to be closed first.
        stack.reverse()
        return self.events + stack

    def to_string(self):
        """Serialize the fragment, with all elements closed, to markup.

        Text content and attribute values are escaped so that characters
        such as `&`, `<` or `"` cannot break the generated markup.
        """
        # Local import keeps the class independent of the module's imports.
        from xml.sax.saxutils import escape

        result = []
        for event, element in self.closed_events():
            if event == 'start':
                attributes = ' '.join(
                    '%s="%s"' % (k, escape(v, {'"': '&quot;'}))
                    for k, v in element.attrib.items())
                result.append(u'<%s %s>' % (element.tag, attributes))
                if element.text:
                    result.append(escape(element.text))
            elif event == 'end':
                result.append(u'</%s>' % element.tag)
                if element.tail:
                    result.append(escape(element.tail))
            else:
                # Any other event carries a plain text string.
                result.append(escape(element))

        return ''.join(result)

    def __unicode__(self):
        return self.to_string()
def extract_fragments(input_filename):
    """Extracts theme fragments from input_filename.

    The file is parsed incrementally. An element with class 'theme-begin'
    opens a fragment identified by its 'fid' attribute, and an element with
    class 'theme-end' closes the fragment with the same 'fid'. While a
    fragment is open, every other parse event is recorded in it.

    Returns a tuple `(closed_fragments, open_fragments)` of dicts keyed by
    fragment id; the second holds fragments that were never closed.
    """
    open_fragments = {}
    closed_fragments = {}

    for event, element in etree.iterparse(input_filename, events=('start', 'end')):
        element_class = element.get('class', '')

        # Process begin and end elements
        if element_class in ('theme-begin', 'theme-end'):
            # Process elements only once, on end event
            if event != 'end':
                continue

            if element_class == 'theme-begin':
                # Open new fragment
                fragment = Fragment(id=element.get('fid'), themes=element.text)

                # Collect the ancestors lying below the 'book-text' element.
                # iterancestors() simply stops at the root, so a marker
                # outside 'book-text' no longer fails on a missing parent.
                parents = []
                for ancestor in element.iterancestors():
                    if ancestor.get('id', None) == 'book-text':
                        break
                    parents.append(ancestor)

                # Open the outermost ancestor first.
                for parent in reversed(parents):
                    fragment.append('start', parent)

                open_fragments[fragment.id] = fragment
            else:
                # Close existing fragment
                try:
                    fragment = open_fragments[element.get('fid')]
                except KeyError:
                    print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
                else:
                    closed_fragments[fragment.id] = fragment
                    del open_fragments[fragment.id]

            # Append the marker's tail to the open fragments
            # (we don't want to lose any text)
            if element.tail:
                for fragment in open_fragments.values():
                    fragment.append('text', element.tail)

        # Process all elements except begin and end
        else:
            # Omit annotation tags (elements with a non-empty 'name'
            # attribute or class 'annotation'), but keep the text after them
            if element.get('name', '') or element_class == 'annotation':
                if event == 'end' and element.tail:
                    for fragment in open_fragments.values():
                        fragment.append('text', element.tail)
            else:
                # Every open fragment gets its own shallow copy.
                for fragment in open_fragments.values():
                    fragment.append(event, copy.copy(element))

    return closed_fragments, open_fragments
def _insert_first(element, child):
    """Insert `child` as the first child of `element`.

    Text that opened `element` is moved to the child's tail so that it still
    follows the inserted node in document order.
    """
    if element.text:
        child.tail = element.text
        element.text = u''
    element.insert(0, child)


def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
    """Prepend anchor elements named after `prefix` to `element`.

    With `with_link`, an `<a class="anchor" href="#prefix">` showing
    `link_text` is inserted. With `with_target`, an
    `<a class="target" name="prefix">` is inserted. When both are requested
    the target ends up before the link, because it is inserted last.
    """
    if with_link:
        if link_text is None:
            # NOTE(review): the link presumably falls back to showing the
            # anchor name itself -- confirm.
            link_text = prefix
        anchor = etree.Element('a', href='#%s' % prefix)
        anchor.set('class', 'anchor')
        anchor.text = unicode(link_text)
        _insert_first(element, anchor)

    if with_target:
        anchor_target = etree.Element('a', name='%s' % prefix)
        anchor_target.set('class', 'target')
        anchor_target.text = u' '
        _insert_first(element, anchor_target)
def any_ancestor(element, test):
    """Return True if `test(ancestor)` is true for any ancestor of `element`.

    `element` itself is not tested; only the objects yielded by its
    `iterancestors()` method are. Returns False when there are no ancestors.
    """
    return any(test(ancestor) for ancestor in element.iterancestors())
def add_anchors(root):
    """Number the verses and paragraphs below `root` and add anchors to them.

    Elements that have an ancestor with class 'note', 'motto',
    'motto_podpis' or 'dedication', or a `blockquote` ancestor, are skipped
    and not counted. `p` elements whose class contains 'verse' are counted,
    but only the first and every fifth one gets an anchor. Other elements
    whose class contains 'paragraph' each get an anchor.
    """
    excluded_classes = ('note', 'motto', 'motto_podpis', 'dedication')

    def in_excluded_container(ancestor):
        return ancestor.get('class') in excluded_classes or ancestor.tag == 'blockquote'

    counter = 1
    # Walk a snapshot: add_anchor() inserts new nodes into the tree, which
    # should not happen underneath a live iterator.
    for element in list(root.iterdescendants()):
        if any_ancestor(element, in_excluded_container):
            continue

        element_class = element.get('class', '')
        if element.tag == 'p' and 'verse' in element_class:
            if counter == 1 or counter % 5 == 0:
                add_anchor(element, "f%d" % counter, link_text=counter)
            counter += 1
        elif 'paragraph' in element_class:
            add_anchor(element, "f%d" % counter, link_text=counter)
            counter += 1
def add_table_of_contents(root):
    """Build a table of contents from the `h2`/`h3` headings below `root`.

    Headings that have an ancestor with id 'footnotes' or class
    'person-list' are ignored. Every other heading gets a target anchor
    named 's<number>'. An `h3` that follows an `h2` section is listed as a
    subsection of that section; all other headings become top-level entries.
    """
    sections = []
    counter = 1
    # Walk a snapshot: add_anchor() inserts new nodes into the tree, which
    # should not happen underneath a live iterator.
    for element in list(root.iterdescendants()):
        if element.tag not in ('h2', 'h3'):
            continue
        if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)):
            continue

        # Entry layout: (number, tag, heading text, subsections).
        entry = (counter, element.tag, ''.join(element.xpath('text()')), [])
        if element.tag == 'h3' and sections and sections[-1][1] == 'h2':
            sections[-1][3].append(entry)
        else:
            sections.append(entry)
        add_anchor(element, "s%d" % counter, with_link=False)
        counter += 1

    toc = etree.Element('div')
    # NOTE(review): the container presumably carries an id for styling and
    # linking -- confirm the exact attribute value.
    toc.set('id', 'toc')
    toc_header = etree.SubElement(toc, 'h2')
    toc_header.text = u'Spis treści'
    toc_list = etree.SubElement(toc, 'ol')

    for number, _tag, title, subsections in sections:
        section_element = etree.SubElement(toc_list, 'li')
        add_anchor(section_element, "s%d" % number, with_target=False, link_text=title)

        if subsections:
            subsection_list = etree.SubElement(section_element, 'ol')
            for sub_number, _sub_tag, sub_title, _ in subsections:
                subsection_element = etree.SubElement(subsection_list, 'li')
                add_anchor(subsection_element, "s%d" % sub_number, with_target=False, link_text=sub_title)

    # NOTE(review): the table is presumably placed at the very start of the
    # document; without attaching it the work above has no effect -- confirm
    # the intended position.
    root.insert(0, toc)