1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
11 from lxml import etree
12 from librarian.parser import WLDocument
13 from librarian import XHTMLNS, ParseError
14 from librarian import functions
16 from lxml.etree import XMLSyntaxError, XSLTApplyError
# Module-level setup: call two registration helpers from librarian.functions
# at import time.
# NOTE(review): presumably these register XSLT extension functions needed by
# the stylesheets below -- confirm in librarian.functions.
18 functions.reg_substitute_entities()
19 functions.reg_person_name()
# Entries mapping a short stylesheet name to an XSLT path.
# NOTE(review): the line that opens this mapping (and the one closing it) is
# not visible in this listing; presumably this is the STYLESHEETS dict that
# get_stylesheet() indexes -- confirm against the full file.
22 'legacy': 'xslt/book2html.xslt',
23 'full': 'xslt/wl2html_full.xslt',
24 'partial': 'xslt/wl2html_partial.xslt'
def get_stylesheet(name):
    """Return the path of the XSLT stylesheet registered under *name*.

    The relative path is looked up in ``STYLESHEETS`` and joined onto the
    directory containing this module. An unknown *name* propagates the
    ``KeyError`` raised by the dictionary lookup.
    """
    module_dir = os.path.dirname(__file__)
    relative_path = STYLESHEETS[name]
    return os.path.join(module_dir, relative_path)
def html_has_content(text):
    """Run an XPath over *text* that selects every ``p`` and ``h1`` element.

    Both un-namespaced elements and elements in the ``XHTMLNS`` namespace are
    matched. The raw XPath result is returned; callers use its truthiness to
    decide whether the document has any content.
    """
    xhtml_ns = str(XHTMLNS)
    query = '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': xhtml_ns}
    find_content = etree.ETXPath(query)
    return find_content(text)
# transform(): convert a WL XML document to XHTML using one of the XSLT
# stylesheets resolved by get_stylesheet().
#
# NOTE(review): several lines of this function are missing from this listing
# (the embedded numbering jumps 39 -> 43, 44 -> 47, 51 -> 55, 67 -> 72 -> 77),
# including the try/if/else headers and the return statements. The comments
# below describe only what the visible lines do.
#
# Parameters, as used by the visible lines:
#   input            -- handed to WLDocument.from_file() or
#                       WLDocument.from_string(); presumably chosen by
#                       `is_file` (the test itself is not visible).
#   output_filename  -- when not None, the result is written to this path.
#   parse_dublincore -- forwarded unchanged to the WLDocument factory call.
#   stylesheet       -- name resolved to a file path by get_stylesheet().
#   options          -- expanded as keyword arguments of document.transform().
#   flags            -- presumably iterated; each `flag` is set to 'yes' on
#                       the document root (the loop header is not visible).
#
# NOTE(review): `options={}` is a mutable default argument. The visible code
# only reads it, but a `None` default would be safer.
33 def transform(input, output_filename=None, is_file=True, \
34 parse_dublincore=True, stylesheet='legacy', options={}, flags=None):
35 """Transforms file input_filename in XML to output_filename in XHTML.
37 If output_filename is None, returns an XML,
38 otherwise returns True if file has been written,False if it hasn't.
39 File won't be written if it has no content.
# Resolve the stylesheet name to a path and parse the XSLT file.
43 style_filename = get_stylesheet(stylesheet)
44 style = etree.parse(style_filename)
# Build the document either from a file or from a string.
47 document = WLDocument.from_file(input, True, \
48 parse_dublincore=parse_dublincore)
50 document = WLDocument.from_string(input, True, \
51 parse_dublincore=parse_dublincore)
# Mark a flag on the root element of the parsed document.
55 document.edoc.getroot().set(flag, 'yes')
57 document.clean_ed_note()
# Apply the stylesheet; `options` become keyword arguments of the transform.
59 result = document.transform(style, **options)
60 del document # no longer needed large object :)
# Anchors and the table of contents are added only when html_has_content()
# finds something in the result.
62 if html_has_content(result):
63 add_anchors(result.getroot())
64 add_table_of_contents(result.getroot())
# Serialise the result as UTF-8 HTML when an output path was given.
66 if output_filename is not None:
67 result.write(output_filename, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8')
72 if output_filename is not None:
# NOTE(review): the message contains '%s' but nothing is interpolated, so the
# literal text "'%s' is not a valid stylesheet." is raised; it was likely
# meant to be formatted with `stylesheet`.
77 raise ValueError("'%s' is not a valid stylesheet.")
# `except ..., e` is Python 2-only syntax. The handler body is not visible in
# this listing.
78 except (XMLSyntaxError, XSLTApplyError), e:
# Fragment: accumulates (event, element) pairs and renders them as markup
# text.
#
# NOTE(review): parts of this class are missing from this listing (numbering
# jumps 83 -> 88, 89 -> 91 -> 93 -> 95 -> 100 -> 103 -> 107). Not visible:
# the lines that presumably store `id` / `themes` and create `self.events`,
# the initialisation of `stack` and `result`, the conditions that pick each
# branch, and the `def` line of the method whose body starts at "107"
# (presumably `to_string`, which __unicode__ calls).
81 class Fragment(object):
82 def __init__(self, id, themes):
83 super(Fragment, self).__init__()
# Record one parser event together with its element (or text) payload.
88 def append(self, event, element):
89 self.events.append((event, element))
# Return the recorded events followed by the contents of `stack`. The visible
# lines push an ('end', element) pair onto `stack` and print a diagnostic for
# a closing tag that was never opened; the guarding conditions are not
# visible.
91 def closed_events(self):
93 for event, element in self.events:
95 stack.append(('end', element))
100 print 'CLOSED NON-OPEN TAG:', element
103 return self.events + stack
# Render the closed event list to a single string: an opening tag with its
# attributes, the element text, a closing tag, the element tail, or the
# payload itself.
107 for event, element in self.closed_events():
# NOTE(review): attribute values are interpolated without escaping, and an
# element with no attributes renders as '<tag >' (trailing space).
109 result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
111 result.append(element.text)
113 result.append(u'</%s>' % element.tag)
115 result.append(element.tail)
117 result.append(element)
119 return ''.join(result)
# Python 2 unicode() hook; delegates to to_string().
121 def __unicode__(self):
122 return self.to_string()
# extract_fragments(): stream-parse an HTML/XML file and collect the content
# between 'theme-begin' and 'theme-end' marker elements into Fragment objects.
# Returns the tuple (closed_fragments, open_fragments), both keyed by
# fragment id.
#
# NOTE(review): several lines are missing from this listing (numbering jumps
# 126 -> 128, 149 -> 151 -> 154 -> 156 -> 158, 161 -> 163, 164 -> 167 -> 169,
# 173 -> 175), including the initialisation of `open_fragments` and the
# else/try/except headers. Comments describe the visible lines only.
125 def extract_fragments(input_filename):
126 """Extracts theme fragments from input_filename."""
128 closed_fragments = {}
# Each element is seen twice by iterparse: once on 'start', once on 'end'.
130 for event, element in etree.iterparse(input_filename, events=('start', 'end')):
131 # Process begin and end elements
132 if element.get('class', '') in ('theme-begin', 'theme-end'):
133 if not event == 'end': continue # Process elements only once, on end event
# A 'theme-begin' marker opens a new fragment: its id comes from the 'fid'
# attribute and its themes from the marker's text.
136 if element.get('class', '') == 'theme-begin':
137 fragment = Fragment(id=element.get('fid'), themes=element.text)
# Collect the marker's ancestors up to (not including) the element with
# id 'book-text', and record a 'start' event for each.
# NOTE(review): if no ancestor has id 'book-text', getparent() eventually
# returns None and `.get` would fail -- confirm inputs always have it.
# NOTE(review): whether `parents` is reordered before the loop is not
# visible here.
140 if element.getparent().get('id', None) != 'book-text':
141 parents = [element.getparent()]
142 while parents[-1].getparent().get('id', None) != 'book-text':
143 parents.append(parents[-1].getparent())
146 for parent in parents:
147 fragment.append('start', parent)
149 open_fragments[fragment.id] = fragment
151 # Close existing fragment
# Look up the open fragment by the marker's 'fid'; the diagnostic is printed
# for a marker that closes a fragment that is not open (the guarding
# condition is not visible). A found fragment moves from open to closed.
154 fragment = open_fragments[element.get('fid')]
156 print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
158 closed_fragments[fragment.id] = fragment
159 del open_fragments[fragment.id]
# The marker's tail text is appended to every open fragment as a 'text'
# event.
161 # Append element tail to lost_text (we don't want to lose any text)
163 for fragment_id in open_fragments:
164 open_fragments[fragment_id].append('text', element.tail)
167 # Process all elements except begin and end
169 # Omit annotation tags
# Elements with a non-empty 'name' attribute or class 'annotation' are not
# recorded themselves; only their tail text is kept, on the 'end' event.
170 if len(element.get('name', '')) or element.get('class', '') == 'annotation':
171 if event == 'end' and element.tail:
172 for fragment_id in open_fragments:
173 open_fragments[fragment_id].append('text', element.tail)
# A copy of the element is recorded in every open fragment.
175 for fragment_id in open_fragments:
176 open_fragments[fragment_id].append(event, copy.copy(element))
178 return closed_fragments, open_fragments
# add_anchor(): insert up to two <a> elements at the start of `element`:
#   * a link   <a class="anchor" href="#<prefix>"> with text `link_text`
#   * a target <a class="target" name="<prefix>">  with a single-space text
#
# NOTE(review): the guarding conditions are missing from this listing
# (numbering jumps 181 -> 183, 187 -> 189 -> 191 -> 194, 196 -> 198 -> 200).
# Presumably `with_link` / `with_target` select the two halves and
# `link_text` falls back to `prefix` when None -- confirm against the full
# file.
181 def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
183 if link_text is None:
185 anchor = etree.Element('a', href='#%s' % prefix)
186 anchor.set('class', 'anchor')
# unicode() is the Python 2 builtin; it allows callers to pass an int as
# link_text.
187 anchor.text = unicode(link_text)
# The element's leading text becomes the tail of the inserted anchor.
# NOTE(review): whether element.text is cleared afterwards is not visible.
189 anchor.tail = element.text
191 element.insert(0, anchor)
194 anchor_target = etree.Element('a', name='%s' % prefix)
195 anchor_target.set('class', 'target')
196 anchor_target.text = u' '
198 anchor_target.tail = element.text
200 element.insert(0, anchor_target)
# any_ancestor(): walk the ancestors of `element`, taking a predicate `test`.
# NOTE(review): the loop body and the return are missing from this listing
# (numbering jumps 204 -> 210). Presumably returns True as soon as
# test(ancestor) holds and False otherwise; callers use the result as a
# condition.
203 def any_ancestor(element, test):
204 for ancestor in element.iterancestors():
# add_anchors(): walk all descendants of `root` and attach numbered anchors
# ("f<counter>") to verse and paragraph elements via add_anchor().
#
# NOTE(review): the initialisation and increments of `counter`, and the
# action taken when the any_ancestor() test matches, are missing from this
# listing (numbering jumps 210 -> 212, 215 -> 218, 220 -> 222, 223 -> 227).
210 def add_anchors(root):
212 for element in root.iterdescendants():
# Test for elements nested in notes, mottos, dedications, the element with
# id 'nota_red', or blockquotes (presumably these are skipped).
213 if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
214 or e.get('id') == 'nota_red'
215 or e.tag == 'blockquote'):
# Verse lines get an anchor only when counter is 1 or a multiple of 5.
218 if element.tag == 'p' and 'verse' in element.get('class', ''):
219 if counter == 1 or counter % 5 == 0:
220 add_anchor(element, "f%d" % counter, link_text=counter)
# Elements whose class contains 'paragraph' get an anchor unconditionally.
222 elif 'paragraph' in element.get('class', ''):
223 add_anchor(element, "f%d" % counter, link_text=counter)
# add_table_of_contents(): collect h2/h3 headings under `root`, give each a
# target anchor "s<counter>", and build a <div> with a nested <ol> of links
# to them.
#
# Each entry in `sections` is a tuple (number, tag, text, subsections).
#
# NOTE(review): the initialisation of `sections` / `counter`, the counter
# increment, the else/continue lines and whatever follows the last visible
# line are missing from this listing (numbering jumps 227 -> 230,
# 232 -> 235, 239 -> 242 -> 244, 250 -> 253). In particular, no visible line
# attaches `toc` to `root`.
227 def add_table_of_contents(root):
230 for element in root.iterdescendants():
231 if element.tag in ('h2', 'h3'):
# Test for headings inside the element with id 'footnotes' or an element of
# class 'person-list' (presumably these are skipped).
232 if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)):
# An h3 directly following an h2 section is filed as that section's
# subsection.
235 if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
236 sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), []))
238 sections.append((counter, element.tag, ''.join(element.xpath('text()')), []))
# The heading itself only receives the link target, not a visible link.
239 add_anchor(element, "s%d" % counter, with_link=False)
# Build the table-of-contents markup. The header text is Polish for
# "Table of contents".
242 toc = etree.Element('div')
244 toc_header = etree.SubElement(toc, 'h2')
245 toc_header.text = u'Spis treści'
246 toc_list = etree.SubElement(toc, 'ol')
248 for n, section, text, subsections in sections:
249 section_element = etree.SubElement(toc_list, 'li')
250 add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)
# Subsections are rendered as a nested ordered list. The inner loop reuses
# the names `n` and `text` from the outer loop.
253 subsection_list = etree.SubElement(section_element, 'ol')
254 for n, subsection, text, _ in subsections:
255 subsection_element = etree.SubElement(subsection_list, 'li')
256 add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)