From 8c5c356ee7710262751d1a629aaa80e272f71918 Mon Sep 17 00:00:00 2001 From: Marcin Koziej Date: Thu, 20 Dec 2012 16:34:27 +0100 Subject: [PATCH 1/1] New python html generator --- librarian/parser.py | 2 +- librarian/pyhtml.py | 190 +++++++++++++++++++++++++++++++++++++ librarian/pyhtml/edumed.py | 5 + librarian/xmlutils.py | 177 ++++++++++++++++++++++++++++++++++ 4 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 librarian/pyhtml.py create mode 100644 librarian/pyhtml/edumed.py create mode 100644 librarian/xmlutils.py diff --git a/librarian/parser.py b/librarian/parser.py index a9e8c65..9068fc0 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -183,7 +183,7 @@ class WLDocument(object): # Converters def as_html(self, *args, **kwargs): - from librarian import html + from librarian import pyhtml as html return html.transform(self, *args, **kwargs) def as_text(self, *args, **kwargs): diff --git a/librarian/pyhtml.py b/librarian/pyhtml.py new file mode 100644 index 0000000..5d4dc50 --- /dev/null +++ b/librarian/pyhtml.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from lxml import etree +from librarian import OutputFile, RDFNS, DCNS +from xmlutils import Xmill, tag, tagged, ifoption + + +class EduModule(Xmill): + def __init__(self, *args): + super(EduModule, self).__init__(*args) + self.activity_counter = 0 + self.question_counter = 0 + + def handle_utwor(self, element): + v = {} +# from pdb import *; set_trace() + v['title'] = element.xpath('//dc:title/text()', namespaces={'dc':DCNS.uri})[0] + return u""" + + + + +%(title)s + + + + + +""" % v, u""" + + + +""" + + + def handle_powiesc(self, element): + return u""" +
+ + + + + +""", u"
" + + + handle_autor_utworu = tag("span", "author") + handle_nazwa_utworu = tag("h1", "title") + handle_dzielo_nadrzedne = tag("span", "collection") + handle_podtytul = tag("span", "subtitle") + handle_naglowek_akt = handle_naglowek_czesc = handle_srodtytul = tag("h2") + handle_naglowek_scena = handle_naglowek_rozdzial = tag('h3') + handle_naglowek_osoba = handle_naglowek_podrozdzial = tag('h4') + handle_akap = handle_akap_dialog = handle_akap_cd = tag('p', 'paragraph') + handle_strofa = tag('div', 'stanza') + + def handle_aktywnosc(self, element): + self.activity_counter += 1 + self.options = { + 'activity': True, + 'activity_counter': self.activity_counter + } + submill = EduModule() + + opis = submill.generate(element.xpath('opis')[0]) + + n = element.xpath('wskazowki') + if n: wskazowki = submill.generate(n[0]) + + else: wskazowki = '' + n = element.xpath('pomoce') + + if n: pomoce = submill.generate(n[0]) + else: pomoce = '' + + forma = ''.join(element.xpath('forma/text()')) + + czas = ''.join(element.xpath('czas/text()')) + + counter = self.activity_counter + + return u""" +
+
%(counter)d. + %(opis)s + %(wskazowki)s +
+
+

Czas: %(czas)s min

+

Forma: %(forma)s

+ %(pomoce)s +
+
+
+""" % locals() + + handle_opis = ifoption(activity=False)(tag('div', 'description')) + handle_wskazowki = ifoption(activity=False)(tag('div', ('hints', 'teacher'))) + + @ifoption(activity=False) + @tagged('div', 'materials') + def handle_pomoce(self, _): + return "Pomoce: ", "" + + def handle_czas(self, *_): + return + + def handle_forma(self, *_): + return + + def handle_cwiczenie(self, element): + self.options = {'excercise': element.attrib['typ']} + self.question_counter = 0 + self.piece_counter = 0 + + return u""" +
+
+""" % element.attrib, \ +u""" +
+ + + +
+
+
+""" + + def handle_pytanie(self, element): + self.question_counter += 1 + self.piece_counter = 0 + solution = element.attrib.get('rozw', None) + if solution: solution_s = ' data-solution="%s"' % solution + else: solution_s = '' + + return '
' %\ + (self.question_counter, solution_s), \ + "
" + + # Lists + def handle_lista(self, element): + ltype = element.attrib.get('typ', 'punkt') + if ltype == 'slowniczek': + self.options = {'slowniczek': True} + return '
', '
' +### robie teraz punkty wyboru + listtag = {'num': 'ol', + 'punkt': 'ul', + 'alfa': 'ul', + 'czytelnia': 'ul'}[ltype] + + return '<%s class="lista %s">' % (listtag, ltype), '' % listtag + + def handle_punkt(self, element): + if self.options['excercise'] and element.attrib['nazwa']: + qc = self.question_counter + self.piece_counter += 1 + no = self.piece_counter + + return u""" +
  • +""" % locals(), u"
  • " + + elif self.options['slowniczek']: + return '
    ', '
    ' + else: + return '
  • ', '
  • ' + + def handle_rdf__RDF(self, _): + # ustal w opcjach rzeczy :D + return + + +def transform(wldoc, stylesheet='edumed', options=None, flags=None): + """Transforms the WL document to XHTML. + + If output_filename is None, returns an XML, + otherwise returns True if file has been written,False if it hasn't. + File won't be written if it has no content. + """ + + edumod = EduModule(options) +# from pdb import set_trace; set_trace() + html = edumod.generate(wldoc.edoc.getroot()) + + return OutputFile.from_string(html.encode('utf-8')) diff --git a/librarian/pyhtml/edumed.py b/librarian/pyhtml/edumed.py new file mode 100644 index 0000000..7fd330b --- /dev/null +++ b/librarian/pyhtml/edumed.py @@ -0,0 +1,5 @@ + +from lxml import etree + + + diff --git a/librarian/xmlutils.py b/librarian/xmlutils.py new file mode 100644 index 0000000..523ad8b --- /dev/null +++ b/librarian/xmlutils.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from lxml import etree +from collections import defaultdict + + +class Xmill(object): + """Transforms XML to some text. + Used instead of XSLT which is difficult and cumbersome. + + """ + def __init__(self, options=None): + self._options = [] + if options: + self._options.append(options) + + def generate(self, document): + """Generate text from node using handlers defined in class.""" + output = self._handle_element(document) + return u''.join([x for x in flatten(output) if x is not None]) + + @property + def options(self): + """Returnes merged scoped options for current node. + """ + # Here we can see how a decision not to return the modified map + # leads to a need for a hack. + return reduce(lambda a, b: a.update(b) or a, self._options, defaultdict(lambda: False)) + + @options.setter + def options(self, opts): + """Sets options overrides for current and child nodes + """ + self._options.append(opts) + + + def _handle_for_element(self, element): + ns = None + tagname = None +# from nose.tools import set_trace + + if isinstance(element, etree._Comment): return None + + if element.tag[0] == '{': + for nshort, nhref in element.nsmap.items(): + try: + if element.tag.index('{%s}' % nhref) == 0: + ns = nshort + tagname = element.tag[len('{%s}' % nhref):] + break + except ValueError: + pass + if not ns: + raise ValueError("Strange ns for tag: %s, nsmap: %s" % + (element.tag, element.nsmap)) + else: + tagname = element.tag + + if ns: + meth_name = "handle_%s__%s" % (ns, tagname) + else: + meth_name = "handle_%s" % (tagname,) + + handler = getattr(self, meth_name, None) + return handler + + def next(self, element): + if len(element): + return element[0] + + while True: + sibling = element.getnext() + if sibling is not None: return sibling # found a new branch to dig into + element = element.getparent() + if element is None: return None # end of tree + + def _handle_element(self, element): + handler = self._handle_for_element(element) + # How many scopes + try: + options_scopes = len(self._options) + + if handler is None: + pre = [element.text] + post = [] + else: + vals = handler(element) + # depending on number of returned values, vals can be None, a value, or a tuple. + # how poorly designed is that? 9 lines below are needed just to unpack this. + if vals is None: + return [] + else: + if not isinstance(vals, tuple): + pre = [vals] + post = [] + else: + pre = [vals[0], element.text] + post = [vals[1]] + + if element.tail: + post.append(element.tail) + + out = pre + [self._handle_element(child) for child in element] + post + finally: + # clean up option scopes if necessary + self._options = self._options[0:options_scopes] + return out + + +def tag(name, classes=None, **attrs): + """Returns a handler which wraps node contents in tag `name', with class attribute + set to `classes' and other attributes according to keyword paramters + """ + if classes: + if isinstance(classes, (tuple, list)): classes = ' '.join(classes) + attrs['class'] = classes + a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()]) + def _hnd(self, element): + return "<%s%s>" % (name, a), "" % name + return _hnd + + +def tagged(name, classes=None, **attrs): + """Handler decorator which wraps handler output in tag `name', with class attribute + set to `classes' and other attributes according to keyword paramters + """ + if classes: + if isinstance(classes, (tuple,list)): classes = ' '.join(classes) + attrs['class'] = classes + a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()]) + def _decor(f): + def _wrap(self, element): + r = f(self, element) + if r is None: return + + prepend = "<%s%s>" % (name, a) + append = "" % name + + if isinstance(r, tuple): + return prepend + r[0], r[1] + append + return prepend + r + append + return _wrap + return _decor + + +def ifoption(**options): + """Decorator which enables node only when options are set + """ + def _decor(f): + def _handler(self, *args, **kw): + opts = self.options + for k, v in options.items(): + if opts[k] != v: + return + return f(self, *args, **kw) + return _handler + return _decor + +def flatten(l, ltypes=(list, tuple)): + """flatten function from BasicPropery/BasicTypes package + """ + ltype = type(l) + l = list(l) + i = 0 + while i < len(l): + while isinstance(l[i], ltypes): + if not l[i]: + l.pop(i) + i -= 1 + break + else: + l[i:i + 1] = l[i] + i += 1 + return ltype(l) -- 2.20.1