From 432b6175940bcddf371c80c46e429f37ada6559f Mon Sep 17 00:00:00 2001 From: Marcin Koziej Date: Mon, 28 Jan 2013 12:22:42 +0100 Subject: [PATCH 1/1] - functions - publicize text transforming functions - small formatting fixes for html generation --- librarian/functions.py | 123 ++++++++++++++++++++++------------------- librarian/pyhtml.py | 26 +++++---- librarian/xmlutils.py | 31 +++++++---- 3 files changed, 103 insertions(+), 77 deletions(-) diff --git a/librarian/functions.py b/librarian/functions.py index 523b3d5..e91d7e1 100644 --- a/librarian/functions.py +++ b/librarian/functions.py @@ -14,42 +14,47 @@ def _register_function(f): ns[f.__name__] = f -def reg_substitute_entities(): - ENTITY_SUBSTITUTIONS = [ - (u'---', u'—'), - (u'--', u'–'), - (u'...', u'…'), - (u',,', u'„'), - (u'"', u'”'), - ] - - def substitute_entities(context, text): - """XPath extension function converting all entites in passed text.""" - if isinstance(text, list): - text = ''.join(text) - for entity, substitutution in ENTITY_SUBSTITUTIONS: - text = text.replace(entity, substitutution) - return text +ENTITY_SUBSTITUTIONS = [ + (u'---', u'—'), + (u'--', u'–'), + (u'...', u'…'), + (u',,', u'„'), + (u'"', u'”'), +] + +def substitute_entities(context, text): + """XPath extension function converting all entites in passed text.""" + if isinstance(text, list): + text = ''.join(text) + for entity, substitutution in ENTITY_SUBSTITUTIONS: + text = text.replace(entity, substitutution) + return text + +def reg_substitute_entities(): _register_function(substitute_entities) +def strip(context, text): + """Remove unneeded whitespace from beginning and end""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'\s+', ' ', text).strip() + + def reg_strip(): - def strip(context, text): - """Remove unneeded whitespace from beginning and end""" - if isinstance(text, list): - text = ''.join(text) - return re.sub(r'\s+', ' ', text).strip() _register_function(strip) +def starts_white(context, text): + if isinstance(text, list): + text = ''.join(text) + if not text: + return False + return text[0].isspace() + + def reg_starts_white(): - def starts_white(context, text): - if isinstance(text, list): - text = ''.join(text) - if not text: - return False - return text[0].isspace() _register_function(starts_white) @@ -63,44 +68,50 @@ def reg_ends_white(): _register_function(ends_white) +def wrap_words(context, text, wrapping): + """XPath extension function automatically wrapping words in passed text""" + if isinstance(text, list): + text = ''.join(text) + if not wrapping: + return text + + words = re.split(r'\s', text) + + line_length = 0 + lines = [[]] + for word in words: + line_length += len(word) + 1 + if line_length > wrapping: + # Max line length was exceeded. We create new line + lines.append([]) + line_length = len(word) + lines[-1].append(word) + return '\n'.join(' '.join(line) for line in lines) + + def reg_wrap_words(): - def wrap_words(context, text, wrapping): - """XPath extension function automatically wrapping words in passed text""" - if isinstance(text, list): - text = ''.join(text) - if not wrapping: - return text - - words = re.split(r'\s', text) - - line_length = 0 - lines = [[]] - for word in words: - line_length += len(word) + 1 - if line_length > wrapping: - # Max line length was exceeded. We create new line - lines.append([]) - line_length = len(word) - lines[-1].append(word) - return '\n'.join(' '.join(line) for line in lines) _register_function(wrap_words) +def person_name(context, text): + """ Converts "Name, Forename" to "Forename Name" """ + if isinstance(text, list): + text = ''.join(text) + return Person.from_text(text).readable() + + def reg_person_name(): - def person_name(context, text): - """ Converts "Name, Forename" to "Forename Name" """ - if isinstance(text, list): - text = ''.join(text) - return Person.from_text(text).readable() _register_function(person_name) +def texcommand(context, text): + """Remove non-letters""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'[^a-zA-Z]', '', text).strip() + + def reg_texcommand(): - def texcommand(context, text): - """Remove non-letters""" - if isinstance(text, list): - text = ''.join(text) - return re.sub(r'[^a-zA-Z]', '', text).strip() _register_function(texcommand) diff --git a/librarian/pyhtml.py b/librarian/pyhtml.py index a46659c..6df3647 100644 --- a/librarian/pyhtml.py +++ b/librarian/pyhtml.py @@ -6,25 +6,27 @@ from lxml import etree from librarian import OutputFile, RDFNS, DCNS from xmlutils import Xmill, tag, tagged, ifoption +from librarian import functions import re import random + class EduModule(Xmill): def __init__(self, *args): super(EduModule, self).__init__(*args) self.activity_counter = 0 + self.register_text_filter(lambda t: functions.substitute_entities(None, t)) def handle_powiesc(self, element): return u"""
- + """, u"
" - handle_autor_utworu = tag("span", "author") handle_nazwa_utworu = tag("h1", "title") handle_dzielo_nadrzedne = tag("span", "collection") @@ -183,18 +185,21 @@ class Excercise(EduModule): add_class += ' handles handles-%s' % handles self.options = {'handles': handles} - return '
' %\ (add_class, self.question_counter, solution_s), \ - "
" + "" class Wybor(Excercise): - def handle_pytanie(self, element): - pre, post = super(Wybor, self).handle_pytanie(element) - solutions = re.split(r"[, ]+", element.attrib['rozw']) - if len(solutions) == 1: - self.options = { 'single': True } + def handle_cwiczenie(self, element): + pre, post = super(Wybor, self).handle_cwiczenie(element) + is_single_choice = True + for p in element.xpath(".//pytanie"): + solutions = re.split(r"[, ]+", p.attrib['rozw']) + if len(solutions) != 1: + is_single_choice = False + break + self.options = {'single': is_single_choice} return pre, post def handle_punkt(self, element): @@ -221,7 +226,6 @@ class Wybor(Excercise): return super(Wybor, self).handle_punkt(element) - class Uporzadkuj(Excercise): def handle_pytanie(self, element): """ diff --git a/librarian/xmlutils.py b/librarian/xmlutils.py index a3f9df9..d762320 100644 --- a/librarian/xmlutils.py +++ b/librarian/xmlutils.py @@ -8,14 +8,25 @@ from collections import defaultdict class Xmill(object): - """Transforms XML to some text. + """Transforms XML to some text. Used instead of XSLT which is difficult and cumbersome. - + """ def __init__(self, options=None): self._options = [] if options: self._options.append(options) + self.text_filters = [] + + def register_text_filter(self, fun): + self.text_filters.append(fun) + + def filter_text(self, text): + for flt in self.text_filters: + if text is None: + return None + text = flt(text) + return text def generate(self, document): """Generate text from node using handlers defined in class.""" @@ -26,7 +37,7 @@ class Xmill(object): def options(self): """Returnes merged scoped options for current node. """ - # Here we can see how a decision not to return the modified map + # Here we can see how a decision not to return the modified map # leads to a need for a hack. return reduce(lambda a, b: a.update(b) or a, self._options, defaultdict(lambda: False)) @@ -54,8 +65,8 @@ class Xmill(object): except ValueError: pass if not ns: - raise ValueError("Strange ns for tag: %s, nsmap: %s" % - (element.tag, element.nsmap)) + raise ValueError("Strange ns for tag: %s, nsmap: %s" % + (element.tag, element.nsmap)) else: tagname = element.tag @@ -63,7 +74,7 @@ class Xmill(object): meth_name = "handle_%s__%s" % (ns, tagname) else: meth_name = "handle_%s" % (tagname,) - + handler = getattr(self, meth_name, None) return handler @@ -84,7 +95,7 @@ class Xmill(object): options_scopes = len(self._options) if handler is None: - pre = [element.text] + pre = [self.filter_text(element.text)] post = [] else: vals = handler(element) @@ -94,10 +105,10 @@ class Xmill(object): return [] else: if not isinstance(vals, tuple): - return [vals, element.tail] + return [vals, self.filter_text(element.tail)] else: - pre = [vals[0], element.text] - post = [vals[1], element.tail] + pre = [vals[0], self.filter_text(element.text)] + post = [vals[1], self.filter_text(element.tail)] out = pre + [self._handle_element(child) for child in element] + post finally: -- 2.20.1