Merge branch 'universal' into edumed-ofop

author Marcin Koziej <mkoziej@ksi.(none)>
Wed, 30 Jan 2013 15:28:07 +0000 (16:28 +0100)
committer Marcin Koziej <mkoziej@ksi.(none)>
Wed, 30 Jan 2013 15:28:07 +0000 (16:28 +0100)
diff --combined librarian/__init__.py

index 09bdcd7,3b811d3..bf41c7a
--- 1/librarian/__init__.py
--- 2/librarian/__init__.py
+++ b/librarian/__init__.py
@@@ -79,8 -79,8 +79,8 @@@ class WLURI(object)
       """Represents a WL URI. Extracts slug from it."""
       slug = None
   
- -    example = 'http://wolnelektury.pl/katalog/lektura/template/'
- -    _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektura/'
+ +    example = 'http://edukacjamedialna.edu.pl/'
+ +    _re_wl_uri = re.compile(r'http://(www\.)?edukacjamedialna.edu.pl/'
               '(?P<slug>[-a-z0-9]+)/?$')
   
       def __init__(self, uri):
@@@ -104,7 -104,7 +104,7 @@@
           u'http://wolnelektury.pl/katalog/lektura/a-slug/'
   
           """
- -        uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
+ +        uri = 'http://prawokultury.pl/publikacje/%s/' % slug
           return cls(uri)
   
       def __unicode__(self):
@@@ -150,10 -150,7 +150,10 @@@ import dcparse
   
   DEFAULT_BOOKINFO = dcparser.BookInfo(
           { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
- -        { DCNS('creator'): [u'Some, Author'],
+ +        { 
+ +          DCNS('creator.expert'): [u'Some, Author'],
+ +          DCNS('creator.scenario'): [u'Some, Author'],
+ +          DCNS('creator.textbook'): [u'Some, Author'],
             DCNS('title'): [u'Some Title'],
             DCNS('subject.period'): [u'Unknown'],
             DCNS('subject.type'): [u'Unknown'],
@@@ -208,32 -205,35 +208,35 @@@ def get_resource(path)
       return os.path.join(os.path.dirname(__file__), path)
   
   
- class OutputFile(object):
-     """Represents a file returned by one of the converters."""
- 
+ class IOFile(object):
+     """ Represents a file fed as input or returned as a result. """
       _string = None
       _filename = None
+     _filename_tmp = False
+ 
+     def __init__(self, attachments=None):
+         self.attachments = attachments or {}
   
       def __del__(self):
-         if self._filename:
+         if self._filename_tmp:
               os.unlink(self._filename)
   
       def __nonzero__(self):
           return self._string is not None or self._filename is not None
   
       @classmethod
-     def from_string(cls, string):
+     def from_string(cls, string, *args, **kwargs):
           """Converter returns contents of a file as a string."""
   
-         instance = cls()
+         instance = cls(*args, **kwargs)
           instance._string = string
           return instance
   
       @classmethod
-     def from_filename(cls, filename):
+     def from_filename(cls, filename, *args, **kwargs):
           """Converter returns contents of a file as a named file."""
   
-         instance = cls()
+         instance = cls(*args, **kwargs)
           instance._filename = filename
           return instance
   
@@@ -266,6 -266,7 +269,7 @@@
               temp.write(self._string)
               temp.close()
               self._filename = temp.name
+             self._filename_tmp = True
               return self._filename
           else:
               return None
@@@ -278,6 -279,23 +282,23 @@@
               os.makedirs(dirname)
           shutil.copy(self.get_filename(), path)
   
+     def dump_to(self, path, directory=None):
+         """ Path should be name for main file. """
+         self.save_as(path)
+         dirname = os.path.dirname(os.path.abspath(path))
+         for filename, attachment in self.attachments.items():
+             attachment.save_as(os.path.join(dirname, filename))
+ 
+ 
+ class Format(object):
+     """ Generic format class. """
+     def __init__(self, wldoc, **kwargs):
+         self.wldoc = wldoc
+         self.customization = kwargs
+ 
+     def build(self):
+         raise NotImplementedError
+ 
   
   class URLOpener(urllib.FancyURLopener):
       version = 'FNP Librarian (http://github.com/fnp/librarian)'
diff --combined librarian/functions.py

index e91d7e1,9490cbb..40f06cd
--- 1/librarian/functions.py
--- 2/librarian/functions.py
+++ b/librarian/functions.py
@@@ -14,47 -14,42 +14,47 @@@ def _register_function(f)
       ns[f.__name__] = f
   
   
- -def reg_substitute_entities():
- -    ENTITY_SUBSTITUTIONS = [
- -        (u'---', u'—'),
- -        (u'--', u'–'),
- -        (u'...', u'…'),
- -        (u',,', u'„'),
- -        (u'"', u'”'),
- -    ]
- -
- -    def substitute_entities(context, text):
- -        """XPath extension function converting all entites in passed text."""
- -        if isinstance(text, list):
- -            text = ''.join(text)
- -        for entity, substitutution in ENTITY_SUBSTITUTIONS:
- -            text = text.replace(entity, substitutution)
- -        return text
+ +ENTITY_SUBSTITUTIONS = [
+ +      (u'---', u'—'),
+ +      (u'--', u'–'),
+ +      (u'...', u'…'),
+ +      (u',,', u'„'),
+ +      (u'"', u'”'),
+ +]
+ +
+ +def substitute_entities(context, text):
+ +    """XPath extension function converting all entites in passed text."""
+ +    if isinstance(text, list):
+ +        text = ''.join(text)
+ +    for entity, substitutution in ENTITY_SUBSTITUTIONS:
+ +        text = text.replace(entity, substitutution)
+ +    return text
   
+ +
+ +def reg_substitute_entities():
       _register_function(substitute_entities)
   
   
+ +def strip(context, text):
+ +    """Remove unneeded whitespace from beginning and end"""
+ +    if isinstance(text, list):
+ +        text = ''.join(text)
+ +    return re.sub(r'\s+', ' ', text).strip()
+ +
+ +
   def reg_strip():
- -    def strip(context, text):
- -        """Remove unneeded whitespace from beginning and end"""
- -        if isinstance(text, list):
- -            text = ''.join(text)
- -        return re.sub(r'\s+', ' ', text).strip()
       _register_function(strip)
   
   
+ +def starts_white(context, text):
+ +    if isinstance(text, list):
+ +        text = ''.join(text)
+ +    if not text:
+ +        return False
+ +    return text[0].isspace()
+ +
+ +
   def reg_starts_white():
- -    def starts_white(context, text):
- -        if isinstance(text, list):
- -            text = ''.join(text)
- -        if not text:
- -            return False
- -        return text[0].isspace()
       _register_function(starts_white)
   
   
@@@ -68,50 -63,58 +68,64 @@@ def reg_ends_white()
       _register_function(ends_white)
   
   
+ +def wrap_words(context, text, wrapping):
+ +    """XPath extension function automatically wrapping words in passed text"""
+ +    if isinstance(text, list):
+ +        text = ''.join(text)
+ +    if not wrapping:
+ +        return text
+ +
+ +    words = re.split(r'\s', text)
+ +
+ +    line_length = 0
+ +    lines = [[]]
+ +    for word in words:
+ +        line_length += len(word) + 1
+ +        if line_length > wrapping:
+ +            # Max line length was exceeded. We create new line
+ +            lines.append([])
+ +            line_length = len(word)
+ +        lines[-1].append(word)
+ +    return '\n'.join(' '.join(line) for line in lines)
+ +
+ +
   def reg_wrap_words():
- -    def wrap_words(context, text, wrapping):
- -        """XPath extension function automatically wrapping words in passed text"""
- -        if isinstance(text, list):
- -            text = ''.join(text)
- -        if not wrapping:
- -            return text
- -
- -        words = re.split(r'\s', text)
- -
- -        line_length = 0
- -        lines = [[]]
- -        for word in words:
- -            line_length += len(word) + 1
- -            if line_length > wrapping:
- -                # Max line length was exceeded. We create new line
- -                lines.append([])
- -                line_length = len(word)
- -            lines[-1].append(word)
- -        return '\n'.join(' '.join(line) for line in lines)
       _register_function(wrap_words)
   
   
+ +def person_name(context, text):
+ +    """ Converts "Name, Forename" to "Forename Name" """
+ +    if isinstance(text, list):
+ +        text = ''.join(text)
+ +    return Person.from_text(text).readable()
+ +
+ +
   def reg_person_name():
- -    def person_name(context, text):
- -        """ Converts "Name, Forename" to "Forename Name" """
- -        if isinstance(text, list):
- -            text = ''.join(text)
- -        return Person.from_text(text).readable()
       _register_function(person_name)
   
   
+ +def texcommand(context, text):
+ +    """Remove non-letters"""
+ +    if isinstance(text, list):
+ +        text = ''.join(text)
+ +    return re.sub(r'[^a-zA-Z]', '', text).strip()
+ +
+ +
   def reg_texcommand():
- -    def texcommand(context, text):
- -        """Remove non-letters"""
- -        if isinstance(text, list):
- -            text = ''.join(text)
- -        return re.sub(r'[^a-zA-Z]', '', text).strip()
       _register_function(texcommand)
   
   
+ def reg_get(format_):
+     def get(context, *args):
+         obj = format_
+         for arg in args:
+             if hasattr(obj, arg):
+                 obj = getattr(obj, arg)
+             else:
+                 try:
+                     obj = obj[arg]
+                 except (TypeError, KeyError), e:
+                     # Just raise proper AttributeError.
+                     getattr(obj, arg)
+         return obj
+     _register_function(get)
diff --combined librarian/parser.py

index 9068fc0,d330a72..b2ad98a
--- 1/librarian/parser.py
--- 2/librarian/parser.py
+++ b/librarian/parser.py
@@@ -4,8 -4,8 +4,8 @@@
   # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   #
   from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
- from librarian import RDFNS
- from librarian.cover import WLCover
+ from librarian import RDFNS, IOFile
+ from librarian.styles.wolnelektury.cover import WLCover
   from librarian import dcparser
   
   from xml.parsers.expat import ExpatError
@@@ -20,58 -20,68 +20,68 @@@ class WLDocument(object)
       LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
       provider = None
   
-     def __init__(self, edoc, parse_dublincore=True, provider=None, 
-                     strict=False, meta_fallbacks=None):
-         self.edoc = edoc
+     _edoc = None
+     @property
+     def edoc(self):
+         if self._edoc is None:
+             data = self.source.get_string()
+             if not isinstance(data, unicode):
+                 data = data.decode('utf-8')
+             data = data.replace(u'\ufeff', '')
+             try:
+                 parser = etree.XMLParser(remove_blank_text=False)
+                 self._edoc = etree.parse(StringIO(data.encode('utf-8')), parser)
+             except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+                 raise ParseError(e)
+         return self._edoc
+ 
+     _rdf_elem = None
+     @property
+     def rdf_elem(self):
+         if self._rdf_elem is None:
+             dc_path = './/' + RDFNS('RDF')
+             self._rdf_elem = self.edoc.getroot().find(dc_path)
+             if self._rdf_elem is None:
+                 raise NoDublinCore('Document has no DublinCore - which is required.')
+         return self._rdf_elem
+ 
+     _book_info = None
+     @property
+     def book_info(self):
+         if not self.parse_dublincore:
+             return None
+         if self._book_info is None:
+             self._book_info = dcparser.BookInfo.from_element(
+                     self.rdf_elem, fallbacks=self.meta_fallbacks, strict=self.strict)
+         return self._book_info
+ 
+     def __init__(self, iofile, provider=None, 
+             parse_dublincore=True, # shouldn't it be in a subclass?
+             strict=False, # ?
+             meta_fallbacks=None # ?
+             ):
+         self.source = iofile
           self.provider = provider
- 
-         root_elem = edoc.getroot()
- 
-         dc_path = './/' + RDFNS('RDF')
- 
-         if root_elem.tag != 'utwor':
+         self.parse_dublincore = parse_dublincore
+         self.strict = strict
+         self.meta_fallbacks = meta_fallbacks
+         if self.edoc.getroot().tag != 'utwor':
               raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
- 
           if parse_dublincore:
-             self.rdf_elem = root_elem.find(dc_path)
- 
-             if self.rdf_elem is None:
-                 raise NoDublinCore('Document has no DublinCore - which is required.')
- 
-             self.book_info = dcparser.BookInfo.from_element(
-                     self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
-         else:
-             self.book_info = None
+             self.book_info
   
       @classmethod
       def from_string(cls, xml, *args, **kwargs):
-         return cls.from_file(StringIO(xml), *args, **kwargs)
+         return cls(IOFile.from_string(xml), *args, **kwargs)
   
       @classmethod
       def from_file(cls, xmlfile, *args, **kwargs):
- 
-         # first, prepare for parsing
           if isinstance(xmlfile, basestring):
-             file = open(xmlfile, 'rb')
-             try:
-                 data = file.read()
-             finally:
-                 file.close()
+             iofile = IOFile.from_filename(xmlfile)
           else:
-             data = xmlfile.read()
- 
-         if not isinstance(data, unicode):
-             data = data.decode('utf-8')
+             iofile = IOFile.from_file(xmlfile)
+         return cls(iofile, *args, **kwargs)
   
-         data = data.replace(u'\ufeff', '')
- 
-         try:
-             parser = etree.XMLParser(remove_blank_text=False)
-             tree = etree.parse(StringIO(data.encode('utf-8')), parser)
- 
-             return cls(tree, *args, **kwargs)
-         except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
-             raise ParseError(e)
   
       def swap_endlines(self):
           """Converts line breaks in stanzas into <br/> tags."""
@@@ -95,10 -105,10 +105,10 @@@
                   elem.text = chunks.pop(0)
   
       def parts(self):
-         if self.provider is None:
-             raise NoProvider('No document provider supplied.')
           if self.book_info is None:
               raise NoDublinCore('No Dublin Core in document.')
+         if self.book_info.parts and self.provider is None:
+             raise NoProvider('No document provider supplied.')
           for part_uri in self.book_info.parts:
               yield self.from_file(self.provider.by_uri(part_uri),
                       provider=self.provider)
@@@ -183,7 -193,7 +193,7 @@@
       # Converters
   
       def as_html(self, *args, **kwargs):
- -        from librarian import html
+ +        from librarian import pyhtml as html
           return html.transform(self, *args, **kwargs)
   
       def as_text(self, *args, **kwargs):
author	Marcin Koziej <mkoziej@ksi.(none)>
	Wed, 30 Jan 2013 15:28:07 +0000 (16:28 +0100)
committer	Marcin Koziej <mkoziej@ksi.(none)>
	Wed, 30 Jan 2013 15:28:07 +0000 (16:28 +0100)
		1	2
librarian/__init__.py	patch |	diff1 |	diff2 |	blob | history
librarian/functions.py	patch |	diff1 |	diff2 |	blob | history
librarian/parser.py	patch |	diff1 |	diff2 |	blob | history