Merge branch 'universal' into edumed-ofop
author Marcin Koziej <mkoziej@ksi.(none)>
Wed, 30 Jan 2013 15:28:07 +0000 (16:28 +0100)
committer Marcin Koziej <mkoziej@ksi.(none)>
Wed, 30 Jan 2013 15:28:07 +0000 (16:28 +0100)
1  2 
librarian/__init__.py
librarian/functions.py
librarian/parser.py

diff --combined librarian/__init__.py
@@@ -79,8 -79,8 +79,8 @@@ class WLURI(object)
      """Represents a WL URI. Extracts slug from it."""
      slug = None
  
 -    example = 'http://wolnelektury.pl/katalog/lektura/template/'
 -    _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektura/'
 +    example = 'http://edukacjamedialna.edu.pl/'
 +    _re_wl_uri = re.compile(r'http://(www\.)?edukacjamedialna.edu.pl/'
              '(?P<slug>[-a-z0-9]+)/?$')
  
      def __init__(self, uri):
          u'http://wolnelektury.pl/katalog/lektura/a-slug/'
  
          """
 -        uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
 +        uri = 'http://prawokultury.pl/publikacje/%s/' % slug
          return cls(uri)
  
      def __unicode__(self):
@@@ -150,10 -150,7 +150,10 @@@ import dcparse
  
  DEFAULT_BOOKINFO = dcparser.BookInfo(
          { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
 -        { DCNS('creator'): [u'Some, Author'],
 +        { 
 +          DCNS('creator.expert'): [u'Some, Author'],
 +          DCNS('creator.scenario'): [u'Some, Author'],
 +          DCNS('creator.textbook'): [u'Some, Author'],
            DCNS('title'): [u'Some Title'],
            DCNS('subject.period'): [u'Unknown'],
            DCNS('subject.type'): [u'Unknown'],
@@@ -208,32 -205,35 +208,35 @@@ def get_resource(path)
      return os.path.join(os.path.dirname(__file__), path)
  
  
- class OutputFile(object):
-     """Represents a file returned by one of the converters."""
+ class IOFile(object):
+     """ Represents a file fed as input or returned as a result. """
      _string = None
      _filename = None
+     _filename_tmp = False
+     def __init__(self, attachments=None):
+         self.attachments = attachments or {}
  
      def __del__(self):
-         if self._filename:
+         if self._filename_tmp:
              os.unlink(self._filename)
  
      def __nonzero__(self):
          return self._string is not None or self._filename is not None
  
      @classmethod
-     def from_string(cls, string):
+     def from_string(cls, string, *args, **kwargs):
          """Converter returns contents of a file as a string."""
  
-         instance = cls()
+         instance = cls(*args, **kwargs)
          instance._string = string
          return instance
  
      @classmethod
-     def from_filename(cls, filename):
+     def from_filename(cls, filename, *args, **kwargs):
          """Converter returns contents of a file as a named file."""
  
-         instance = cls()
+         instance = cls(*args, **kwargs)
          instance._filename = filename
          return instance
  
              temp.write(self._string)
              temp.close()
              self._filename = temp.name
+             self._filename_tmp = True
              return self._filename
          else:
              return None
              os.makedirs(dirname)
          shutil.copy(self.get_filename(), path)
  
+     def dump_to(self, path, directory=None):
+         """ Path should be name for main file. """
+         self.save_as(path)
+         dirname = os.path.dirname(os.path.abspath(path))
+         for filename, attachment in self.attachments.items():
+             attachment.save_as(os.path.join(dirname, filename))
+ class Format(object):
+     """ Generic format class. """
+     def __init__(self, wldoc, **kwargs):
+         self.wldoc = wldoc
+         self.customization = kwargs
+     def build(self):
+         raise NotImplementedError
  
  class URLOpener(urllib.FancyURLopener):
      version = 'FNP Librarian (http://github.com/fnp/librarian)'
diff --combined librarian/functions.py
@@@ -14,47 -14,42 +14,47 @@@ def _register_function(f)
      ns[f.__name__] = f
  
  
 -def reg_substitute_entities():
 -    ENTITY_SUBSTITUTIONS = [
 -        (u'---', u'—'),
 -        (u'--', u'–'),
 -        (u'...', u'…'),
 -        (u',,', u'„'),
 -        (u'"', u'”'),
 -    ]
 -
 -    def substitute_entities(context, text):
 -        """XPath extension function converting all entites in passed text."""
 -        if isinstance(text, list):
 -            text = ''.join(text)
 -        for entity, substitutution in ENTITY_SUBSTITUTIONS:
 -            text = text.replace(entity, substitutution)
 -        return text
 +ENTITY_SUBSTITUTIONS = [
 +      (u'---', u'—'),
 +      (u'--', u'–'),
 +      (u'...', u'…'),
 +      (u',,', u'„'),
 +      (u'"', u'”'),
 +]
 +
 +def substitute_entities(context, text):
 +    """XPath extension function converting all entites in passed text."""
 +    if isinstance(text, list):
 +        text = ''.join(text)
 +    for entity, substitutution in ENTITY_SUBSTITUTIONS:
 +        text = text.replace(entity, substitutution)
 +    return text
  
 +
 +def reg_substitute_entities():
      _register_function(substitute_entities)
  
  
 +def strip(context, text):
 +    """Remove unneeded whitespace from beginning and end"""
 +    if isinstance(text, list):
 +        text = ''.join(text)
 +    return re.sub(r'\s+', ' ', text).strip()
 +
 +
  def reg_strip():
 -    def strip(context, text):
 -        """Remove unneeded whitespace from beginning and end"""
 -        if isinstance(text, list):
 -            text = ''.join(text)
 -        return re.sub(r'\s+', ' ', text).strip()
      _register_function(strip)
  
  
 +def starts_white(context, text):
 +    if isinstance(text, list):
 +        text = ''.join(text)
 +    if not text:
 +        return False
 +    return text[0].isspace()
 +
 +
  def reg_starts_white():
 -    def starts_white(context, text):
 -        if isinstance(text, list):
 -            text = ''.join(text)
 -        if not text:
 -            return False
 -        return text[0].isspace()
      _register_function(starts_white)
  
  
@@@ -68,50 -63,58 +68,64 @@@ def reg_ends_white()
      _register_function(ends_white)
  
  
 +def wrap_words(context, text, wrapping):
 +    """XPath extension function automatically wrapping words in passed text"""
 +    if isinstance(text, list):
 +        text = ''.join(text)
 +    if not wrapping:
 +        return text
 +
 +    words = re.split(r'\s', text)
 +
 +    line_length = 0
 +    lines = [[]]
 +    for word in words:
 +        line_length += len(word) + 1
 +        if line_length > wrapping:
 +            # Max line length was exceeded. We create new line
 +            lines.append([])
 +            line_length = len(word)
 +        lines[-1].append(word)
 +    return '\n'.join(' '.join(line) for line in lines)
 +
 +
  def reg_wrap_words():
 -    def wrap_words(context, text, wrapping):
 -        """XPath extension function automatically wrapping words in passed text"""
 -        if isinstance(text, list):
 -            text = ''.join(text)
 -        if not wrapping:
 -            return text
 -
 -        words = re.split(r'\s', text)
 -
 -        line_length = 0
 -        lines = [[]]
 -        for word in words:
 -            line_length += len(word) + 1
 -            if line_length > wrapping:
 -                # Max line length was exceeded. We create new line
 -                lines.append([])
 -                line_length = len(word)
 -            lines[-1].append(word)
 -        return '\n'.join(' '.join(line) for line in lines)
      _register_function(wrap_words)
  
  
 +def person_name(context, text):
 +    """ Converts "Name, Forename" to "Forename Name" """
 +    if isinstance(text, list):
 +        text = ''.join(text)
 +    return Person.from_text(text).readable()
 +
 +
  def reg_person_name():
 -    def person_name(context, text):
 -        """ Converts "Name, Forename" to "Forename Name" """
 -        if isinstance(text, list):
 -            text = ''.join(text)
 -        return Person.from_text(text).readable()
      _register_function(person_name)
  
  
 +def texcommand(context, text):
 +    """Remove non-letters"""
 +    if isinstance(text, list):
 +        text = ''.join(text)
 +    return re.sub(r'[^a-zA-Z]', '', text).strip()
 +
 +
  def reg_texcommand():
 -    def texcommand(context, text):
 -        """Remove non-letters"""
 -        if isinstance(text, list):
 -            text = ''.join(text)
 -        return re.sub(r'[^a-zA-Z]', '', text).strip()
      _register_function(texcommand)
  
  
+ def reg_get(format_):
+     def get(context, *args):
+         obj = format_
+         for arg in args:
+             if hasattr(obj, arg):
+                 obj = getattr(obj, arg)
+             else:
+                 try:
+                     obj = obj[arg]
+                 except (TypeError, KeyError), e:
+                     # Just raise proper AttributeError.
+                     getattr(obj, arg)
+         return obj
+     _register_function(get)
diff --combined librarian/parser.py
@@@ -4,8 -4,8 +4,8 @@@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
- from librarian import RDFNS
- from librarian.cover import WLCover
+ from librarian import RDFNS, IOFile
+ from librarian.styles.wolnelektury.cover import WLCover
  from librarian import dcparser
  
  from xml.parsers.expat import ExpatError
@@@ -20,58 -20,68 +20,68 @@@ class WLDocument(object)
      LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
      provider = None
  
-     def __init__(self, edoc, parse_dublincore=True, provider=None, 
-                     strict=False, meta_fallbacks=None):
-         self.edoc = edoc
+     _edoc = None
+     @property
+     def edoc(self):
+         if self._edoc is None:
+             data = self.source.get_string()
+             if not isinstance(data, unicode):
+                 data = data.decode('utf-8')
+             data = data.replace(u'\ufeff', '')
+             try:
+                 parser = etree.XMLParser(remove_blank_text=False)
+                 self._edoc = etree.parse(StringIO(data.encode('utf-8')), parser)
+             except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+                 raise ParseError(e)
+         return self._edoc
+     _rdf_elem = None
+     @property
+     def rdf_elem(self):
+         if self._rdf_elem is None:
+             dc_path = './/' + RDFNS('RDF')
+             self._rdf_elem = self.edoc.getroot().find(dc_path)
+             if self._rdf_elem is None:
+                 raise NoDublinCore('Document has no DublinCore - which is required.')
+         return self._rdf_elem
+     _book_info = None
+     @property
+     def book_info(self):
+         if not self.parse_dublincore:
+             return None
+         if self._book_info is None:
+             self._book_info = dcparser.BookInfo.from_element(
+                     self.rdf_elem, fallbacks=self.meta_fallbacks, strict=self.strict)
+         return self._book_info
+     def __init__(self, iofile, provider=None, 
+             parse_dublincore=True, # shouldn't it be in a subclass?
+             strict=False, # ?
+             meta_fallbacks=None # ?
+             ):
+         self.source = iofile
          self.provider = provider
-         root_elem = edoc.getroot()
-         dc_path = './/' + RDFNS('RDF')
-         if root_elem.tag != 'utwor':
+         self.parse_dublincore = parse_dublincore
+         self.strict = strict
+         self.meta_fallbacks = meta_fallbacks
+         if self.edoc.getroot().tag != 'utwor':
              raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
          if parse_dublincore:
-             self.rdf_elem = root_elem.find(dc_path)
-             if self.rdf_elem is None:
-                 raise NoDublinCore('Document has no DublinCore - which is required.')
-             self.book_info = dcparser.BookInfo.from_element(
-                     self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
-         else:
-             self.book_info = None
+             self.book_info
  
      @classmethod
      def from_string(cls, xml, *args, **kwargs):
-         return cls.from_file(StringIO(xml), *args, **kwargs)
+         return cls(IOFile.from_string(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
-         # first, prepare for parsing
          if isinstance(xmlfile, basestring):
-             file = open(xmlfile, 'rb')
-             try:
-                 data = file.read()
-             finally:
-                 file.close()
+             iofile = IOFile.from_filename(xmlfile)
          else:
-             data = xmlfile.read()
-         if not isinstance(data, unicode):
-             data = data.decode('utf-8')
+             iofile = IOFile.from_file(xmlfile)
+         return cls(iofile, *args, **kwargs)
  
-         data = data.replace(u'\ufeff', '')
-         try:
-             parser = etree.XMLParser(remove_blank_text=False)
-             tree = etree.parse(StringIO(data.encode('utf-8')), parser)
-             return cls(tree, *args, **kwargs)
-         except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
-             raise ParseError(e)
  
      def swap_endlines(self):
          """Converts line breaks in stanzas into <br/> tags."""
                  elem.text = chunks.pop(0)
  
      def parts(self):
-         if self.provider is None:
-             raise NoProvider('No document provider supplied.')
          if self.book_info is None:
              raise NoDublinCore('No Dublin Core in document.')
+         if self.book_info.parts and self.provider is None:
+             raise NoProvider('No document provider supplied.')
          for part_uri in self.book_info.parts:
              yield self.from_file(self.provider.by_uri(part_uri),
                      provider=self.provider)
      # Converters
  
      def as_html(self, *args, **kwargs):
 -        from librarian import html
 +        from librarian import pyhtml as html
          return html.transform(self, *args, **kwargs)
  
      def as_text(self, *args, **kwargs):