style

author Jan Szejko <jan.szejko@gmail.com>

Fri, 1 Jul 2016 12:26:00 +0000 (14:26 +0200)

committer Jan Szejko <jan.szejko@gmail.com>

Fri, 1 Jul 2016 12:26:00 +0000 (14:26 +0200)
author Jan Szejko <jan.szejko@gmail.com>
Fri, 1 Jul 2016 12:26:00 +0000 (14:26 +0200)
committer Jan Szejko <jan.szejko@gmail.com>
Fri, 1 Jul 2016 12:26:00 +0000 (14:26 +0200)
diff --git a/librarian/__init__.py b/librarian/__init__.py

index 23244ef..5c145d3 100644 (file)
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -9,6 +9,7 @@ import os
  import re
  import shutil
  import urllib
+import lxml.etree as etree
  
  
  class UnicodeException(Exception):
@@ -25,22 +26,27 @@ class UnicodeException(Exception):
              message = unicode(args, encoding='utf-8', errors='ignore')
          return message
  
+
  class ParseError(UnicodeException):
      pass
  
+
  class ValidationError(UnicodeException):
      pass
  
+
  class NoDublinCore(ValidationError):
      """There's no DublinCore section, and it's required."""
      pass
  
+
  class NoProvider(UnicodeException):
      """There's no DocProvider specified, and it's needed."""
      pass
  
+
  class XMLNamespace(object):
-    '''A handy structure to repsent names in an XML namespace.'''
+    """A handy structure to represent names in an XML namespace."""
  
      def __init__(self, uri):
          self.uri = uri
@@ -57,6 +63,7 @@ class XMLNamespace(object):
      def __str__(self):
          return '%s' % self.uri
  
+
  class EmptyNamespace(XMLNamespace):
      def __init__(self):
          super(EmptyNamespace, self).__init__('')
@@ -80,8 +87,9 @@ class WLURI(object):
      slug = None
  
      example = 'http://edukacjamedialna.edu.pl/lekcje/template'
-    _re_wl_uri = re.compile(r'http://(www\.)?edukacjamedialna.edu.pl/lekcje/'
-            '(?P<slug>[-a-z0-9]+)/?$')
+    _re_wl_uri = re.compile(
+        r'http://(www\.)?edukacjamedialna.edu.pl/lekcje/'
+        '(?P<slug>[-a-z0-9]+)/?$')
  
      def __init__(self, uri):
          uri = unicode(uri)
@@ -148,43 +156,46 @@ class DirDocProvider(DocProvider):
          return IOFile.from_filename(os.path.join(self.dir, fname))
  
  
-import lxml.etree as etree
-import dcparser
-
-DEFAULT_BOOKINFO = dcparser.BookInfo(
-        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
-        { 
-          DCNS('creator.expert'): [u'Some, Author'],
-          DCNS('creator.scenario'): [u'Some, Author'],
-          DCNS('creator.textbook'): [u'Some, Author'],
-          DCNS('title'): [u'Some Title'],
-          DCNS('subject.period'): [u'Unknown'],
-          DCNS('subject.type'): [u'Unknown'],
-          DCNS('subject.genre'): [u'Unknown'],
-          DCNS('date'): ['1970-01-01'],
-          DCNS('language'): [u'pol'],
-          # DCNS('date'): [creation_date],
-          DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
-          DCNS('description'):
-          [u"""Publikacja zrealizowana w ramach projektu
-             Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
-             wykonana przez Bibliotekę Narodową z egzemplarza
-             pochodzącego ze zbiorów BN."""],
-          DCNS('identifier.url'): [WLURI.example],
-          DCNS('rights'):
-            [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
+def get_default_bookinfo():
+    import dcparser  # deferred import: dcparser itself imports names from this package
+    return dcparser.BookInfo(  # must be returned: DEFAULT_BOOKINFO is assigned from this call
+        {RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
+        {
+            DCNS('creator.expert'): [u'Some, Author'],
+            DCNS('creator.scenario'): [u'Some, Author'],
+            DCNS('creator.textbook'): [u'Some, Author'],
+            DCNS('title'): [u'Some Title'],
+            DCNS('subject.period'): [u'Unknown'],
+            DCNS('subject.type'): [u'Unknown'],
+            DCNS('subject.genre'): [u'Unknown'],
+            DCNS('date'): ['1970-01-01'],
+            DCNS('language'): [u'pol'],
+            # DCNS('date'): [creation_date],
+            DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
+            DCNS('description'):
+                [u"""Publikacja zrealizowana w ramach projektu
+                 Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
+                 wykonana przez Bibliotekę Narodową z egzemplarza
+                 pochodzącego ze zbiorów BN."""],
+            DCNS('identifier.url'): [WLURI.example],
+            DCNS('rights'):
+                [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"],
+        })
+
+DEFAULT_BOOKINFO = get_default_bookinfo()
+
  
  def xinclude_forURI(uri):
      e = etree.Element(XINS("include"))
      e.set("href", uri)
      return etree.tostring(e, encoding=unicode)
  
+
  def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
      """Wrap the text within the minimal XML structure with a DC template."""
      bookinfo.created_at = creation_date
  
-    dcstring = etree.tostring(bookinfo.to_etree(), \
-        method='xml', encoding=unicode, pretty_print=True)
+    dcstring = etree.tostring(bookinfo.to_etree(), encoding=unicode, pretty_print=True)
  
      return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
          u'\n</plain-text>\n</utwor>'
@@ -194,8 +205,7 @@ def serialize_raw(element):
      b = u'' + (element.text or '')
  
      for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding=unicode,
-                pretty_print=True)
+        e = etree.tostring(child, encoding=unicode, pretty_print=True)
          b += e
  
      return b
@@ -204,9 +214,11 @@ SERIALIZERS = {
      'raw': serialize_raw,
  }
  
+
  def serialize_children(element, format='raw'):
      return SERIALIZERS[format](element)
  
+
  def get_resource(path):
      return os.path.join(os.path.dirname(__file__), path)
  
diff --git a/librarian/book2anything.py b/librarian/book2anything.py

index b60cd0f..c8726c6 100644 (file)
--- a/librarian/book2anything.py
+++ b/librarian/book2anything.py
@@ -4,7 +4,6 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from collections import namedtuple
  import os.path
  import optparse
  
@@ -30,19 +29,18 @@ class Option(object):
  
  class Book2Anything(object):
      """A class for creating book2... scripts.
-    
+
      Subclass it for any format you want to convert to.
      """
-    format_name = None # Set format name, like "PDF".
-    ext = None # Set file extension, like "pdf".
-    uses_cover = False # Can it add a cover?
-    cover_optional = True # Only relevant if uses_cover
-    uses_provider = False # Does it need a DocProvider?
-    transform = None # Transform method. Uses WLDocument.as_{ext} by default.
-    parser_options = [] # List of Option objects for additional parser args.
-    transform_options = [] # List of Option objects for additional transform args.
-    transform_flags = [] # List of Option objects for supported transform flags.
-
+    format_name = None  # Set format name, like "PDF".
+    ext = None  # Set file extension, like "pdf".
+    uses_cover = False  # Can it add a cover?
+    cover_optional = True  # Only relevant if uses_cover
+    uses_provider = False  # Does it need a DocProvider?
+    transform = None  # Transform method. Uses WLDocument.as_{ext} by default.
+    parser_options = []  # List of Option objects for additional parser args.
+    transform_options = []  # List of Option objects for additional transform args.
+    transform_flags = []  # List of Option objects for supported transform flags.
  
      @classmethod
      def run(cls):
@@ -52,27 +50,33 @@ class Book2Anything(object):
  
          parser = optparse.OptionParser(usage=usage)
  
-        parser.add_option('-v', '--verbose', 
-                action='store_true', dest='verbose', default=False,
-                help='print status messages to stdout')
-        parser.add_option('-d', '--make-dir',
-                action='store_true', dest='make_dir', default=False,
-                help='create a directory for author and put the output file in it')
-        parser.add_option('-o', '--output-file',
-                dest='output_file', metavar='FILE',
-                help='specifies the output file')
-        parser.add_option('-O', '--output-dir',
-                dest='output_dir', metavar='DIR',
-                help='specifies the directory for output')
+        parser.add_option(
+            '-v', '--verbose',
+            action='store_true', dest='verbose', default=False,
+            help='print status messages to stdout')
+        parser.add_option(
+            '-d', '--make-dir',
+            action='store_true', dest='make_dir', default=False,
+            help='create a directory for author and put the output file in it')
+        parser.add_option(
+            '-o', '--output-file',
+            dest='output_file', metavar='FILE',
+            help='specifies the output file')
+        parser.add_option(
+            '-O', '--output-dir',
+            dest='output_dir', metavar='DIR',
+            help='specifies the directory for output')
          if cls.uses_cover:
              if cls.cover_optional:
-                parser.add_option('-c', '--with-cover', 
-                        action='store_true', dest='with_cover', default=False,
-                        help='create default cover')
-            parser.add_option('-C', '--image-cache',
-                    dest='image_cache', metavar='URL',
-                    help='prefix for image download cache' +
-                        (' (implies --with-cover)' if cls.cover_optional else ''))
+                parser.add_option(
+                    '-c', '--with-cover',
+                    action='store_true', dest='with_cover', default=False,
+                    help='create default cover')
+            parser.add_option(
+                '-C', '--image-cache',
+                dest='image_cache', metavar='URL',
+                help='prefix for image download cache' +
+                     (' (implies --with-cover)' if cls.cover_optional else ''))
          for option in cls.parser_options + cls.transform_options + cls.transform_flags:
              option.add(parser)
  
@@ -80,7 +84,7 @@ class Book2Anything(object):
  
          if len(input_filenames) < 1:
              parser.print_help()
-            return(1)
+            return 1
  
          # Prepare additional args for parser.
          parser_args = {}
@@ -91,8 +95,7 @@ class Book2Anything(object):
          for option in cls.transform_options:
              transform_args[option.name()] = option.value(options)
          # Add flags to transform_args, if any.
-        transform_flags = [flag.name() for flag in cls.transform_flags
-                    if flag.value(options)]
+        transform_flags = [flag.name() for flag in cls.transform_flags if flag.value(options)]
          if transform_flags:
              transform_args['flags'] = transform_flags
          # Add cover support, if any.
@@ -105,35 +108,35 @@ class Book2Anything(object):
              elif not cls.cover_optional or options.with_cover:
                  transform_args['cover'] = WLCover
  
-
          # Do some real work
+        main_input = None
          try:
              for main_input in input_filenames:
                  if options.verbose:
                      print main_input
  
-            # Where to find input?
-            if cls.uses_provider:
-                path, fname = os.path.realpath(main_input).rsplit('/', 1)
-                provider = DirDocProvider(path)
-            else:
-                provider = None
-
-            # Where to write output?
-            if not (options.output_file or options.output_dir):
-                output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
-            else:
-                output_file = None
-
-            # Do the transformation.
-            doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
-            transform = cls.transform
-            if transform is None:
-                transform = getattr(WLDocument, 'as_%s' % cls.ext)
-            output = transform(doc, **transform_args)
-
-            doc.save_output_file(output,
-                output_file, options.output_dir, options.make_dir, cls.ext)
+                # Where to find input?
+                if cls.uses_provider:
+                    path, fname = os.path.realpath(main_input).rsplit('/', 1)
+                    provider = DirDocProvider(path)
+                else:
+                    provider = None
+
+                # Where to write output?
+                if not (options.output_file or options.output_dir):
+                    output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
+                else:
+                    output_file = None
+
+                # Do the transformation.
+                doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
+                transform = cls.transform
+                if transform is None:
+                    transform = getattr(WLDocument, 'as_%s' % cls.ext)
+                output = transform(doc, **transform_args)
+
+                doc.save_output_file(
+                    output, output_file, options.output_dir, options.make_dir, cls.ext)
  
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
diff --git a/librarian/cover.py b/librarian/cover.py

index dc64a9c..2320f19 100644 (file)
--- a/librarian/cover.py
+++ b/librarian/cover.py
@@ -113,12 +113,12 @@ class Cover(object):
      exts = {
          'JPEG': 'jpg',
          'PNG': 'png',
-        }
+    }
  
      mime_types = {
          'JPEG': 'image/jpeg',
          'PNG': 'image/png',
-        }
+    }
  
      def __init__(self, book_info, format=None):
          try:
@@ -154,24 +154,22 @@ class Cover(object):
          top = self.author_top
          tbox = TextBox(
              self.width - self.author_margin_left - self.author_margin_right,
-            self.height - top,
-            )
+            self.height - top)
          author_font = self.author_font or ImageFont.truetype(
              get_resource('fonts/DejaVuSerif.ttf'), 30)
          tbox.text(self.pretty_author(), self.author_color, author_font,
-            self.author_lineskip, self.author_shadow)
+                  self.author_lineskip, self.author_shadow)
          text_img = tbox.image()
          img.paste(text_img, (self.author_margin_left, top), text_img)
  
          top += text_img.size[1] + self.title_top
          tbox = TextBox(
              self.width - self.title_margin_left - self.title_margin_right,
-            self.height - top,
-            )
+            self.height - top)
          title_font = self.author_font or ImageFont.truetype(
              get_resource('fonts/DejaVuSerif.ttf'), 40)
          tbox.text(self.pretty_title(), self.title_color, title_font,
-            self.title_lineskip, self.title_shadow)
+                  self.title_lineskip, self.title_shadow)
          text_img = tbox.image()
          img.paste(text_img, (self.title_margin_left, top), text_img)
  
diff --git a/librarian/dcparser.py b/librarian/dcparser.py

index afcefa0..747ac86 100644 (file)
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -10,7 +10,7 @@ import time
  from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
                         WLURI)
  
-import lxml.etree as etree # ElementTree API using libxml2
+import lxml.etree as etree  # ElementTree API using libxml2
  from lxml.etree import XMLSyntaxError
  
  
@@ -25,7 +25,7 @@ class Person(object):
  
      @classmethod
      def from_text(cls, text):
-        parts = [ token.strip() for token in text.split(',') ]
+        parts = [token.strip() for token in text.split(',')]
          if len(parts) == 1:
              surname = parts[0]
              names = []
@@ -36,7 +36,7 @@ class Person(object):
              if len(parts[1]) == 0:
                  # there is no non-whitespace data after the comma
                  raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts))
-            names = [ name for name in parts[1].split() if len(name) ] # all non-whitespace tokens
+            names = [name for name in parts[1].split() if len(name)]  # all non-whitespace tokens
          return cls(surname, *names)
  
      def readable(self):
@@ -60,6 +60,7 @@ class Person(object):
      def __repr__(self):
          return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)
  
+
  def as_date(text):
      try:
          try:
@@ -70,18 +71,22 @@ def as_date(text):
      except ValueError, e:
          raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
  
+
  def as_person(text):
      return Person.from_text(text)
  
+
  def as_unicode(text):
      if isinstance(text, unicode):
          return text
      else:
          return text.decode('utf-8')
  
+
  def as_wluri_strict(text):
      return WLURI.strict(text)
  
+
  class Field(object):
      def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
          self.uri = uri
@@ -91,7 +96,7 @@ class Field(object):
          self.multiple = multiple
          self.salias = salias
  
-        self.required = kwargs.get('required', True) and not kwargs.has_key('default')
+        self.required = kwargs.get('required', True) and 'default' not in kwargs
          self.default = kwargs.get('default', [] if multiple else [None])
  
      def validate_value(self, val, strict=False):
@@ -104,7 +109,7 @@ class Field(object):
              if self.multiple:
                  if validator is None:
                      return val
-                return [ validator(v) if v is not None else v for v in val ]
+                return [validator(v) if v is not None else v for v in val]
              elif len(val) > 1:
                  raise ValidationError("Multiple values not allowed for field '%s'" % self.uri)
              elif len(val) == 0:
@@ -119,7 +124,7 @@ class Field(object):
      def validate(self, fdict, fallbacks=None, strict=False):
          if fallbacks is None:
              fallbacks = {}
-        if not fdict.has_key(self.uri):
+        if self.uri not in fdict:
              if not self.required:
                  # Accept single value for single fields and saliases.
                  if self.name in fallbacks:
@@ -145,7 +150,7 @@ class Field(object):
  
  
  class DCInfo(type):
-    def __new__(meta, classname, bases, class_dict):
+    def __new__(mcs, classname, bases, class_dict):
          fields = list(class_dict['FIELDS'])
  
          for base in bases[::-1]:
@@ -157,41 +162,41 @@ class DCInfo(type):
                          fields.insert(0, field)
  
          class_dict['FIELDS'] = tuple(fields)
-        return super(DCInfo, meta).__new__(meta, classname, bases, class_dict)
+        return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
  
  
  class WorkInfo(object):
      __metaclass__ = DCInfo
  
      FIELDS = (
-        Field( DCNS('creator.expert'), 'authors_expert', as_person, salias='author', required=False, multiple=True),
-        Field( DCNS('creator.methodologist'), 'authors_methodologist', as_person, salias='author', required=False, multiple=True),
-        Field( DCNS('creator.scenario'), 'authors_scenario', as_person, salias='author', required=False, multiple=True),
-        Field( DCNS('creator.textbook'), 'authors_textbook', as_person, salias='author', required=False, multiple=True),
-        Field( DCNS('requires'), 'requires', required=False, multiple=True),
-        Field( DCNS('title'), 'title'),
-        Field( DCNS('type'), 'type', required=False),
-
-        Field( DCNS('contributor.editor'), 'editors', \
-            as_person, salias='editor', multiple=True, default=[]),
-        Field( DCNS('contributor.technical_editor'), 'technical_editors',
-            as_person, salias='technical_editor', multiple=True, default=[]),
-
-        Field( DCNS('date'), 'created_at', as_date),
-        Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
-        Field( DCNS('publisher'), 'publisher'),
-
-        Field( DCNS('subject.competence'), 'competences', multiple=True, required=False),
-        Field( DCNS('subject.curriculum'), 'curriculum', multiple=True, required=False),
-
-        Field( DCNS('language'), 'language'),
-        Field( DCNS('description'), 'description', required=False),
-
-        Field( DCNS('source'), 'source_name', required=False),
-        Field( DCNS('source.URL'), 'source_url', required=False),
-        Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
-        Field( DCNS('rights.license'), 'license', required=False),
-        Field( DCNS('rights'), 'license_description'),
+        Field(DCNS('creator.expert'), 'authors_expert', as_person, salias='author', required=False, multiple=True),
+        Field(DCNS('creator.methodologist'), 'authors_methodologist', as_person, salias='author', required=False,
+              multiple=True),
+        Field(DCNS('creator.scenario'), 'authors_scenario', as_person, salias='author', required=False, multiple=True),
+        Field(DCNS('creator.textbook'), 'authors_textbook', as_person, salias='author', required=False, multiple=True),
+        Field(DCNS('requires'), 'requires', required=False, multiple=True),
+        Field(DCNS('title'), 'title'),
+        Field(DCNS('type'), 'type', required=False),
+
+        Field(DCNS('contributor.editor'), 'editors', as_person, salias='editor', multiple=True, default=[]),
+        Field(DCNS('contributor.technical_editor'), 'technical_editors', as_person, salias='technical_editor',
+              multiple=True, default=[]),
+
+        Field(DCNS('date'), 'created_at', as_date),
+        Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
+        Field(DCNS('publisher'), 'publisher'),
+
+        Field(DCNS('subject.competence'), 'competences', multiple=True, required=False),
+        Field(DCNS('subject.curriculum'), 'curriculum', multiple=True, required=False),
+
+        Field(DCNS('language'), 'language'),
+        Field(DCNS('description'), 'description', required=False),
+
+        Field(DCNS('source'), 'source_name', required=False),
+        Field(DCNS('source.URL'), 'source_url', required=False),
+        Field(DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
+        Field(DCNS('rights.license'), 'license', required=False),
+        Field(DCNS('rights'), 'license_description'),
      )
  
      @classmethod
@@ -203,8 +208,8 @@ class WorkInfo(object):
      def from_file(cls, xmlfile, *args, **kwargs):
          desc_tag = None
          try:
-            iter = etree.iterparse(xmlfile, ['start', 'end'])
-            for (event, element) in iter:
+            elements = etree.iterparse(xmlfile, ['start', 'end'])
+            for (event, element) in elements:
                  if element.tag == RDFNS('RDF') and event == 'start':
                      desc_tag = element
                      break
@@ -214,7 +219,7 @@ class WorkInfo(object):
                      Check if there are rdf:RDF and rdf:Description tags.")
  
              # continue 'till the end of RDF section
-            for (event, element) in iter:
+            for (event, element) in elements:
                  if element.tag == RDFNS('RDF') and event == 'end':
                      break
  
@@ -252,13 +257,13 @@ class WorkInfo(object):
          self.fmap = {}
  
          for field in self.FIELDS:
-            value = field.validate(dc_fields, fallbacks=fallbacks,
-                            strict=strict)
+            value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict)
              if field.multiple:
                  value = getattr(self, 'prop_' + field.name, []) + value
              setattr(self, 'prop_' + field.name, value)
              self.fmap[field.name] = field
-            if field.salias: self.fmap[field.salias] = field
+            if field.salias:
+                self.fmap[field.salias] = field
  
      def __getattribute__(self, name):
          try:
@@ -266,7 +271,8 @@ class WorkInfo(object):
              value = object.__getattribute__(self, 'prop_'+field.name)
              if field.name == name:
                  return value
-            else: # singular alias
+            else:
+                # singular alias
                  if not field.multiple:
                      raise "OUCH!! for field %s" % name
  
@@ -279,7 +285,8 @@ class WorkInfo(object):
              field = object.__getattribute__(self, 'fmap')[name]
              if field.name == name:
                  object.__setattr__(self, 'prop_'+field.name, newvalue)
-            else: # singular alias
+            else:
+                # singular alias
                  if not field.multiple:
                      raise "OUCH! while setting field %s" % name
  
@@ -291,13 +298,13 @@ class WorkInfo(object):
          """Update using field_dict. Verify correctness, but don't check if all
          required fields are present."""
          for field in self.FIELDS:
-            if field_dict.has_key(field.name):
+            if field.name in field_dict:
                  setattr(self, field.name, field_dict[field.name])
  
-    def to_etree(self, parent = None):
+    def to_etree(self, parent=None):
          """XML representation of this object."""
-        #etree._namespace_map[str(self.RDF)] = 'rdf'
-        #etree._namespace_map[str(self.DC)] = 'dc'
+        # etree._namespace_map[str(self.RDF)] = 'rdf'
+        # etree._namespace_map[str(self.DC)] = 'dc'
  
          if parent is None:
              root = etree.Element(RDFNS('RDF'))
@@ -313,7 +320,8 @@ class WorkInfo(object):
              v = getattr(self, field.name, None)
              if v is not None:
                  if field.multiple:
-                    if len(v) == 0: continue
+                    if len(v) == 0:
+                        continue
                      for x in v:
                          e = etree.Element(field.uri)
                          if x is not None:
@@ -327,16 +335,16 @@ class WorkInfo(object):
          return root
  
      def serialize(self):
-        rdf = {}
-        rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about }
+        rdf = {'about': {'uri': RDFNS('about'), 'value': self.about}}
  
          dc = {}
          for field in self.FIELDS:
              v = getattr(self, field.name, None)
              if v is not None:
                  if field.multiple:
-                    if len(v) == 0: continue
-                    v = [ unicode(x) for x in v if x is not None ]
+                    if len(v) == 0:
+                        continue
+                    v = [unicode(x) for x in v if x is not None]
                  else:
                      v = unicode(v)
  
@@ -351,43 +359,38 @@ class WorkInfo(object):
  
              if v is not None:
                  if field.multiple:
-                    if len(v) == 0: continue
-                    v = [ unicode(x) for x in v if x is not None ]
+                    if len(v) == 0:
+                        continue
+                    v = [unicode(x) for x in v if x is not None]
                  else:
                      v = unicode(v)
                  result[field.name] = v
  
              if field.salias:
                  v = getattr(self, field.salias)
-                if v is not None: result[field.salias] = unicode(v)
+                if v is not None:
+                    result[field.salias] = unicode(v)
  
          return result
  
  
  class BookInfo(WorkInfo):
      FIELDS = (
-        Field( DCNS('audience'), 'audiences', salias='audience', multiple=True,
-                required=False),
-
-        Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
-                required=False),
-        Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
-                required=False),
-        Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
-                required=False),
-
-        Field( DCNS('contributor.translator'), 'translators', \
-            as_person,  salias='translator', multiple=True, default=[]),
-        Field( DCNS('relation.hasPart'), 'parts',
-            WLURI, strict=as_wluri_strict, multiple=True, required=False),
-        Field( DCNS('relation.isVariantOf'), 'variant_of',
-            WLURI, strict=as_wluri_strict, required=False),
-        Field( DCNS('relation'), 'relations',
-            WLURI, strict=as_wluri_strict, multiple=True, required=False),
-
-        Field( DCNS('relation.coverImage.url'), 'cover_url', required=False),
-        Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
-        Field( DCNS('relation.coverImage.source'), 'cover_source', required=False),
+        Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, required=False),
+
+        Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, required=False),
+        Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, required=False),
+        Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False),
+
+        Field(DCNS('contributor.translator'), 'translators', as_person,  salias='translator', multiple=True,
+              default=[]),
+        Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict, multiple=True, required=False),
+        Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, strict=as_wluri_strict, required=False),
+        Field(DCNS('relation'), 'relations', WLURI, strict=as_wluri_strict, multiple=True, required=False),
+
+        Field(DCNS('relation.coverImage.url'), 'cover_url', required=False),
+        Field(DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
+        Field(DCNS('relation.coverImage.source'), 'cover_source', required=False),
      )
  
  
diff --git a/librarian/epub.py b/librarian/epub.py

index 8141eea..01f5c92 100644 (file)
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -33,6 +33,7 @@ def inner_xml(node):
      nt = node.text if node.text is not None else ''
      return ''.join([nt] + [etree.tostring(child) for child in node])
  
+
  def set_inner_xml(node, text):
      """ sets node's text and children from a string
  
@@ -121,7 +122,7 @@ class Stanza(object):
      >>> print etree.tostring(s)
      <strofa><wers_normalny>a</wers_normalny><wers_normalny>b<x>x/
      y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
-    
+
      """
      def __init__(self, stanza_elem):
          self.stanza = stanza_elem
@@ -194,7 +195,7 @@ def add_to_manifest(manifest, partno):
  def add_to_spine(spine, partno):
      """ Adds a node to the spine section in content.opf file """
  
-    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno});
+    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
      spine.append(e)
  
  
@@ -286,7 +287,7 @@ def chop(main_text):
      # prepare a container for each chunk
      part_xml = etree.Element('utwor')
      etree.SubElement(part_xml, 'master')
-    main_xml_part = part_xml[0] # master
+    main_xml_part = part_xml[0]  # master
  
      last_node_part = False
      for one_part in main_text:
@@ -304,8 +305,10 @@ def chop(main_text):
      yield part_xml
  
  
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=None):
      """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
+    if _empty_html_static is None:
+        _empty_html_static = []
  
      toc = TOC()
      for element in chunk_xml[0]:
@@ -351,8 +354,7 @@ def transform(wldoc, verbose=False,
              # write book title page
              html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
              chars = used_chars(html_tree.getroot())
-            zip.writestr('OPS/title.html',
-                 etree.tostring(html_tree, method="html", pretty_print=True))
+            zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True))
              # add a title page TOC entry
              toc.add(u"Strona tytułowa", "title.html")
          elif wldoc.book_info.parts:
@@ -403,7 +405,6 @@ def transform(wldoc, verbose=False,
  
          return toc, chunk_counter, chars, sample
  
-
      document = deepcopy(wldoc)
      del wldoc
  
@@ -429,11 +430,12 @@ def transform(wldoc, verbose=False,
      mime.compress_type = zipfile.ZIP_STORED
      mime.extra = ''
      zip.writestr(mime, 'application/epub+zip')
-    zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
-                       'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
-                       '<rootfiles><rootfile full-path="OPS/content.opf" ' \
-                       'media-type="application/oebps-package+xml" />' \
-                       '</rootfiles></container>')
+    zip.writestr(
+        'META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
+        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+        '<rootfiles><rootfile full-path="OPS/content.opf" '
+        'media-type="application/oebps-package+xml" />'
+        '</rootfiles></container>')
      zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
      zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
      if not style:
@@ -467,14 +469,14 @@ def transform(wldoc, verbose=False,
          opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
          guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
  
-
      annotations = etree.Element('annotations')
  
-    toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
-                               '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
-                               '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
-                               'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
-                               '</navMap></ncx>')
+    toc_file = etree.fromstring(
+        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+        '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+        '</navMap></ncx>')
      nav_map = toc_file[-1]
  
      if html_toc:
@@ -512,7 +514,7 @@ def transform(wldoc, verbose=False,
      zip.writestr('OPS/last.html', etree.tostring(
                          html_tree, method="html", pretty_print=True))
  
-    if not flags or not 'without-fonts' in flags:
+    if not flags or 'without-fonts' not in flags:
          # strip fonts
          tmpdir = mkdtemp('-librarian-epub')
          try:
diff --git a/librarian/fb2.py b/librarian/fb2.py

index 1e110f5..bc1504d 100644 (file)
--- a/librarian/fb2.py
+++ b/librarian/fb2.py
@@ -17,10 +17,11 @@ functions.reg_person_name()
  
  def sectionify(tree):
      """Finds section headers and adds a tree of _section tags."""
-    sections = ['naglowek_czesc',
-            'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
-            'naglowek_podrozdzial']
-    section_level = dict((v,k) for (k,v) in enumerate(sections))
+    sections = [
+        'naglowek_czesc',
+        'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
+        'naglowek_podrozdzial']
+    section_level = {v: k for (k, v) in enumerate(sections)}
  
      # We can assume there are just subelements an no text at section level.
      for level, section_name in reversed(list(enumerate(sections))):
diff --git a/librarian/functions.py b/librarian/functions.py

index bd05ff4..7eb9d56 100644 (file)
--- a/librarian/functions.py
+++ b/librarian/functions.py
@@ -8,6 +8,7 @@ import re
  
  from librarian.dcparser import Person
  
+
  def _register_function(f):
      """ Register extension function with lxml """
      ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
@@ -15,13 +16,14 @@ def _register_function(f):
  
  
  ENTITY_SUBSTITUTIONS = [
-       (u'---', u'—'),
-       (u'--', u'–'),
-       (u'...', u'…'),
-       (u',,', u'„'),
-       (u'"', u'”'),
+    (u'---', u'—'),
+    (u'--', u'–'),
+    (u'...', u'…'),
+    (u',,', u'„'),
+    (u'"', u'”'),
  ]
  
+
  def substitute_entities(text):
      """XPath extension function converting all entites in passed text."""
      if isinstance(text, list):
diff --git a/librarian/html.py b/librarian/html.py

index 985970a..848935a 100644 (file)
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -22,12 +22,15 @@ STYLESHEETS = {
      'partial': 'xslt/wl2html_partial.xslt'
  }
  
+
  def get_stylesheet(name):
      return os.path.join(os.path.dirname(__file__), STYLESHEETS[name])
  
+
  def html_has_content(text):
      return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
  
+
  def transform(wldoc, stylesheet='legacy', options=None, flags=None):
      """Transforms the WL document to XHTML.
  
@@ -53,14 +56,14 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None):
          if not options:
              options = {}
          result = document.transform(style, **options)
-        del document # no longer needed large object :)
+        del document  # no longer needed large object :)
  
          if html_has_content(result):
              add_anchors(result.getroot())
              add_table_of_contents(result.getroot())
  
-            return IOFile.from_string(etree.tostring(result, method='html',
-                xml_declaration=False, pretty_print=True, encoding='utf-8'))
+            return IOFile.from_string(
+                etree.tostring(result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
          else:
              return None
      except KeyError:
@@ -68,6 +71,7 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None):
      except (XMLSyntaxError, XSLTApplyError), e:
          raise ParseError(e)
  
+
  class Fragment(object):
      def __init__(self, id, themes):
          super(Fragment, self).__init__()
@@ -96,7 +100,8 @@ class Fragment(object):
          result = []
          for event, element in self.closed_events():
              if event == 'start':
-                result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
+                result.append(u'<%s %s>' % (
+                    element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
                  if element.text:
                      result.append(element.text)
              elif event == 'end':
@@ -126,7 +131,8 @@ def extract_fragments(input_filename):
      for event, element in etree.iterparse(buf, events=('start', 'end')):
          # Process begin and end elements
          if element.get('class', '') in ('theme-begin', 'theme-end'):
-            if not event == 'end': continue # Process elements only once, on end event
+            if not event == 'end':
+                continue  # Process elements only once, on end event
  
              # Open new fragment
              if element.get('class', '') == 'theme-begin':
@@ -159,11 +165,10 @@ def extract_fragments(input_filename):
                  for fragment_id in open_fragments:
                      open_fragments[fragment_id].append('text', element.tail)
  
-
          # Process all elements except begin and end
          else:
              # Omit annotation tags
-            if (len(element.get('name', '')) or 
+            if (len(element.get('name', '')) or
                      element.get('class', '') in ('annotation', 'anchor')):
                  if event == 'end' and element.tail:
                      for fragment_id in open_fragments:
@@ -206,10 +211,13 @@ def any_ancestor(element, test):
  
  def add_anchors(root):
      counter = 1
+
+    def is_side_text(e):
+        side_classes = ('note', 'motto', 'motto_podpis', 'dedication')
+        return e.get('class') in side_classes or e.get('id') == 'nota_red' or e.tag == 'blockquote'
+
      for element in root.iterdescendants():
-        if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
-        or e.get('id') == 'nota_red'
-        or e.tag == 'blockquote'):
+        if any_ancestor(element, is_side_text):
              continue
  
          if element.tag == 'p' and 'verse' in element.get('class', ''):
@@ -232,9 +240,13 @@ def raw_printable_text(element):
  def add_table_of_contents(root):
      sections = []
      counter = 1
+
+    def is_side_text(e):
+        return e.get('id') in ('footnotes', 'nota_red') or e.get('class') == 'person-list'
+
      for element in root.iterdescendants():
          if element.tag in ('h2', 'h3'):
-            if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
+            if any_ancestor(element, is_side_text):
                  continue
  
              element_text = raw_printable_text(element)
@@ -257,9 +269,9 @@ def add_table_of_contents(root):
  
          if len(subsections):
              subsection_list = etree.SubElement(section_element, 'ol')
-            for n, subsection, text, _ in subsections:
+            for n1, subsection, text1, _ in subsections:
                  subsection_element = etree.SubElement(subsection_list, 'li')
-                add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)
+                add_anchor(subsection_element, "s%d" % n1, with_target=False, link_text=text1)
  
      root.insert(0, toc)
  
@@ -276,4 +288,3 @@ def extract_annotations(html_path):
              text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip()
              html_str = etree.tostring(footnote, method='html', encoding='utf-8')
              yield anchor, text_str, html_str
-
diff --git a/librarian/mobi.py b/librarian/mobi.py

index 9558452..104f1c0 100644 (file)
--- a/librarian/mobi.py
+++ b/librarian/mobi.py
@@ -9,7 +9,6 @@ import subprocess
  from tempfile import NamedTemporaryFile
  
  from librarian import IOFile
-from librarian.cover import WLCover
  from librarian import get_resource
  
  
@@ -28,8 +27,8 @@ def transform(wldoc, verbose=False,
      book_info = document.book_info
  
      # provide a cover by default
-    if not cover:
-        cover = WLCover
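+    # FIXME(review): default disabled; a falsy cover now reaches cover(book_info) below -- confirm intent.
+    #     if not cover: cover = WLCover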
      cover_file = NamedTemporaryFile(suffix='.png', delete=False)
      bound_cover = cover(book_info)
      bound_cover.save(cover_file)
@@ -43,8 +42,8 @@ def transform(wldoc, verbose=False,
      if not flags:
          flags = []
      flags = list(flags) + ['without-fonts']
-    epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True,
-            flags=flags, style=get_resource('mobi/style.css'))
+    epub = document.as_epub(
+        verbose=verbose, sample=sample, html_toc=True, flags=flags, style=get_resource('mobi/style.css'))
  
      if verbose:
          kwargs = {}
@@ -54,7 +53,8 @@ def transform(wldoc, verbose=False,
  
      output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False)
      output_file.close()
-    subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name,
-            '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs)
+    subprocess.check_call(
+        ['ebook-convert', epub.get_filename(), output_file.name, '--no-inline-toc', '--cover=%s' % cover_file.name],
+        **kwargs)
      os.unlink(cover_file.name)
-    return IOFile.from_filename(output_file.name)
-\ No newline at end of file
+    return IOFile.from_filename(output_file.name)
diff --git a/librarian/packagers.py b/librarian/packagers.py

index ddfd7c8..0dbb6e8 100644 (file)
--- a/librarian/packagers.py
+++ b/librarian/packagers.py
@@ -8,38 +8,38 @@ from copy import deepcopy
  from lxml import etree
  from librarian import pdf, epub, DirDocProvider, ParseError, cover
  from librarian.parser import WLDocument
+from librarian.styles.wolnelektury.partners import cover
  
  
  class Packager(object):
      cover = None
      flags = None
+    converter = NotImplemented
+    ext = NotImplemented
  
      @classmethod
-    def prepare_file(cls, main_input, output_dir, verbose=False):
+    def prepare_file(cls, main_input, output_dir):
          path, fname = os.path.realpath(main_input).rsplit('/', 1)
          provider = DirDocProvider(path)
          slug, ext = os.path.splitext(fname)
  
          if output_dir != '':
-            try:
+            if not os.path.isdir(output_dir):
                  os.makedirs(output_dir)
-            except:
-                pass
          outfile = os.path.join(output_dir, slug + '.' + cls.ext)
  
          doc = WLDocument.from_file(main_input, provider=provider)
-        output_file = cls.converter.transform(doc,
-                cover=cls.cover, flags=cls.flags)
+        output_file = cls.converter.transform(doc, cover=cls.cover, flags=cls.flags)
          doc.save_output_file(output_file, output_path=outfile)
  
-
      @classmethod
      def prepare(cls, input_filenames, output_dir='', verbose=False):
+        main_input = None
          try:
              for main_input in input_filenames:
                  if verbose:
                      print main_input
-                cls.prepare_file(main_input, output_dir, verbose)
+                cls.prepare_file(main_input, output_dir)
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
@@ -52,6 +52,7 @@ class EpubPackager(Packager):
      converter = epub
      ext = 'epub'
  
+
  class PdfPackager(Packager):
      converter = pdf
      ext = 'pdf'
@@ -60,16 +61,20 @@ class PdfPackager(Packager):
  class GandalfEpubPackager(EpubPackager):
      cover = cover.GandalfCover
  
+
  class GandalfPdfPackager(PdfPackager):
      cover = cover.GandalfCover
  
+
  class BookotekaEpubPackager(EpubPackager):
      cover = cover.BookotekaCover
  
+
  class PrestigioEpubPackager(EpubPackager):
      cover = cover.PrestigioCover
      flags = ('less-advertising',)
  
+
  class PrestigioPdfPackager(PdfPackager):
      cover = cover.PrestigioCover
      flags = ('less-advertising',)
@@ -107,6 +112,7 @@ class VirtualoPackager(Packager):
                  <language>PL</language>
              </product>""")
  
+        main_input = None
          try:
              for main_input in input_filenames:
                  if verbose:
@@ -133,17 +139,13 @@ class VirtualoPackager(Packager):
                  cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
                  outfile = os.path.join(outfile_dir, '1.epub')
                  outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
-                doc.save_output_file(doc.as_epub(),
-                        output_path=outfile)
-                doc.save_output_file(doc.as_epub(doc, sample=25), 
-                        output_path=outfile_sample)
+                doc.save_output_file(doc.as_epub(), output_path=outfile)
+                doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample)
                  outfile = os.path.join(outfile_dir, '1.mobi')
                  outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
-                doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
-                        output_path=outfile)
+                doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile)
                  doc.save_output_file(
-                        doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), 
-                        output_path=outfile_sample)
+                    doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), output_path=outfile_sample)
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
diff --git a/librarian/parser.py b/librarian/parser.py

index 9300aa6..113fbbe 100644 (file)
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,7 +3,7 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
+from librarian import ValidationError, NoDublinCore, ParseError
  from librarian import RDFNS, IOFile
  from librarian import dcparser
  
@@ -15,11 +15,13 @@ import os
  import re
  from StringIO import StringIO
  
+
  class WLDocument(object):
      LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
      provider = None
  
      _edoc = None
+
      @property
      def edoc(self):
          if self._edoc is None:
@@ -28,13 +30,14 @@ class WLDocument(object):
                  data = data.decode('utf-8')
              data = data.replace(u'\ufeff', '')
              try:
-                parser = etree.XMLParser(remove_blank_text=False)
+                parser = etree.XMLParser()
                  self._edoc = etree.parse(StringIO(data.encode('utf-8')), parser)
              except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
                  raise ParseError(e)
          return self._edoc
  
      _rdf_elem = None
+
      @property
      def rdf_elem(self):
          if self._rdf_elem is None:
@@ -45,6 +48,7 @@ class WLDocument(object):
          return self._rdf_elem
  
      _book_info = None
+
      @property
      def book_info(self):
          if not self.parse_dublincore:
@@ -54,20 +58,19 @@ class WLDocument(object):
                      self.rdf_elem, fallbacks=self.meta_fallbacks, strict=self.strict)
          return self._book_info
  
-    def __init__(self, iofile, provider=None, 
-            parse_dublincore=True, # shouldn't it be in a subclass?
-            strict=False, # ?
-            meta_fallbacks=None # ?
-            ):
+    def __init__(self, iofile, provider=None, parse_dublincore=True,  # shouldn't it be in a subclass?
+                 strict=False,  # ?
+                 meta_fallbacks=None):  # ?
          self.source = iofile
          self.provider = provider
          self.parse_dublincore = parse_dublincore
          self.strict = strict
          self.meta_fallbacks = meta_fallbacks
-        if self.edoc.getroot().tag != 'utwor':
+        root_elem = self.edoc.getroot()
+        if root_elem.tag != 'utwor':
              raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
          if parse_dublincore:
-            self.book_info
+            self.book_info  # property, not a method: the read itself loads the metadata
  
      @classmethod
      def from_string(cls, xml, *args, **kwargs):
@@ -78,7 +81,6 @@ class WLDocument(object):
          iofile = IOFile.from_filename(xmlfile)
          return cls(iofile, *args, **kwargs)
  
-
      def swap_endlines(self):
          """Converts line breaks in stanzas into <br/> tags."""
          # only swap inside stanzas
@@ -119,7 +121,7 @@ class WLDocument(object):
                  parts.append(part)
              else:
                  tag, n = match.groups()
-                parts.append("*[%d][name() = '%s']" % (int(n)+1, tag) )
+                parts.append("*[%d][name() = '%s']" % (int(n)+1, tag))
  
          if parts[0] == '.':
              parts[0] = ''
@@ -132,7 +134,7 @@ class WLDocument(object):
      def update_dc(self):
          if self.book_info:
              parent = self.rdf_elem.getparent()
-            parent.replace( self.rdf_elem, self.book_info.to_etree(parent) )
+            parent.replace(self.rdf_elem, self.book_info.to_etree(parent))
  
      def serialize(self):
          self.update_dc()
@@ -145,18 +147,19 @@ class WLDocument(object):
              try:
                  xpath = self.path_to_xpath(key)
                  node = self.edoc.xpath(xpath)[0]
-                repl = etree.fromstring(u"<%s>%s</%s>" %(node.tag, data, node.tag) )
+                repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
                  node.getparent().replace(node, repl)
              except Exception, e:
-                unmerged.append( repr( (key, xpath, e) ) )
+                # FIXME: xpath may be unbound here if path_to_xpath() raised; the except is also too broad
+                unmerged.append(repr((key, xpath, e)))
  
          return unmerged
  
      def clean_ed_note(self):
          """ deletes forbidden tags from nota_red """
  
-        for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in
-                    ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
+        forbidden_tags = ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw')
+        for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in forbidden_tags)):
              tail = node.tail
              node.clear()
              node.tag = 'span'
@@ -194,15 +197,12 @@ class WLDocument(object):
              cover_class = WLCover
          return cover_class(self.book_info, *args, **kwargs).output_file()
  
-    def save_output_file(self, output_file, output_path=None,
-            output_dir_path=None, make_author_dir=False, ext=None):
+    def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
          if output_dir_path:
              save_path = output_dir_path
              if make_author_dir:
-                save_path = os.path.join(save_path,
-                        unicode(self.book_info.author).encode('utf-8'))
-            save_path = os.path.join(save_path,
-                                self.book_info.uri.slug)
+                save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
+            save_path = os.path.join(save_path, self.book_info.uri.slug)
              if ext:
                  save_path += '.%s' % ext
          else:
diff --git a/librarian/pdf.py b/librarian/pdf.py

index 7889a22..2f5c209 100644 (file)
--- a/librarian/pdf.py
+++ b/librarian/pdf.py
@@ -21,7 +21,6 @@ from subprocess import call, PIPE
  
  from Texml.processor import process
  from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
@@ -39,11 +38,12 @@ STYLESHEETS = {
      'wl2tex': 'pdf/wl2tex.xslt',
  }
  
+
  def insert_tags(doc, split_re, tagname, exclude=None):
      """ inserts <tagname> for every occurence of `split_re' in text nodes in the `doc' tree
  
-    >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>');
-    >>> insert_tags(t, re.compile('-'), 'd');
+    >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
+    >>> insert_tags(t, re.compile('-'), 'd')
      >>> print etree.tostring(t)
      <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
      """
@@ -87,10 +87,14 @@ def fix_hanging(doc):
  
  def move_motifs_inside(doc):
      """ moves motifs to be into block elements """
-    for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
+    main_tags = ('powiesc', 'opowiadanie', 'liryka_l', 'liryka_lp',
+                 'dramat_wierszowany_l', 'dramat_wierszowany_lp', 'dramat_wspolczesny')
+    for master in doc.xpath('|'.join('//' + tag for tag in main_tags)):
          for motif in master.xpath('motyw'):
              for sib in motif.itersiblings():
-                if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'):
+                special_tags = ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
+                                'begin', 'end', 'motyw', 'extra', 'uwaga')
+                if sib.tag not in special_tags:
                      # motif shouldn't have a tail - it would be untagged text
                      motif.tail = None
                      motif.getparent().remove(motif)
@@ -136,9 +140,10 @@ def parse_creator(doc):
      Finds all dc:creator and dc.contributor.translator tags
      and adds *_parsed versions with forenames first.
      """
-    for person in doc.xpath("|".join('//dc:'+(tag) for tag in (
-                    'creator', 'contributor.translator')),
-                    namespaces = {'dc': str(DCNS)})[::-1]:
+    persons = doc.xpath(
+        "|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')),
+        namespaces={'dc': str(DCNS)})[::-1]
+    for person in persons:
          if not person.text:
              continue
          p = Person.from_text(person.text)
@@ -193,8 +198,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
  
      text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
  
-    document = WLDocument.from_string(text,
-                parse_dublincore=True, provider=provider)
+    document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
      document.swap_endlines()
  
      for child_uri in document.book_info.parts:
@@ -246,8 +250,8 @@ class PDFFormat(Format):
          # Copy style
          shutil.copy(get_resource('pdf/wl.cls'), temp)
          shutil.copy(self.style, os.path.join(temp, 'style.sty'))
-        #for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
-        #    shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
+        # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
+        #     shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
  
          # Save attachments
          if self.cover:
@@ -263,13 +267,13 @@ class PDFFormat(Format):
              cwd = None
          os.chdir(temp)
  
+        p = None
          if self.verbose:
-            for i in range(self.tex_passes):
+            for i in xrange(self.tex_passes):
                  p = call(['xelatex', tex_path])
          else:
-            for i in range(self.tex_passes):
-                p = call(['xelatex', '-interaction=batchmode', tex_path],
-                            stdout=PIPE, stderr=PIPE)
+            for i in xrange(self.tex_passes):
+                p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
          if p:
              raise ParseError("Error parsing .tex file: %s" % tex_path)
  
diff --git a/librarian/picture.py b/librarian/picture.py

index ee3c61d..b665a34 100644 (file)
--- a/librarian/picture.py
+++ b/librarian/picture.py
@@ -1,5 +1,5 @@
-
-from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
+# -*- coding: utf-8 -*-
+from dcparser import Field, WorkInfo, DCNS
  from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
  from xml.parsers.expat import ExpatError
  from os import path
@@ -10,14 +10,14 @@ import re
  
  
  class WLPictureURI(WLURI):
-    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
-            '(?P<slug>[-a-z0-9]+)/?$')
+    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P<slug>[-a-z0-9]+)/?$')
  
      @classmethod
      def from_slug(cls, slug):
          uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
          return cls(uri)
  
+
  def as_wlpictureuri_strict(text):
      return WLPictureURI.strict(text)
  
@@ -36,15 +36,15 @@ class PictureInfo(WorkInfo):
          Field(DCNS('description.medium'), 'medium', required=False),
          Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
          Field(DCNS('format'), 'mime_type', required=False),
-        Field(DCNS('identifier.url'), 'url', WLPictureURI,
-            strict=as_wlpictureuri_strict),
-        )
+        Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict),
+    )
  
  
  class ImageStore(object):
-    EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
-            'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
-            'aiff', 'wbmp', 'xbm']
+    EXT = [
+        'gif', 'jpeg', 'png', 'swf', 'psd', 'bmp',
+        'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
+        'aiff', 'wbmp', 'xbm']
      MIME = ['image/gif', 'image/jpeg', 'image/png',
              'application/x-shockwave-flash', 'image/psd', 'image/bmp',
              'image/tiff', 'image/tiff', 'application/octet-stream',
@@ -53,7 +53,7 @@ class ImageStore(object):
  
      def __init__(self, dir_):
          self.dir = dir_
-        return super(ImageStore, self).__init__()
+        super(ImageStore, self).__init__()
  
      def path(self, slug, mime_type):
          """
@@ -94,20 +94,16 @@ class WLPicture(object):
          else:
              self.picture_info = None
  
-    @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
-
      @classmethod
      def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
  
          # first, prepare for parsing
          if isinstance(xmlfile, basestring):
-            file = open(xmlfile, 'rb')
+            xmlfile = open(xmlfile, 'rb')
              try:
-                data = file.read()
+                data = xmlfile.read()
              finally:
-                file.close()
+                xmlfile.close()
          else:
              data = xmlfile.read()
  
@@ -121,7 +117,7 @@ class WLPicture(object):
              image_store = ImageStore(path.dirname(xmlfile.name))
  
          try:
-            parser = etree.XMLParser(remove_blank_text=False)
+            parser = etree.XMLParser()
              tree = etree.parse(StringIO(data.encode('utf-8')), parser)
  
              return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
@@ -152,14 +148,11 @@ class WLPicture(object):
          Iterates the parts of this picture and returns them and their metadata
          """
          for part in self.edoc.iter("div"):
-            pd = {}
-            pd['type'] = part.get('type')
+            pd = {'themes': [], 'object': None, 'type': part.get('type')}
              if pd['type'] == 'area':
                  pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
                                  (int(part.get('x2')), int(part.get('y2'))))
  
-            pd['themes'] = []
-            pd['object'] = None
              parent = part
              while True:
                  parent = parent.getparent()
diff --git a/librarian/pyhtml.py b/librarian/pyhtml.py

index 16a2141..163d11c 100644 (file)
--- a/librarian/pyhtml.py
+++ b/librarian/pyhtml.py
@@ -251,7 +251,7 @@ class EduModule(Xmill):
                      subgen = EduModule(self.options)
                      definiens_s = subgen.generate(definiens)
              else:
-                print "!! Missing definiendum in source: '%s'" % element.text
+                print ("!! Missing definiendum in source: '%s'" % element.text).encode('utf-8')
  
          return u"<dt id='%s'>" % self.naglowek_to_anchor(element), u"</dt>" + definiens_s
  
diff --git a/librarian/pypdf.py b/librarian/pypdf.py

index bb2881f..9851cb1 100644 (file)
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -18,8 +18,7 @@ from urllib2 import urlopen
  
  from lxml import etree
  
-from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
-from librarian.dcparser import Person
+from xmlutils import Xmill, ifoption, tag_open_close
  from librarian import DCNS, get_resource, IOFile
  from librarian import functions
  from pdf import PDFFormat, substitute_hyphens, fix_hanging
@@ -33,7 +32,8 @@ def escape(really):
              prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
              postfix = u'</TeXML>'
              if isinstance(value, list):
-                import pdb; pdb.set_trace()
+                import pdb
+                pdb.set_trace()
              if isinstance(value, tuple):
                  return prefix + value[0], value[1] + postfix
              else:
@@ -87,16 +87,15 @@ class EduModule(Xmill):
          return values
  
      def handle_rdf__RDF(self, _):
-        "skip metadata in generation"
+        """skip metadata in generation"""
          return
  
      @escape(True)
      def get_rightsinfo(self, element):
          rights_lic = self.get_dc(element, 'rights.license', True)
-        return u'<cmd name="rightsinfostr">' + \
-          (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
-          u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
-          u'</cmd>'
+        return u'<cmd name="rightsinfostr">' + (rights_lic and u'<opt>%s</opt>' % rights_lic or '') + \
+            u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) + \
+            u'</cmd>'
  
      @escape(True)
      def get_authors(self, element, which=None):
@@ -116,31 +115,31 @@ class EduModule(Xmill):
      def handle_utwor(self, element):
          lines = [
              u'''
-    <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
-        <TeXML escape="0">
-        \\documentclass[%s]{wl}
-        \\usepackage{style}''' % self.options['customization_str'],
-    self.options['has_cover'] and '\usepackage{makecover}',
-    (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
-    (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
-    (self.options['morefloats'] == 'none' and
-     u'''\\IfFileExists{morefloats.sty}{
-            \\usepackage{morefloats}
-        }{}'''),
-    u'''\\def\\authors{%s}''' % self.get_authors(element),
-    u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
-    u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
-    u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
-    
-    u'''\\author{\\authors}''',
-    u'''\\title{%s}''' % self.get_title(element),
-    u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
-    u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
-    u'</TeXML>']
+                <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
+                <TeXML escape="0">
+                \\documentclass[%s]{wl}
+                \\usepackage{style}''' % self.options['customization_str'],
+            self.options['has_cover'] and '\usepackage{makecover}',
+            (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
+            (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
+            (self.options['morefloats'] == 'none' and
+                u'''\\IfFileExists{morefloats.sty}{
+                \\usepackage{morefloats}
+                }{}'''),
+            u'''\\def\\authors{%s}''' % self.get_authors(element),
+            u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+            u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+            u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+
+            u'''\\author{\\authors}''',
+            u'''\\title{%s}''' % self.get_title(element),
+            u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
+            u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
+            u'</TeXML>'
+        ]
  
          return u"".join(filter(None, lines)), u'</TeXML>'
  
-
      @escape(1)
      def handle_powiesc(self, element):
          return u"""
@@ -154,45 +153,42 @@ class EduModule(Xmill):
          return u'<TeXML escape="1"><cmd name="%s"><parm>' % cmd, u'</parm></cmd></TeXML>'
  
      handle_akap = \
-    handle_akap = \
-    handle_akap_cd = \
-    handle_akap_cd = \
-    handle_akap_dialog = \
-    handle_akap_dialog = \
-    handle_autor_utworu = \
-    handle_dedykacja = \
-    handle_didaskalia = \
-    handle_didask_tekst = \
-    handle_dlugi_cytat = \
-    handle_dzielo_nadrzedne = \
-    handle_lista_osoba = \
-    handle_mat = \
-    handle_miejsce_czas = \
-    handle_motto = \
-    handle_motto_podpis = \
-    handle_naglowek_akt = \
-    handle_naglowek_czesc = \
-    handle_naglowek_listy = \
-    handle_naglowek_osoba = \
-    handle_naglowek_scena = \
-    handle_nazwa_utworu = \
-    handle_nota = \
-    handle_osoba = \
-    handle_pa = \
-    handle_pe = \
-    handle_podtytul = \
-    handle_poezja_cyt = \
-    handle_pr = \
-    handle_pt = \
-    handle_sekcja_asterysk = \
-    handle_sekcja_swiatlo = \
-    handle_separator_linia = \
-    handle_slowo_obce = \
-    handle_srodtytul = \
-    handle_tytul_dziela = \
-    handle_wyroznienie = \
-    handle_dywiz = \
-    handle_texcommand
+        handle_akap_cd = \
+        handle_akap_dialog = \
+        handle_autor_utworu = \
+        handle_dedykacja = \
+        handle_didaskalia = \
+        handle_didask_tekst = \
+        handle_dlugi_cytat = \
+        handle_dzielo_nadrzedne = \
+        handle_lista_osoba = \
+        handle_mat = \
+        handle_miejsce_czas = \
+        handle_motto = \
+        handle_motto_podpis = \
+        handle_naglowek_akt = \
+        handle_naglowek_czesc = \
+        handle_naglowek_listy = \
+        handle_naglowek_osoba = \
+        handle_naglowek_scena = \
+        handle_nazwa_utworu = \
+        handle_nota = \
+        handle_osoba = \
+        handle_pa = \
+        handle_pe = \
+        handle_podtytul = \
+        handle_poezja_cyt = \
+        handle_pr = \
+        handle_pt = \
+        handle_sekcja_asterysk = \
+        handle_sekcja_swiatlo = \
+        handle_separator_linia = \
+        handle_slowo_obce = \
+        handle_srodtytul = \
+        handle_tytul_dziela = \
+        handle_wyroznienie = \
+        handle_dywiz = \
+        handle_texcommand
  
      def handle_naglowek_rozdzial(self, element):
          if not self.options['teacher']:
@@ -220,6 +216,7 @@ class EduModule(Xmill):
  
      def handle_uwaga(self, _e):
          return None
+
      def handle_extra(self, _e):
          return None
  
@@ -247,13 +244,16 @@ class EduModule(Xmill):
              opis = ''
  
          n = element.xpath('wskazowki')
-        if n: wskazowki = submill.generate(n[0])
-
-        else: wskazowki = ''
+        if n:
+            wskazowki = submill.generate(n[0])
+        else:
+            wskazowki = ''
          n = element.xpath('pomoce')
  
-        if n: pomoce = submill.generate(n[0])
-        else: pomoce = ''
+        if n:
+            pomoce = submill.generate(n[0])
+        else:
+            pomoce = ''
  
          forma = ''.join(element.xpath('forma/text()'))
  
@@ -296,7 +296,7 @@ class EduModule(Xmill):
      def handle_forma(self, *_):
          return
  
-    def handle_lista(self, element, attrs={}):
+    def handle_lista(self, element, attrs=None):
          ltype = element.attrib.get('typ', 'punkt')
          if not element.findall("punkt"):
              if ltype == 'czytelnia':
@@ -309,13 +309,15 @@ class EduModule(Xmill):
                  # print '** missing src on <slowniczek>, setting default'
                  surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
              sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
-            self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
+            self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
  
-        listcmd = {'num': 'enumerate',
-               'punkt': 'itemize',
-               'alfa': 'itemize',
-               'slowniczek': 'itemize',
-               'czytelnia': 'itemize'}[ltype]
+        listcmd = {
+            'num': 'enumerate',
+            'punkt': 'itemize',
+            'alfa': 'itemize',
+            'slowniczek': 'itemize',
+            'czytelnia': 'itemize'
+        }[ltype]
  
          return u'<env name="%s">' % listcmd, u'</env>'
  
@@ -334,7 +336,7 @@ class EduModule(Xmill):
  
          typ = element.attrib['typ']
          self.exercise_counter += 1
-        if not typ in exercise_handlers:
+        if typ not in exercise_handlers:
              return '(no handler)'
          self.options = {'exercise_counter': self.exercise_counter}
          handler = exercise_handlers[typ](self.options, self.state)
@@ -376,14 +378,13 @@ class EduModule(Xmill):
                  max_col = len(ks)
          self.options = {'columnts': max_col}
          # styling:
-                #        has_frames = int(element.attrib.get("ramki", "0"))
-                #        if has_frames: frames_c = "framed"
-                #        else: frames_c = ""
-                #        return u"""<table class="%s">""" % frames_c, u"</table>"
+        #     has_frames = int(element.attrib.get("ramki", "0"))
+        #     if has_frames: frames_c = "framed"
+        #     else: frames_c = ""
+        #     return u"""<table class="%s">""" % frames_c, u"</table>"
          return u'''
  <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
-    ''' % ('l' * max_col), \
-    u'''<cmd name="end"><parm>tabular</parm></cmd>'''
+    ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
  
      @escape(1)
      def handle_wiersz(self, element):
@@ -424,8 +425,7 @@ class EduModule(Xmill):
              print '!! unknown <video> url scheme:', url
              return
          name = m.group(1)
-        thumb = IOFile.from_string(urlopen
-            ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
+        thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
          img_path = "video/%s.jpg" % name.replace("_", "")
          self.options['format'].attachments[img_path] = thumb
          canon_url = "https://www.youtube.com/watch?v=%s" % name
@@ -436,6 +436,7 @@ class Exercise(EduModule):
      def __init__(self, *args, **kw):
          self.question_counter = 0
          super(Exercise, self).__init__(*args, **kw)
+        self.piece_counter = None
  
      handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
  
@@ -457,7 +458,7 @@ class Exercise(EduModule):
          # Add a single <pytanie> tag if it's not there
          if not element.xpath(".//pytanie"):
              qpre, qpost = self.handle_pytanie(element)
-            pre = pre + qpre
+            pre += qpre
              post = qpost + post
          return pre, post
  
@@ -493,7 +494,6 @@ class Exercise(EduModule):
              return self.solution_header() + etree.tostring(par)
  
  
-
  class Wybor(Exercise):
      def handle_cwiczenie(self, element):
          pre, post = super(Wybor, self).handle_cwiczenie(element)
@@ -508,7 +508,8 @@ class Wybor(Exercise):
                  break
              choices = p.xpath(".//*[@nazwa]")
              uniq = set()
-            for n in choices: uniq.add(n.attrib.get('nazwa', ''))
+            for n in choices:
+                uniq.add(n.attrib.get('nazwa', ''))
              if len(choices) != len(uniq):
                  is_single_choice = False
                  break
@@ -608,18 +609,19 @@ class PrawdaFalsz(Exercise):
          return pre, post
  
  
-
  def fix_lists(tree):
      lists = tree.xpath(".//lista")
      for l in lists:
          if l.text:
              p = l.getprevious()
              if p is not None:
-                if p.tail is None: p.tail = ''
+                if p.tail is None:
+                    p.tail = ''
                  p.tail += l.text
              else:
                  p = l.getparent()
-                if p.text is None: p.text = ''
+                if p.text is None:
+                    p.text = ''
                  p.text += l.text
              l.text = ''
      return tree
@@ -652,4 +654,3 @@ class EduModulePDFFormat(PDFFormat):
  
      def get_image(self, name):
          return self.wldoc.source.attachments[name]
-
diff --git a/librarian/styles/wolnelektury/cover.py b/librarian/styles/wolnelektury/cover.py

index 8181890..ce85d12 100644 (file)
--- a/librarian/styles/wolnelektury/cover.py
+++ b/librarian/styles/wolnelektury/cover.py
@@ -101,20 +101,17 @@ class WLCover(Cover):
                   font=self.author_font,
                   line_height=self.author_lineskip,
                   color=self.author_color,
-                 shadow_color=self.author_shadow,
-                )
+                 shadow_color=self.author_shadow)
  
          box.skip(10)
-        box.draw.line((75, box.height, 275, box.height),
-                fill=self.author_color, width=2)
+        box.draw.line((75, box.height, 275, box.height), fill=self.author_color, width=2)
          box.skip(15)
  
          box.text(self.pretty_title(),
                   line_height=self.title_lineskip,
                   font=self.title_font,
                   color=epoch_color,
-                 shadow_color=self.title_shadow,
-                )
+                 shadow_color=self.title_shadow)
          box_img = box.image()
  
          if self.kind == 'Liryka':
@@ -127,9 +124,9 @@ class WLCover(Cover):
              # center
              box_top = (self.height - box_img.size[1]) / 2
  
-        box_left = self.bar_width + (self.width - self.bar_width -
-                        box_img.size[0]) / 2
-        draw.rectangle((box_left, box_top,
+        box_left = self.bar_width + (self.width - self.bar_width - box_img.size[0]) / 2
+        draw.rectangle((
+            box_left, box_top,
              box_left + box_img.size[0], box_top + box_img.size[1]),
              fill='#fff')
          img.paste(box_img, (box_left, box_top), box_img)
diff --git a/librarian/styles/wolnelektury/pdf.py b/librarian/styles/wolnelektury/pdf.py

deleted file mode 100644 (file)

index 6a43b0e..0000000
--- a/librarian/styles/wolnelektury/pdf.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import shutil
-from librarian import get_resource
-from librarian.pdf import PDFFormat
-from librarian.styles.wolnelektury.cover import WLCover
-
-class WLPDFFormat(PDFFormat):
-    cover_class = WLCover
-    style = get_resource('res/styles/wolnelektury/pdf/wolnelektury.sty')
-
-    def get_tex_dir(self):
-        temp = super(WLPDFFormat, self).get_tex_dir()
-        shutil.copy(get_resource('res/wl-logo.png'), temp)
-        return temp
diff --git a/librarian/text.py b/librarian/text.py

index 70f5f01..c86b3f8 100644 (file)
--- a/librarian/text.py
+++ b/librarian/text.py
@@ -29,6 +29,7 @@ Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowocz
  %(description)s%(contributors)s
  """
  
+
  def transform(wldoc, flags=None, **options):
      """
      Transforms input_file in XML to output_file in TXT.
@@ -53,21 +54,28 @@ def transform(wldoc, flags=None, **options):
              parsed_dc = document.book_info
              description = parsed_dc.description
              url = document.book_info.url
-    
+
              license_description = parsed_dc.license_description
              license = parsed_dc.license
              if license:
-                license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license)        
+                license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % \
+                                      (license_description, license)
              else:
-                license_description = u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)"
-    
+                license_description = (
+                    u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, "
+                    u"co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. "
+                    u"Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), "
+                    u"które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji "
+                    u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL "
+                    u"(http://creativecommons.org/licenses/by-sa/3.0/)")
+
              source = parsed_dc.source_name
              if source:
                  source = "\n\nTekst opracowany na podstawie: " + source
              else:
                  source = ''
-    
-            contributors = ', '.join(person.readable() for person in 
+
+            contributors = ', '.join(person.readable() for person in
                                       sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p)))
              if contributors:
                  contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors
@@ -88,4 +96,3 @@ def transform(wldoc, flags=None, **options):
          }).encode('utf-8'))
      else:
          return IOFile.from_string(unicode(result).encode('utf-8'))
-
diff --git a/librarian/xmlutils.py b/librarian/xmlutils.py

index bbcc884..ae3512a 100644 (file)
--- a/librarian/xmlutils.py
+++ b/librarian/xmlutils.py
@@ -41,7 +41,6 @@ class Xmill(object):
              output = flt(output)
          return output
  
-
      def generate(self, document):
          """Generate text from node using handlers defined in class."""
          output = self._handle_element(document)
@@ -61,18 +60,17 @@ class Xmill(object):
          """
          self._options.append(opts)
  
-
      def _handle_for_element(self, element):
          ns = None
          tagname = None
-#        from nose.tools import set_trace
+        # from nose.tools import set_trace
  
          if element.tag[0] == '{':
              for nshort, nhref in element.nsmap.items():
                  try:
                      if element.tag.index('{%s}' % nhref) == 0:
                          ns = nshort
-                        tagname  = element.tag[len('{%s}' % nhref):]
+                        tagname = element.tag[len('{%s}' % nhref):]
                          break
                  except ValueError:
                      pass
@@ -96,19 +94,22 @@ class Xmill(object):
  
          while True:
              sibling = element.getnext()
-            if sibling is not None: return sibling  # found a new branch to dig into
+            if sibling is not None:
+                return sibling  # found a new branch to dig into
              element = element.getparent()
-            if element is None: return None  # end of tree
+            if element is None:
+                return None  # end of tree
  
      def _handle_element(self, element):
-        if isinstance(element, etree._Comment): return None
-        
+        if isinstance(element, etree._Comment):
+            return None
+
          handler = self._handle_for_element(element)
-        if self.state.get('mute') and not getattr(handler, 'unmuter', False): return None
+        if self.state.get('mute') and not getattr(handler, 'unmuter', False):
+            return None
          # How many scopes
+        options_scopes = len(self._options)
          try:
-            options_scopes = len(self._options)
-
              if handler is None:
                  pre = [self.filter_text(element.text)]
                  post = [self.filter_text(element.tail)]
@@ -129,19 +130,20 @@ class Xmill(object):
          finally:
              # clean up option scopes if necessary
              self._options = self._options[0:options_scopes]
-            
+
          return out
  
  
  def tag_open_close(name_, classes_=None, **attrs):
      u"""Creates tag beginning and end.
-    
+
      >>> tag_open_close("a", "klass", x=u"ą<")
      (u'<a x="\\u0105&lt;" class="klass">', u'</a>')
  
      """
      if classes_:
-        if isinstance(classes_, (tuple, list)): classes_ = ' '.join(classes_)
+        if isinstance(classes_, (tuple, list)):
+            classes_ = ' '.join(classes_)
          attrs['class'] = classes_
  
      e = etree.Element(name_)
@@ -151,6 +153,7 @@ def tag_open_close(name_, classes_=None, **attrs):
      pre, post = etree.tostring(e, encoding=unicode).split(u"> <")
      return pre + u">", u"<" + post
  
+
  def tag(name_, classes_=None, **attrs):
      """Returns a handler which wraps node contents in tag `name', with class attribute
      set to `classes' and other attributes according to keyword paramters
@@ -165,13 +168,16 @@ def tagged(name, classes=None, **attrs):
      set to `classes' and other attributes according to keyword paramters
      """
      if classes:
-        if isinstance(classes, (tuple,list)): classes = ' '.join(classes)
+        if isinstance(classes, (tuple, list)):
+            classes = ' '.join(classes)
          attrs['class'] = classes
-    a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()])
+    a = ''.join([' %s="%s"' % (k, v) for (k, v) in attrs.items()])
+
      def _decor(f):
          def _wrap(self, element):
              r = f(self, element)
-            if r is None: return
+            if r is None:
+                return
  
              prepend = "<%s%s>" % (name, a)
              append = "</%s>" % name
@@ -196,6 +202,7 @@ def ifoption(**options):
          return _handler
      return _decor
  
+
  def flatten(l, ltypes=(list, tuple)):
      """flatten function from BasicPropery/BasicTypes package
      """
diff --git a/setup.py b/setup.py

index 0a3682f..b661226 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -8,9 +8,10 @@ import os
  import os.path
  from distutils.core import setup
  
+
  def whole_tree(prefix, path):
      files = []
-    for f in (f for f in os.listdir(os.path.join(prefix, path)) if not f[0]=='.'):
+    for f in (f for f in os.listdir(os.path.join(prefix, path)) if not f[0] == '.'):
          new_path = os.path.join(path, f)
          if os.path.isdir(os.path.join(prefix, new_path)):
              files.extend(whole_tree(prefix, new_path))
@@ -34,9 +35,12 @@ setup(
          'librarian.styles.wolnelektury',
          'librarian.styles.wolnelektury.partners',
      ],
-    package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
-                                whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer') +
-                                whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res')},
+    package_data={
+        'librarian': (
+            ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
+            whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer') +
+            whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res'))
+    },
      include_package_data=True,
      install_requires=['lxml>=2.2'],
      scripts=['scripts/book2html',
diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py

index ee29bc9..1e15a28 100644 (file)
--- a/tests/test_dcparser.py
+++ b/tests/test_dcparser.py
@@ -45,4 +45,3 @@ def check_serialize(xml_file):
  def test_serialize():
      for fixture in get_all_fixtures('dcparser', '*.xml'):
          yield check_serialize, fixture
-
diff --git a/tests/test_epub.py b/tests/test_epub.py

index faa76e7..720fec6 100644 (file)
--- a/tests/test_epub.py
+++ b/tests/test_epub.py
@@ -25,7 +25,8 @@ def test_transform():
      for par in tree.findall("//p"):
          if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'):
              editors_attribution = True
-            assert_equal(par.text.rstrip(),
+            assert_equal(
+                par.text.rstrip(),
                  u'Opracowanie redakcyjne i przypisy: '
                  u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
      assert_true(editors_attribution)
diff --git a/tests/test_html.py b/tests/test_html.py

index 21adfb1..9d9b1de 100644 (file)
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -33,6 +33,7 @@ def test_passing_parse_dublincore_to_transform():
              parse_dublincore=False,
          ).as_html()
  
+
  def test_empty():
      assert not WLDocument.from_string(
              '<utwor />',
diff --git a/tests/test_iofile.py b/tests/test_iofile.py

index 097a65a..a422887 100644 (file)
--- a/tests/test_iofile.py
+++ b/tests/test_iofile.py
@@ -1,14 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
  import os
-from StringIO import StringIO
  from tempfile import NamedTemporaryFile
  from nose.tools import *
  from librarian import IOFile
  
+
  def test_iofile_from_string_reusable():
      some_file = IOFile.from_string("test")
      some_file.get_file().read()
      assert_equal(some_file.get_file().read(), "test")
  
+
  def test_iofile_from_filename_reusable():
      temp = NamedTemporaryFile(delete=False)
      try:
diff --git a/tests/test_pdf.py b/tests/test_pdf.py

index 75b73bc..f604f58 100644 (file)
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -3,7 +3,6 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-import re
  from tempfile import NamedTemporaryFile
  from nose.tools import *
  from librarian import DirDocProvider
@@ -22,7 +21,6 @@ def test_transform():
      print tex
  
      # Check contributor list.
-    editors = re.search(ur'\\def\\editors\{'
-        ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
-    assert_equal(editors.group(1),
-        u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
+    editors = re.search(
+        ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
+    assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
diff --git a/tests/test_picture.py b/tests/test_picture.py

index f64f624..40ca21c 100644 (file)
--- a/tests/test_picture.py
+++ b/tests/test_picture.py
@@ -4,21 +4,20 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from librarian import picture, dcparser
-from lxml import etree
  from nose.tools import *
-from os.path import splitext
  from tests.utils import get_all_fixtures, get_fixture
-import codecs
  from os import path
  
+
  def test_wlpictureuri():
      uri = picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus')
  
+
  def check_load(xml_file):
      pi = dcparser.parse(xml_file, picture.PictureInfo)
      assert pi is not None
      assert isinstance(pi, picture.PictureInfo)
-    
+
  
  def test_load():
      for fixture in get_all_fixtures('picture', '*.xml'):
@@ -35,10 +34,11 @@ def test_wlpicture():
      assert wlp.slug == 'angelus-novus'
  
      assert path.exists(wlp.image_path)
-    
+
      f = wlp.image_file('r')
      f.close()
  
+
  def test_picture_parts():
      wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml')))
      parts = list(wlp.partiter())
@@ -54,7 +54,5 @@ def test_picture_parts():
          if p['object']:
              names.add(p['object'])
  
-    assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs
-    assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names
-    
-        
+    assert motifs == {u'anioł historii', u'spojrzenie'}, "missing motifs, got: %s" % motifs
+    assert names == {u'obraz cały', u'skrzydło'}, 'missing objects, got: %s' % names
diff --git a/tests/test_pyhtml.py b/tests/test_pyhtml.py

index 319baa7..0de2624 100644 (file)
--- a/tests/test_pyhtml.py
+++ b/tests/test_pyhtml.py
@@ -1,10 +1,11 @@
-
+# -*- coding: utf-8 -*-
  from librarian import xmlutils
  from lxml import etree
  from librarian.pyhtml import EduModule
  from nose.tools import *
  from tests.utils import get_fixture
  
+
  def test_traversal():
      xml = etree.fromstring("<a><b>BBBB</b><c>CCCC</c></a>")
      hg = xmlutils.Xmill()
@@ -13,7 +14,6 @@ def test_traversal():
      assert_equals(hg.next(xml[1]), None)
  
  
-
  class Foo(xmlutils.Xmill):
      def __init__(self):
          super(Foo, self).__init__()
@@ -27,24 +27,23 @@ class Foo(xmlutils.Xmill):
  
      def handle_song(self, ele):
          if ele.getnext() is not None:
-            return "\n","--------------------\n"
-
+            return "\n", "--------------------\n"
  
  
  def test_xml_generation():
      xml = u"""<root>
-<songs>
-<song>
-<title>Oursoul</title>
-<artist>Hindi Zahra</artist>
-</song>
-<song>
-<title>Visitor</title>
-<artist>Portico Quartet</artist>
-</song>
-</songs>
-</root>
-"""
+        <songs>
+        <song>
+        <title>Oursoul</title>
+        <artist>Hindi Zahra</artist>
+        </song>
+        <song>
+        <title>Visitor</title>
+        <artist>Portico Quartet</artist>
+        </song>
+        </songs>
+        </root>
+    """
      txt = Foo().generate(etree.fromstring(xml))
      print txt
  
diff --git a/tests/utils.py b/tests/utils.py

index 3b1f4f5..fc87532 100644 (file)
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -6,7 +6,7 @@
  from __future__ import with_statement
  from os.path import realpath, join, dirname
  import glob
-import os
+
  
  def get_fixture_dir(dir_name):
      """Returns path to fixtures directory dir_name."""
author	Jan Szejko <jan.szejko@gmail.com>
	Fri, 1 Jul 2016 12:26:00 +0000 (14:26 +0200)
committer	Jan Szejko <jan.szejko@gmail.com>
	Fri, 1 Jul 2016 12:26:00 +0000 (14:26 +0200)
librarian/__init__.py		patch \| blob \| history
librarian/book2anything.py		patch \| blob \| history
librarian/cover.py		patch \| blob \| history
librarian/dcparser.py		patch \| blob \| history
librarian/epub.py		patch \| blob \| history
librarian/fb2.py		patch \| blob \| history
librarian/functions.py		patch \| blob \| history
librarian/html.py		patch \| blob \| history
librarian/mobi.py		patch \| blob \| history
librarian/packagers.py		patch \| blob \| history
librarian/parser.py		patch \| blob \| history
librarian/pdf.py		patch \| blob \| history
librarian/picture.py		patch \| blob \| history
librarian/pyhtml.py		patch \| blob \| history
librarian/pypdf.py		patch \| blob \| history
librarian/styles/wolnelektury/cover.py		patch \| blob \| history
librarian/styles/wolnelektury/pdf.py	[deleted file]	patch \| blob \| history
librarian/text.py		patch \| blob \| history
librarian/xmlutils.py		patch \| blob \| history
setup.py		patch \| blob \| history
tests/test_dcparser.py		patch \| blob \| history
tests/test_epub.py		patch \| blob \| history
tests/test_html.py		patch \| blob \| history
tests/test_iofile.py		patch \| blob \| history
tests/test_pdf.py		patch \| blob \| history
tests/test_picture.py		patch \| blob \| history
tests/test_pyhtml.py		patch \| blob \| history
tests/utils.py		patch \| blob \| history