Python 3.4-3.7 support;

author Radek Czajka <rczajka@rczajka.pl>

Wed, 27 Feb 2019 09:13:41 +0000 (10:13 +0100)

committer Radek Czajka <rczajka@rczajka.pl>

Wed, 27 Feb 2019 19:35:56 +0000 (20:35 +0100)
author Radek Czajka <rczajka@rczajka.pl>
Wed, 27 Feb 2019 09:13:41 +0000 (10:13 +0100)
committer Radek Czajka <rczajka@rczajka.pl>
Wed, 27 Feb 2019 19:35:56 +0000 (20:35 +0100)
diff --git a/.gitignore b/.gitignore

index b6c0f8a..0660acf 100755 (executable)
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ build
  .project
  .pydevproject
  .settings
  .project
  .pydevproject
  .settings
+/.tox
+/nosetests.xml
+/htmlcov
diff --git a/AUTHORS.md b/AUTHORS.md

index 70fe140..2eab59f 100644 (file)
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -1,9 +1,17 @@
  Authors
  -------
  
  Authors
  -------
  
-Originally written by Marek Stępniowski <marek@stepniowski>
-       
-Later contributions:
+List of people who have contributed to the project, in chronological order:
+
+* Marek Stępniowski
+* Łukasz Rekucki
+* Radek Czajka
+* Łukasz Anwajler
+* Adam Twardoch
+* Marcin Koziej
+* Michał Górny
+* Aleksander Łukasz
+* Robert Błaut
+* Jan Szejko
+
  
  
-   * Łukasz Rekucki <lrekucki@gmail.com>
-   * Radek Czajka <radek.czajka@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md

new file mode 100644 (file)

index 0000000..dbc3209
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,20 @@
+# Change Log
+
+This document records all notable changes to Librarian.
+
+## 1.7 (2019-02-27)
+
+### Added
+- Python 3.4+ support, in addition to the existing Python 2.7 support.
+- `converter_path` argument in `mobi.transform`.
+- Proper packaging info.
+- This changelog.
+- Tox configuration for tests.
+
+### Changed
+- `from_bytes` methods replaced all `from_string` methods,
+   i.e. on: OutputFile, WorkInfo, BookInfo, WLDocument, WLPicture.
+- `get_bytes` replaced `get_string` on OutputFile.
+
+### Removed
+- Shims for Python < 2.7.
diff --git a/MANIFEST.in b/MANIFEST.in

new file mode 100644 (file)

index 0000000..af6efac
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,15 @@
+include *.md
+include LICENSE
+include NOTICE
+include tox.ini
+recursive-include scripts *.py *.css
+recursive-include tests *.py *.xml *.html *.out *.txt *.jpeg
+include librarian/xslt/*.xslt
+include librarian/xslt/*.xml
+include librarian/epub/*
+include librarian/pdf/*
+include librarian/fb2/*
+include librarian/fonts/*
+graft librarian/res
+graft librarian/font-optimizer
+
diff --git a/README.md b/README.md

index c0e13e9..dea2381 100644 (file)
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@ License
  
    ![AGPL Logo](http://www.gnu.org/graphics/agplv3-155x51.png)
  
  
    ![AGPL Logo](http://www.gnu.org/graphics/agplv3-155x51.png)
  
-    Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska <fundacja@nowoczesnapolska.org.pl>
+    Copyright © 2008-2019 Fundacja Nowoczesna Polska <fundacja@nowoczesnapolska.org.pl>
  
  
-    For full list of contributors see AUTHORS section at the end.
+    For the full list of contributors, see the AUTHORS file.
  
      This program is free software: you can redistribute it and/or modify
      it under the terms of the GNU Affero General Public License as published by
  
      This program is free software: you can redistribute it and/or modify
      it under the terms of the GNU Affero General Public License as published by
@@ -29,10 +29,12 @@ other formats, which are more suitable for presentation.
  
  Currently we support:
  
  
  Currently we support:
  
- * HTML4, XHTML 1.0
+ * HTML4, XHTML 1.0 (?)
   * Plain text
   * EPUB (XHTML based)
   * Plain text
   * EPUB (XHTML based)
+ * MOBI
   * print-ready PDF
   * print-ready PDF
+ * FB2
  
  Other features:
  
  
  Other features:
  
@@ -84,13 +86,3 @@ To convert a file to PDF:
  To extract book fragments marked as "theme":
  
      bookfragments file1.xml [file2.xml ...]
  To extract book fragments marked as "theme":
  
      bookfragments file1.xml [file2.xml ...]
-
-
-Authors
--------
-Originally written by Marek Stępniowski <marek@stepniowski.com>
-       
-Later contributions:
-
- * Łukasz Rekucki <lrekucki@gmail.com>
- * Radek Czajka <radek.czajka@gmail.com>
-\ No newline at end of file
diff --git a/librarian/__init__.py b/librarian/__init__.py

index 9a9e23e..119b6b1 100644 (file)
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -3,28 +3,28 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
  
  import os
  import re
  import shutil
  
  import os
  import re
  import shutil
+from tempfile import NamedTemporaryFile
  import urllib
  import urllib
-
-from util import makedirs
+from lxml import etree
+import six
+from six.moves.urllib.request import FancyURLopener
+from .util import makedirs
  
  
  
  
+@six.python_2_unicode_compatible
  class UnicodeException(Exception):
      def __str__(self):
  class UnicodeException(Exception):
      def __str__(self):
-        """ Dirty workaround for Python Unicode handling problems. """
-        return unicode(self).encode('utf-8')
-
-    def __unicode__(self):
          """ Dirty workaround for Python Unicode handling problems. """
          args = self.args[0] if len(self.args) == 1 else self.args
          try:
          """ Dirty workaround for Python Unicode handling problems. """
          args = self.args[0] if len(self.args) == 1 else self.args
          try:
-            message = unicode(args)
+            message = six.text_type(args)
          except UnicodeDecodeError:
          except UnicodeDecodeError:
-            message = unicode(args, encoding='utf-8', errors='ignore')
+            message = six.text_type(args, encoding='utf-8', errors='ignore')
          return message
  
  class ParseError(UnicodeException):
          return message
  
  class ParseError(UnicodeException):
@@ -79,6 +79,7 @@ PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")
  WLNS = EmptyNamespace()
  
  
  WLNS = EmptyNamespace()
  
  
+@six.python_2_unicode_compatible
  class WLURI(object):
      """Represents a WL URI. Extracts slug from it."""
      slug = None
  class WLURI(object):
      """Represents a WL URI. Extracts slug from it."""
      slug = None
@@ -88,7 +89,7 @@ class WLURI(object):
              '(?P<slug>[-a-z0-9]+)/?$')
  
      def __init__(self, uri):
              '(?P<slug>[-a-z0-9]+)/?$')
  
      def __init__(self, uri):
-        uri = unicode(uri)
+        uri = six.text_type(uri)
          self.uri = uri
          self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
  
          self.uri = uri
          self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
  
@@ -104,16 +105,13 @@ class WLURI(object):
      def from_slug(cls, slug):
          """Contructs an URI from slug.
  
      def from_slug(cls, slug):
          """Contructs an URI from slug.
  
-        >>> WLURI.from_slug('a-slug').uri
-        u'http://wolnelektury.pl/katalog/lektura/a-slug/'
+        >>> print(WLURI.from_slug('a-slug').uri)
+        http://wolnelektury.pl/katalog/lektura/a-slug/
  
          """
          uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
          return cls(uri)
  
  
          """
          uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
          return cls(uri)
  
-    def __unicode__(self):
-        return self.uri
-
      def __str__(self):
          return self.uri
  
      def __str__(self):
          return self.uri
  
@@ -146,11 +144,10 @@ class DirDocProvider(DocProvider):
  
      def by_slug(self, slug):
          fname = slug + '.xml'
  
      def by_slug(self, slug):
          fname = slug + '.xml'
-        return open(os.path.join(self.dir, fname))
+        return open(os.path.join(self.dir, fname), 'rb')
  
  
  
  
-import lxml.etree as etree
-import dcparser
+from . import dcparser
  
  DEFAULT_BOOKINFO = dcparser.BookInfo(
          { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
  
  DEFAULT_BOOKINFO = dcparser.BookInfo(
          { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
@@ -175,14 +172,14 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
  def xinclude_forURI(uri):
      e = etree.Element(XINS("include"))
      e.set("href", uri)
  def xinclude_forURI(uri):
      e = etree.Element(XINS("include"))
      e.set("href", uri)
-    return etree.tostring(e, encoding=unicode)
+    return etree.tostring(e, encoding='unicode')
  
  def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
      """Wrap the text within the minimal XML structure with a DC template."""
      bookinfo.created_at = creation_date
  
      dcstring = etree.tostring(bookinfo.to_etree(), \
  
  def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
      """Wrap the text within the minimal XML structure with a DC template."""
      bookinfo.created_at = creation_date
  
      dcstring = etree.tostring(bookinfo.to_etree(), \
-        method='xml', encoding=unicode, pretty_print=True)
+        method='xml', encoding='unicode', pretty_print=True)
  
      return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
          u'\n</plain-text>\n</utwor>'
  
      return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
          u'\n</plain-text>\n</utwor>'
@@ -192,7 +189,7 @@ def serialize_raw(element):
      b = u'' + (element.text or '')
  
      for child in element.iterchildren():
      b = u'' + (element.text or '')
  
      for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding=unicode,
+        e = etree.tostring(child, method='xml', encoding='unicode',
                  pretty_print=True)
          b += e
  
                  pretty_print=True)
          b += e
  
@@ -212,7 +209,7 @@ def get_resource(path):
  class OutputFile(object):
      """Represents a file returned by one of the converters."""
  
  class OutputFile(object):
      """Represents a file returned by one of the converters."""
  
-    _string = None
+    _bytes = None
      _filename = None
  
      def __del__(self):
      _filename = None
  
      def __del__(self):
@@ -220,14 +217,14 @@ class OutputFile(object):
              os.unlink(self._filename)
  
      def __nonzero__(self):
              os.unlink(self._filename)
  
      def __nonzero__(self):
-        return self._string is not None or self._filename is not None
+        return self._bytes is not None or self._filename is not None
  
      @classmethod
  
      @classmethod
-    def from_string(cls, string):
+    def from_bytes(cls, bytestring):
          """Converter returns contents of a file as a string."""
  
          instance = cls()
          """Converter returns contents of a file as a string."""
  
          instance = cls()
-        instance._string = string
+        instance._bytes = bytestring
          return instance
  
      @classmethod
          return instance
  
      @classmethod
@@ -238,33 +235,31 @@ class OutputFile(object):
          instance._filename = filename
          return instance
  
          instance._filename = filename
          return instance
  
-    def get_string(self):
-        """Get file's contents as a string."""
+    def get_bytes(self):
+        """Get file's contents as a bytestring."""
  
          if self._filename is not None:
  
          if self._filename is not None:
-            with open(self._filename) as f:
+            with open(self._filename, 'rb') as f:
                  return f.read()
          else:
                  return f.read()
          else:
-            return self._string
+            return self._bytes
  
      def get_file(self):
          """Get file as a file-like object."""
  
  
      def get_file(self):
          """Get file as a file-like object."""
  
-        if self._string is not None:
-            from StringIO import StringIO
-            return StringIO(self._string)
+        if self._bytes is not None:
+            return six.BytesIO(self._bytes)
          elif self._filename is not None:
          elif self._filename is not None:
-            return open(self._filename)
+            return open(self._filename, 'rb')
  
      def get_filename(self):
          """Get file as a fs path."""
  
          if self._filename is not None:
              return self._filename
  
      def get_filename(self):
          """Get file as a fs path."""
  
          if self._filename is not None:
              return self._filename
-        elif self._string is not None:
-            from tempfile import NamedTemporaryFile
+        elif self._bytes is not None:
              temp = NamedTemporaryFile(prefix='librarian-', delete=False)
              temp = NamedTemporaryFile(prefix='librarian-', delete=False)
-            temp.write(self._string)
+            temp.write(self._bytes)
              temp.close()
              self._filename = temp.name
              return self._filename
              temp.close()
              self._filename = temp.name
              return self._filename
@@ -279,6 +274,6 @@ class OutputFile(object):
          shutil.copy(self.get_filename(), path)
  
  
          shutil.copy(self.get_filename(), path)
  
  
-class URLOpener(urllib.FancyURLopener):
+class URLOpener(FancyURLopener):
      version = 'FNP Librarian (http://github.com/fnp/librarian)'
  urllib._urlopener = URLOpener()
      version = 'FNP Librarian (http://github.com/fnp/librarian)'
  urllib._urlopener = URLOpener()
diff --git a/librarian/book2anything.py b/librarian/book2anything.py

index 0da3b61..948d9fd 100755 (executable)
--- a/librarian/book2anything.py
+++ b/librarian/book2anything.py
@@ -4,9 +4,11 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os.path
  import optparse
  import os.path
  import optparse
-
+import six
  from librarian import DirDocProvider, ParseError
  from librarian.parser import WLDocument
  from librarian.cover import make_cover
  from librarian import DirDocProvider, ParseError
  from librarian.parser import WLDocument
  from librarian.cover import make_cover
@@ -102,7 +104,10 @@ class Book2Anything(object):
          try:
              for main_input in input_filenames:
                  if options.verbose:
          try:
              for main_input in input_filenames:
                  if options.verbose:
-                    print main_input
+                    print(main_input)
+
+            if isinstance(main_input, six.binary_type):
+                main_input = main_input.decode('utf-8')
  
              # Where to find input?
              if cls.uses_provider:
  
              # Where to find input?
              if cls.uses_provider:
@@ -126,9 +131,9 @@ class Book2Anything(object):
  
              doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext)
  
  
              doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext)
  
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
+        except ParseError as e:
+            print('%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e
-            }
+            })
diff --git a/librarian/cover.py b/librarian/cover.py

index 29e24c8..09c8071 100644 (file)
--- a/librarian/cover.py
+++ b/librarian/cover.py
@@ -3,9 +3,11 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  import re
  from PIL import Image, ImageFont, ImageDraw, ImageFilter
  import re
  from PIL import Image, ImageFont, ImageDraw, ImageFilter
-from StringIO import StringIO
+from six import BytesIO
  from librarian import get_resource, OutputFile, URLOpener
  
  
  from librarian import get_resource, OutputFile, URLOpener
  
  
@@ -69,7 +71,7 @@ class TextBox(object):
                  line_width = self.draw.textsize(line, font=font)[0]
              line = line.strip() + ' '
  
                  line_width = self.draw.textsize(line, font=font)[0]
              line = line.strip() + ' '
  
-            pos_x = (self.max_width - line_width) / 2
+            pos_x = (self.max_width - line_width) // 2
  
              if shadow_color:
                  self.shadow_draw.text(
  
              if shadow_color:
                  self.shadow_draw.text(
@@ -144,7 +146,7 @@ class Cover(object):
          if format is not None:
              self.format = format
          if width and height:
          if format is not None:
              self.format = format
          if width and height:
-            self.height = height * self.width / width
+            self.height = int(round(height * self.width / width))
          scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
          if scale >= 1:
              self.scale = scale
          scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
          if scale >= 1:
              self.scale = scale
@@ -171,8 +173,8 @@ class Cover(object):
          # WL logo
          if metr.logo_width:
              logo = Image.open(get_resource('res/wl-logo.png'))
          # WL logo
          if metr.logo_width:
              logo = Image.open(get_resource('res/wl-logo.png'))
-            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
-            img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom))
+            logo = logo.resize((metr.logo_width, int(round(logo.size[1] * metr.logo_width / logo.size[0]))))
+            img.paste(logo, ((metr.width - metr.logo_width) // 2, img.size[1] - logo.size[1] - metr.logo_bottom))
  
          top = metr.author_top
          tbox = TextBox(
  
          top = metr.author_top
          tbox = TextBox(
@@ -223,9 +225,9 @@ class Cover(object):
          return self.final_image().save(*args, **default_kwargs)
  
      def output_file(self, *args, **kwargs):
          return self.final_image().save(*args, **default_kwargs)
  
      def output_file(self, *args, **kwargs):
-        imgstr = StringIO()
+        imgstr = BytesIO()
          self.save(imgstr, *args, **kwargs)
          self.save(imgstr, *args, **kwargs)
-        return OutputFile.from_string(imgstr.getvalue())
+        return OutputFile.from_bytes(imgstr.getvalue())
  
  
  class WLCover(Cover):
  
  
  class WLCover(Cover):
@@ -347,9 +349,9 @@ class WLCover(Cover):
          elif self.box_position == 'bottom':
              box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
          else:   # Middle.
          elif self.box_position == 'bottom':
              box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
          else:   # Middle.
-            box_top = (metr.height - box_img.size[1]) / 2
+            box_top = (metr.height - box_img.size[1]) // 2
  
  
-        box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2
+        box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) // 2
  
          # Draw the white box.
          ImageDraw.Draw(img).rectangle(
  
          # Draw the white box.
          ImageDraw.Draw(img).rectangle(
@@ -389,17 +391,17 @@ class WLCover(Cover):
              if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
                  resized = (
                      trg_size[0],
              if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
                  resized = (
                      trg_size[0],
-                    src.size[1] * trg_size[0] / src.size[0]
+                    int(round(src.size[1] * trg_size[0] / src.size[0]))
                  )
                  )
-                cut = (resized[1] - trg_size[1]) / 2
+                cut = (resized[1] - trg_size[1]) // 2
                  src = src.resize(resized, Image.ANTIALIAS)
                  src = src.crop((0, cut, src.size[0], src.size[1] - cut))
              else:
                  resized = (
                  src = src.resize(resized, Image.ANTIALIAS)
                  src = src.crop((0, cut, src.size[0], src.size[1] - cut))
              else:
                  resized = (
-                    src.size[0] * trg_size[1] / src.size[1],
+                    int(round(src.size[0] * trg_size[1] / src.size[1])),
                      trg_size[1],
                  )
                      trg_size[1],
                  )
-                cut = (resized[0] - trg_size[0]) / 2
+                cut = (resized[0] - trg_size[0]) // 2
                  src = src.resize(resized, Image.ANTIALIAS)
                  src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
  
                  src = src.resize(resized, Image.ANTIALIAS)
                  src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
  
@@ -448,11 +450,11 @@ class LogoWLCover(WLCover):
          img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
  
          cursor = metr.width - metr.gradient_logo_margin_right
          img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
  
          cursor = metr.width - metr.gradient_logo_margin_right
-        logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2
+        logo_top = int(metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2)
          for logo_path in self.gradient_logos[::-1]:
              logo = Image.open(get_resource(logo_path))
              logo = logo.resize(
          for logo_path in self.gradient_logos[::-1]:
              logo = Image.open(get_resource(logo_path))
              logo = logo.resize(
-                (logo.size[0] * metr.gradient_logo_height / logo.size[1], metr.gradient_logo_height),
+                (int(round(logo.size[0] * metr.gradient_logo_height / logo.size[1])), metr.gradient_logo_height),
                  Image.ANTIALIAS)
              cursor -= logo.size[0]
              img.paste(logo, (cursor, logo_top), mask=logo)
                  Image.ANTIALIAS)
              cursor -= logo.size[0]
              img.paste(logo, (cursor, logo_top), mask=logo)
diff --git a/librarian/dcparser.py b/librarian/dcparser.py

index f8dfaf9..eeb750a 100644 (file)
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -3,10 +3,14 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from xml.parsers.expat import ExpatError
  from datetime import date
  from xml.parsers.expat import ExpatError
  from datetime import date
+from functools import total_ordering
  import time
  import re
  import time
  import re
+import six
  from librarian.util import roman_to_int
  
  from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
  from librarian.util import roman_to_int
  
  from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
@@ -16,7 +20,7 @@ import lxml.etree as etree  # ElementTree API using libxml2
  from lxml.etree import XMLSyntaxError
  
  
  from lxml.etree import XMLSyntaxError
  
  
-class TextPlus(unicode):
+class TextPlus(six.text_type):
      pass
  
  
      pass
  
  
@@ -27,6 +31,8 @@ class DatePlus(date):
  # ==============
  # = Converters =
  # ==============
  # ==============
  # = Converters =
  # ==============
+@six.python_2_unicode_compatible
+@total_ordering
  class Person(object):
      """Single person with last name and a list of first names."""
      def __init__(self, last_name, *first_names):
  class Person(object):
      """Single person with last name and a list of first names."""
      def __init__(self, last_name, *first_names):
@@ -55,13 +61,13 @@ class Person(object):
      def __eq__(self, right):
          return self.last_name == right.last_name and self.first_names == right.first_names
  
      def __eq__(self, right):
          return self.last_name == right.last_name and self.first_names == right.first_names
  
-    def __cmp__(self, other):
-        return cmp((self.last_name, self.first_names), (other.last_name, other.first_names))
+    def __lt__(self, other):
+        return (self.last_name, self.first_names) < (other.last_name, other.first_names)
  
      def __hash__(self):
          return hash((self.last_name, self.first_names))
  
  
      def __hash__(self):
          return hash((self.last_name, self.first_names))
  
-    def __unicode__(self):
+    def __str__(self):
          if len(self.first_names) > 0:
              return '%s, %s' % (self.last_name, ' '.join(self.first_names))
          else:
          if len(self.first_names) > 0:
              return '%s, %s' % (self.last_name, ' '.join(self.first_names))
          else:
@@ -83,7 +89,7 @@ for now we will translate this to some single date losing information of course.
      """
      try:
          # check out the "N. poł X w." syntax
      """
      try:
          # check out the "N. poł X w." syntax
-        if isinstance(text, str):
+        if isinstance(text, six.binary_type):
              text = text.decode("utf-8")
  
          century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
              text = text.decode("utf-8")
  
          century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
@@ -94,7 +100,7 @@ for now we will translate this to some single date losing information of course.
          if m:
              half = m.group(1)
              decade = m.group(3)
          if m:
              half = m.group(1)
              decade = m.group(3)
-            century = roman_to_int(str(m.group(2)))
+            century = roman_to_int(m.group(2))
              if half is not None:
                  if decade is not None:
                      raise ValueError("Bad date format. Cannot specify both half and decade of century")
              if half is not None:
                  if decade is not None:
                      raise ValueError("Bad date format. Cannot specify both half and decade of century")
@@ -114,7 +120,7 @@ for now we will translate this to some single date losing information of course.
              raise ValueError
  
          return DatePlus(t[0], t[1], t[2])
              raise ValueError
  
          return DatePlus(t[0], t[1], t[2])
-    except ValueError, e:
+    except ValueError as e:
          raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
  
  
          raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
  
  
@@ -123,7 +129,7 @@ def as_person(text):
  
  
  def as_unicode(text):
  
  
  def as_unicode(text):
-    if isinstance(text, unicode):
+    if isinstance(text, six.text_type):
          return text
      else:
          return TextPlus(text.decode('utf-8'))
          return text
      else:
          return TextPlus(text.decode('utf-8'))
@@ -174,7 +180,7 @@ class Field(object):
                  if hasattr(val[0], 'lang'):
                      setattr(nv, 'lang', val[0].lang)
                  return nv
                  if hasattr(val[0], 'lang'):
                      setattr(nv, 'lang', val[0].lang)
                  return nv
-        except ValueError, e:
+        except ValueError as e:
              raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
  
      def validate(self, fdict, fallbacks=None, strict=False):
              raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
  
      def validate(self, fdict, fallbacks=None, strict=False):
@@ -221,9 +227,7 @@ class DCInfo(type):
          return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
  
  
          return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
  
  
-class WorkInfo(object):
-    __metaclass__ = DCInfo
-
+class WorkInfo(six.with_metaclass(DCInfo, object)):
      FIELDS = (
          Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
          Field(DCNS('title'), 'title'),
      FIELDS = (
          Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
          Field(DCNS('title'), 'title'),
@@ -255,9 +259,8 @@ class WorkInfo(object):
      )
  
      @classmethod
      )
  
      @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        from StringIO import StringIO
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
@@ -282,9 +285,9 @@ class WorkInfo(object):
  
              # extract data from the element and make the info
              return cls.from_element(desc_tag, *args, **kwargs)
  
              # extract data from the element and make the info
              return cls.from_element(desc_tag, *args, **kwargs)
-        except XMLSyntaxError, e:
+        except XMLSyntaxError as e:
              raise ParseError(e)
              raise ParseError(e)
-        except ExpatError, e:
+        except ExpatError as e:
              raise ParseError(e)
  
      @classmethod
              raise ParseError(e)
  
      @classmethod
@@ -306,7 +309,7 @@ class WorkInfo(object):
              fv = field_dict.get(e.tag, [])
              if e.text is not None:
                  text = e.text
              fv = field_dict.get(e.tag, [])
              if e.text is not None:
                  text = e.text
-                if not isinstance(text, unicode):
+                if not isinstance(text, six.text_type):
                      text = text.decode('utf-8')
                  val = TextPlus(text)
                  val.lang = e.attrib.get(XMLNS('lang'), lang)
                      text = text.decode('utf-8')
                  val = TextPlus(text)
                  val.lang = e.attrib.get(XMLNS('lang'), lang)
@@ -394,11 +397,11 @@ class WorkInfo(object):
                      for x in v:
                          e = etree.Element(field.uri)
                          if x is not None:
                      for x in v:
                          e = etree.Element(field.uri)
                          if x is not None:
-                            e.text = unicode(x)
+                            e.text = six.text_type(x)
                          description.append(e)
                  else:
                      e = etree.Element(field.uri)
                          description.append(e)
                  else:
                      e = etree.Element(field.uri)
-                    e.text = unicode(v)
+                    e.text = six.text_type(v)
                      description.append(e)
  
          return root
                      description.append(e)
  
          return root
@@ -413,9 +416,9 @@ class WorkInfo(object):
                  if field.multiple:
                      if len(v) == 0:
                          continue
                  if field.multiple:
                      if len(v) == 0:
                          continue
-                    v = [unicode(x) for x in v if x is not None]
+                    v = [six.text_type(x) for x in v if x is not None]
                  else:
                  else:
-                    v = unicode(v)
+                    v = six.text_type(v)
  
                  dc[field.name] = {'uri': field.uri, 'value': v}
          rdf['fields'] = dc
  
                  dc[field.name] = {'uri': field.uri, 'value': v}
          rdf['fields'] = dc
@@ -430,15 +433,15 @@ class WorkInfo(object):
                  if field.multiple:
                      if len(v) == 0:
                          continue
                  if field.multiple:
                      if len(v) == 0:
                          continue
-                    v = [unicode(x) for x in v if x is not None]
+                    v = [six.text_type(x) for x in v if x is not None]
                  else:
                  else:
-                    v = unicode(v)
+                    v = six.text_type(v)
                  result[field.name] = v
  
              if field.salias:
                  v = getattr(self, field.salias)
                  if v is not None:
                  result[field.name] = v
  
              if field.salias:
                  v = getattr(self, field.salias)
                  if v is not None:
-                    result[field.salias] = unicode(v)
+                    result[field.salias] = six.text_type(v)
  
          return result
  
  
          return result
  
diff --git a/librarian/embeds/__init__.py b/librarian/embeds/__init__.py

index 3b1abdb..fa74530 100644 (file)
--- a/librarian/embeds/__init__.py
+++ b/librarian/embeds/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  import importlib
  from lxml import etree
  
  import importlib
  from lxml import etree
  
diff --git a/librarian/embeds/latex.py b/librarian/embeds/latex.py

index 0201d08..8425d03 100644 (file)
--- a/librarian/embeds/latex.py
+++ b/librarian/embeds/latex.py
@@ -1,4 +1,6 @@
  # -*- coding: utf-8 -*-
  # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
  import os
  import shutil
  from subprocess import call, PIPE
  import os
  import shutil
  from subprocess import call, PIPE
@@ -10,14 +12,14 @@ from . import DataEmbed, create_embed, downgrades_to
  class LaTeX(DataEmbed):
      @downgrades_to('image/png')
      def to_png(self):
  class LaTeX(DataEmbed):
      @downgrades_to('image/png')
      def to_png(self):
-        tmpl = open(get_resource('res/embeds/latex/template.tex')).read().decode('utf-8')
+        tmpl = open(get_resource('res/embeds/latex/template.tex'), 'rb').read().decode('utf-8')
          tempdir = mkdtemp('-librarian-embed-latex')
          fpath = os.path.join(tempdir, 'doc.tex')
          tempdir = mkdtemp('-librarian-embed-latex')
          fpath = os.path.join(tempdir, 'doc.tex')
-        with open(fpath, 'w') as f:
+        with open(fpath, 'wb') as f:
              f.write((tmpl % {'code': self.data}).encode('utf-8'))
          call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
          call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
               os.path.join(tempdir, 'doc.png')])
              f.write((tmpl % {'code': self.data}).encode('utf-8'))
          call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
          call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
               os.path.join(tempdir, 'doc.png')])
-        pngdata = open(os.path.join(tempdir, 'doc.png')).read()
+        pngdata = open(os.path.join(tempdir, 'doc.png'), 'rb').read()
          shutil.rmtree(tempdir)
          return create_embed('image/png', data=pngdata)
          shutil.rmtree(tempdir)
          return create_embed('image/png', data=pngdata)
diff --git a/librarian/embeds/mathml.py b/librarian/embeds/mathml.py

index dd78f05..bd58baf 100644 (file)
--- a/librarian/embeds/mathml.py
+++ b/librarian/embeds/mathml.py
@@ -1,5 +1,8 @@
  # -*- coding: utf-8 -*-
  # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
  from lxml import etree
  from lxml import etree
+import six
  from librarian import get_resource
  from . import TreeEmbed, create_embed, downgrades_to
  
  from librarian import get_resource
  from . import TreeEmbed, create_embed, downgrades_to
  
@@ -9,4 +12,4 @@ class MathML(TreeEmbed):
      def to_latex(self):
          xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
          output = self.tree.xslt(xslt)
      def to_latex(self):
          xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
          output = self.tree.xslt(xslt)
-        return create_embed('application/x-latex', data=unicode(output))
+        return create_embed('application/x-latex', data=six.text_type(output))
diff --git a/librarian/epub.py b/librarian/epub.py

index 333b56f..e9670d5 100644 (file)
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -3,13 +3,13 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
  
  import os
  import os.path
  import re
  import subprocess
  
  import os
  import os.path
  import re
  import subprocess
-from StringIO import StringIO
+from six import BytesIO
  from copy import deepcopy
  from mimetypes import guess_type
  
  from copy import deepcopy
  from mimetypes import guess_type
  
@@ -30,7 +30,7 @@ functions.reg_lang_code_3to2()
  
  
  def squeeze_whitespace(s):
  
  
  def squeeze_whitespace(s):
-    return re.sub(r'\s+', ' ', s)
+    return re.sub(b'\\s+', b' ', s)
  
  
  def set_hyph_language(source_tree):
  
  
  def set_hyph_language(source_tree):
@@ -38,7 +38,7 @@ def set_hyph_language(source_tree):
          result = ''
          text = ''.join(text)
          with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
          result = ''
          text = ''.join(text)
          with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f:
+            for line in f.read().decode('latin1').split('\n'):
                  list = line.strip().split('|')
                  if list[0] == text:
                      result = list[2]
                  list = line.strip().split('|')
                  if list[0] == text:
                      result = list[2]
@@ -77,12 +77,12 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph):
  def inner_xml(node):
      """ returns node's text and children as a string
  
  def inner_xml(node):
      """ returns node's text and children as a string
  
-    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
+    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
      x<b>y</b>z
      """
  
      nt = node.text if node.text is not None else ''
      x<b>y</b>z
      """
  
      nt = node.text if node.text is not None else ''
-    return ''.join([nt] + [etree.tostring(child) for child in node])
+    return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
  
  
  def set_inner_xml(node, text):
  
  
  def set_inner_xml(node, text):
@@ -90,7 +90,7 @@ def set_inner_xml(node, text):
  
      >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
      >>> set_inner_xml(e, 'x<b>y</b>z')
  
      >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
      >>> set_inner_xml(e, 'x<b>y</b>z')
-    >>> print etree.tostring(e)
+    >>> print(etree.tostring(e, encoding='unicode'))
      <a>x<b>y</b>z</a>
      """
  
      <a>x<b>y</b>z</a>
      """
  
@@ -102,7 +102,7 @@ def set_inner_xml(node, text):
  def node_name(node):
      """ Find out a node's name
  
  def node_name(node):
      """ Find out a node's name
  
-    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
+    >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
      XYZ
      """
  
      XYZ
      """
  
@@ -122,7 +122,7 @@ def xslt(xml, sheet, **kwargs):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
          transform = etree.XSLT(etree.parse(xsltf))
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
          transform = etree.XSLT(etree.parse(xsltf))
-        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
          return transform(xml, **params)
  
  
          return transform(xml, **params)
  
  
@@ -172,8 +172,8 @@ class Stanza(object):
  
      >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
      >>> Stanza(s).versify()
  
      >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
      >>> Stanza(s).versify()
-    >>> print etree.tostring(s)
-    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
+    >>> print(etree.tostring(s, encoding='unicode'))
+    <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
      y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
  
      """
      y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
  
      """
@@ -325,8 +325,8 @@ class TOC(object):
          return "\n".join(texts)
  
      def html(self):
          return "\n".join(texts)
  
      def html(self):
-        with open(get_resource('epub/toc.html')) as f:
-            t = unicode(f.read(), 'utf-8')
+        with open(get_resource('epub/toc.html'), 'rb') as f:
+            t = f.read().decode('utf-8')
          return t % self.html_part()
  
  
          return t % self.html_part()
  
  
@@ -546,16 +546,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
      mime.compress_type = zipfile.ZIP_STORED
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
      mime.compress_type = zipfile.ZIP_STORED
-    mime.extra = ''
-    zip.writestr(mime, 'application/epub+zip')
+    mime.extra = b''
+    zip.writestr(mime, b'application/epub+zip')
      zip.writestr(
          'META-INF/container.xml',
      zip.writestr(
          'META-INF/container.xml',
-        '<?xml version="1.0" ?>'
-        '<container version="1.0" '
-        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
-        '<rootfiles><rootfile full-path="OPS/content.opf" '
-        'media-type="application/oebps-package+xml" />'
-        '</rootfiles></container>'
+        b'<?xml version="1.0" ?>'
+        b'<container version="1.0" '
+        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+        b'<rootfiles><rootfile full-path="OPS/content.opf" '
+        b'media-type="application/oebps-package+xml" />'
+        b'</rootfiles></container>'
      )
      zip.write(get_resource('res/wl-logo-small.png'),
                os.path.join('OPS', 'logo_wolnelektury.png'))
      )
      zip.write(get_resource('res/wl-logo-small.png'),
                os.path.join('OPS', 'logo_wolnelektury.png'))
@@ -569,7 +569,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
          if cover is True:
              cover = make_cover
  
          if cover is True:
              cover = make_cover
  
-        cover_file = StringIO()
+        cover_file = BytesIO()
          bound_cover = cover(document.book_info)
          bound_cover.save(cover_file)
          cover_name = 'cover.%s' % bound_cover.ext()
          bound_cover = cover(document.book_info)
          bound_cover.save(cover_file)
          cover_name = 'cover.%s' % bound_cover.ext()
@@ -602,12 +602,12 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
      annotations = etree.Element('annotations')
  
      toc_file = etree.fromstring(
      annotations = etree.Element('annotations')
  
      toc_file = etree.fromstring(
-        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
-        '"-//NISO//DTD ncx 2005-1//EN" '
-        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
-        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
-        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
-        '</navMap></ncx>'
+        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+        b'"-//NISO//DTD ncx 2005-1//EN" '
+        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+        b'</navMap></ncx>'
      )
      nav_map = toc_file[-1]
  
      )
      nav_map = toc_file[-1]
  
@@ -645,7 +645,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
          '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="support" />'))
          '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="support" />'))
-    html_string = open(get_resource('epub/support.html')).read()
+    html_string = open(get_resource('epub/support.html'), 'rb').read()
      chars.update(used_chars(etree.fromstring(html_string)))
      zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
  
      chars.update(used_chars(etree.fromstring(html_string)))
      zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
  
@@ -679,7 +679,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                                os.path.join(tmpdir, fname)]
              env = {"PERL_USE_UNSAFE_INC": "1"}
              if verbose:
                                os.path.join(tmpdir, fname)]
              env = {"PERL_USE_UNSAFE_INC": "1"}
              if verbose:
-                print "Running font-optimizer"
+                print("Running font-optimizer")
                  subprocess.check_call(optimizer_call, env=env)
              else:
                  dev_null = open(os.devnull, 'w')
                  subprocess.check_call(optimizer_call, env=env)
              else:
                  dev_null = open(os.devnull, 'w')
diff --git a/librarian/fb2.py b/librarian/fb2.py

index 25a4c1f..6dd1c35 100644 (file)
--- a/librarian/fb2.py
+++ b/librarian/fb2.py
@@ -3,9 +3,12 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  import os.path
  from copy import deepcopy
  from lxml import etree
  import os.path
  from copy import deepcopy
  from lxml import etree
+import six
  
  from librarian import functions, OutputFile
  from .epub import replace_by_verse
  
  from librarian import functions, OutputFile
  from .epub import replace_by_verse
@@ -62,6 +65,6 @@ def transform(wldoc, verbose=False,
  
      result = document.transform(style)
  
  
      result = document.transform(style)
  
-    return OutputFile.from_string(unicode(result).encode('utf-8'))
+    return OutputFile.from_bytes(six.text_type(result).encode('utf-8'))
  
  # vim:et
  
  # vim:et
diff --git a/librarian/functions.py b/librarian/functions.py

index 75e2911..e5a47d6 100644 (file)
--- a/librarian/functions.py
+++ b/librarian/functions.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from lxml import etree
  import re
  
  from lxml import etree
  import re
  
@@ -112,7 +114,7 @@ def reg_lang_code_3to2():
          result = ''
          text = ''.join(text)
          with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
          result = ''
          text = ''.join(text)
          with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f:
+            for line in f.read().decode('latin1').split('\n'):
                  list = line.strip().split('|')
                  if list[0] == text:
                      result = list[2]
                  list = line.strip().split('|')
                  if list[0] == text:
                      result = list[2]
diff --git a/librarian/html.py b/librarian/html.py

index a566f71..67f0061 100644 (file)
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -3,9 +3,10 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  import re
  import os
  import re
-import cStringIO
  import copy
  
  from lxml import etree
  import copy
  
  from lxml import etree
@@ -13,6 +14,8 @@ from librarian import XHTMLNS, ParseError, OutputFile
  from librarian import functions
  
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  from librarian import functions
  
  from lxml.etree import XMLSyntaxError, XSLTApplyError
+import six
+
  
  functions.reg_substitute_entities()
  functions.reg_person_name()
  
  functions.reg_substitute_entities()
  functions.reg_person_name()
@@ -33,11 +36,10 @@ def html_has_content(text):
  
  
  def transform_abstrakt(abstrakt_element):
  
  
  def transform_abstrakt(abstrakt_element):
-    from cStringIO import StringIO
      style_filename = get_stylesheet('legacy')
      style = etree.parse(style_filename)
      xml = etree.tostring(abstrakt_element)
      style_filename = get_stylesheet('legacy')
      style = etree.parse(style_filename)
      xml = etree.tostring(abstrakt_element)
-    document = etree.parse(StringIO(xml.replace('abstrakt', 'dlugi_cytat')))  # HACK
+    document = etree.parse(six.BytesIO(xml.replace('abstrakt', 'dlugi_cytat')))  # HACK
      result = document.xslt(style)
      html = re.sub('<a name="sec[0-9]*"/>', '', etree.tostring(result))
      return re.sub('</?blockquote[^>]*>', '', html)
      result = document.xslt(style)
      html = re.sub('<a name="sec[0-9]*"/>', '', etree.tostring(result))
      return re.sub('</?blockquote[^>]*>', '', html)
@@ -77,16 +79,17 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None):
              add_table_of_themes(result.getroot())
              add_table_of_contents(result.getroot())
  
              add_table_of_themes(result.getroot())
              add_table_of_contents(result.getroot())
  
-            return OutputFile.from_string(etree.tostring(
+            return OutputFile.from_bytes(etree.tostring(
                  result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
          else:
              return None
      except KeyError:
          raise ValueError("'%s' is not a valid stylesheet.")
                  result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
          else:
              return None
      except KeyError:
          raise ValueError("'%s' is not a valid stylesheet.")
-    except (XMLSyntaxError, XSLTApplyError), e:
+    except (XMLSyntaxError, XSLTApplyError) as e:
          raise ParseError(e)
  
  
          raise ParseError(e)
  
  
+@six.python_2_unicode_compatible
  class Fragment(object):
      def __init__(self, id, themes):
          super(Fragment, self).__init__()
  class Fragment(object):
      def __init__(self, id, themes):
          super(Fragment, self).__init__()
@@ -106,7 +109,7 @@ class Fragment(object):
                  try:
                      stack.pop()
                  except IndexError:
                  try:
                      stack.pop()
                  except IndexError:
-                    print 'CLOSED NON-OPEN TAG:', element
+                    print('CLOSED NON-OPEN TAG:', element)
  
          stack.reverse()
          return self.events + stack
  
          stack.reverse()
          return self.events + stack
@@ -128,7 +131,7 @@ class Fragment(object):
  
          return ''.join(result)
  
  
          return ''.join(result)
  
-    def __unicode__(self):
+    def __str__(self):
          return self.to_string()
  
  
          return self.to_string()
  
  
@@ -139,7 +142,7 @@ def extract_fragments(input_filename):
  
      # iterparse would die on a HTML document
      parser = etree.HTMLParser(encoding='utf-8')
  
      # iterparse would die on a HTML document
      parser = etree.HTMLParser(encoding='utf-8')
-    buf = cStringIO.StringIO()
+    buf = six.BytesIO()
      buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
      buf.seek(0)
  
      buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
      buf.seek(0)
  
@@ -173,7 +176,7 @@ def extract_fragments(input_filename):
                  try:
                      fragment = open_fragments[element.get('fid')]
                  except KeyError:
                  try:
                      fragment = open_fragments[element.get('fid')]
                  except KeyError:
-                    print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
+                    print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
                  else:
                      closed_fragments[fragment.id] = fragment
                      del open_fragments[fragment.id]
                  else:
                      closed_fragments[fragment.id] = fragment
                      del open_fragments[fragment.id]
@@ -207,7 +210,7 @@ def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None
              link_text = prefix
          anchor = etree.Element('a', href='#%s' % prefix)
          anchor.set('class', 'anchor')
              link_text = prefix
          anchor = etree.Element('a', href='#%s' % prefix)
          anchor.set('class', 'anchor')
-        anchor.text = unicode(link_text)
+        anchor.text = six.text_type(link_text)
          parent.insert(index, anchor)
  
      if with_target:
          parent.insert(index, anchor)
  
      if with_target:
@@ -247,7 +250,7 @@ def raw_printable_text(element):
      for e in working.findall('a'):
          if e.get('class') in ('annotation', 'theme-begin'):
              e.text = ''
      for e in working.findall('a'):
          if e.get('class') in ('annotation', 'theme-begin'):
              e.text = ''
-    return etree.tostring(working, method='text', encoding=unicode).strip()
+    return etree.tostring(working, method='text', encoding='unicode').strip()
  
  
  def add_table_of_contents(root):
  
  
  def add_table_of_contents(root):
@@ -300,7 +303,7 @@ def add_table_of_themes(root):
          theme_names = [s.strip() for s in fragment.text.split(',')]
          for theme_name in theme_names:
              book_themes.setdefault(theme_name, []).append(fragment.get('name'))
          theme_names = [s.strip() for s in fragment.text.split(',')]
          for theme_name in theme_names:
              book_themes.setdefault(theme_name, []).append(fragment.get('name'))
-    book_themes = book_themes.items()
+    book_themes = list(book_themes.items())
      book_themes.sort(key=lambda s: sortify(s[0]))
      themes_div = etree.Element('div', id="themes")
      themes_ol = etree.SubElement(themes_div, 'ol')
      book_themes.sort(key=lambda s: sortify(s[0]))
      themes_div = etree.Element('div', id="themes")
      themes_ol = etree.SubElement(themes_div, 'ol')
@@ -326,7 +329,7 @@ def extract_annotations(html_path):
      parser = etree.HTMLParser(encoding='utf-8')
      tree = etree.parse(html_path, parser)
      footnotes = tree.find('//*[@id="footnotes"]')
      parser = etree.HTMLParser(encoding='utf-8')
      tree = etree.parse(html_path, parser)
      footnotes = tree.find('//*[@id="footnotes"]')
-    re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
+    re_qualifier = re.compile(r'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
      if footnotes is not None:
          for footnote in footnotes.findall('div'):
              fn_type = footnote.get('class').split('-')[1]
      if footnotes is not None:
          for footnote in footnotes.findall('div'):
              fn_type = footnote.get('class').split('-')[1]
@@ -335,8 +338,8 @@ def extract_annotations(html_path):
              footnote.text = None
              if len(footnote) and footnote[-1].tail == '\n':
                  footnote[-1].tail = None
              footnote.text = None
              if len(footnote) and footnote[-1].tail == '\n':
                  footnote[-1].tail = None
-            text_str = etree.tostring(footnote, method='text', encoding=unicode).strip()
-            html_str = etree.tostring(footnote, method='html', encoding=unicode).strip()
+            text_str = etree.tostring(footnote, method='text', encoding='unicode').strip()
+            html_str = etree.tostring(footnote, method='html', encoding='unicode').strip()
  
              match = re_qualifier.match(text_str)
              if match:
  
              match = re_qualifier.match(text_str)
              if match:
diff --git a/librarian/hyphenator.py b/librarian/hyphenator.py

index 18d402b..aa5b4c3 100644 (file)
--- a/librarian/hyphenator.py
+++ b/librarian/hyphenator.py
@@ -14,6 +14,7 @@ info@wilbertberendsen.nl
  License: LGPL.
  
  """
  License: LGPL.
  
  """
+from __future__ import print_function, unicode_literals
  
  import sys
  import re
  
  import sys
  import re
@@ -235,5 +236,5 @@ if __name__ == "__main__":
      h = Hyphenator(dict_file, left=1, right=1)
  
      for i in h(word):
      h = Hyphenator(dict_file, left=1, right=1)
  
      for i in h(word):
-        print i
+        print(i)
  
  
diff --git a/librarian/mobi.py b/librarian/mobi.py

index c3c8f28..6f1f5d6 100644 (file)
--- a/librarian/mobi.py
+++ b/librarian/mobi.py
@@ -3,6 +3,7 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
  
  from copy import deepcopy
  import os
  
  from copy import deepcopy
  import os
@@ -13,13 +14,16 @@ from librarian import OutputFile
  
  
  def transform(wldoc, verbose=False, sample=None, cover=None,
  
  
  def transform(wldoc, verbose=False, sample=None, cover=None,
-              use_kindlegen=False, flags=None, hyphenate=True, ilustr_path=''):
+              use_kindlegen=False, flags=None, hyphenate=True, ilustr_path='',
+              converter_path=None):
      """ produces a MOBI file
  
      wldoc: a WLDocument
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover factory overriding default
      flags: less-advertising,
      """ produces a MOBI file
  
      wldoc: a WLDocument
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover factory overriding default
      flags: less-advertising,
+    converter_path: override path to MOBI converter,
+      either ebook-convert or kindlegen
      """
  
      document = deepcopy(wldoc)
      """
  
      document = deepcopy(wldoc)
@@ -40,10 +44,12 @@ def transform(wldoc, verbose=False, sample=None, cover=None,
  
      if use_kindlegen:
          output_file_basename = os.path.basename(output_file.name)
  
      if use_kindlegen:
          output_file_basename = os.path.basename(output_file.name)
-        subprocess.check_call(['kindlegen', '-c2', epub.get_filename(),
-                              '-o', output_file_basename], **kwargs)
+        subprocess.check_call([converter_path or 'kindlegen',
+                               '-c2', epub.get_filename(),
+                               '-o', output_file_basename], **kwargs)
      else:
      else:
-        subprocess.check_call(['ebook-convert', epub.get_filename(),
+        subprocess.check_call([converter_path or 'ebook-convert',
+                               epub.get_filename(),
                                 output_file.name, '--no-inline-toc',
                                 '--mobi-file-type=both',
                                 '--mobi-ignore-margins'], **kwargs)
                                 output_file.name, '--no-inline-toc',
                                 '--mobi-file-type=both',
                                 '--mobi-ignore-margins'], **kwargs)
diff --git a/librarian/packagers.py b/librarian/packagers.py

index f57a983..b3f5548 100644 (file)
--- a/librarian/packagers.py
+++ b/librarian/packagers.py
@@ -3,11 +3,13 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  from librarian import pdf, epub, mobi, DirDocProvider, ParseError
  from librarian.parser import WLDocument
  
  import os
  from librarian import pdf, epub, mobi, DirDocProvider, ParseError
  from librarian.parser import WLDocument
  
-from util import makedirs
+from .util import makedirs
  
  
  class Packager(object):
  
  
  class Packager(object):
@@ -39,14 +41,14 @@ class Packager(object):
          try:
              for main_input in input_filenames:
                  if verbose:
          try:
              for main_input in input_filenames:
                  if verbose:
-                    print main_input
+                    print(main_input)
                  cls.prepare_file(main_input, output_dir, verbose, overwrite)
                  cls.prepare_file(main_input, output_dir, verbose, overwrite)
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
+        except ParseError as e:
+            print('%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e.message
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e.message
-            }
+            })
  
  
  class EpubPackager(Packager):
  
  
  class EpubPackager(Packager):
diff --git a/librarian/parser.py b/librarian/parser.py

index 43cb0a9..73ddd52 100644 (file)
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
  from librarian import RDFNS
  from librarian.cover import make_cover
  from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
  from librarian import RDFNS
  from librarian.cover import make_cover
@@ -14,7 +16,7 @@ from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  import os
  import re
  
  import os
  import re
-from StringIO import StringIO
+import six
  
  
  class WLDocument(object):
  
  
  class WLDocument(object):
@@ -45,14 +47,14 @@ class WLDocument(object):
              self.book_info = None
  
      @classmethod
              self.book_info = None
  
      @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
  
          # first, prepare for parsing
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
  
          # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
+        if isinstance(xmlfile, six.text_type):
              file = open(xmlfile, 'rb')
              try:
                  data = file.read()
              file = open(xmlfile, 'rb')
              try:
                  data = file.read()
@@ -61,17 +63,17 @@ class WLDocument(object):
          else:
              data = xmlfile.read()
  
          else:
              data = xmlfile.read()
  
-        if not isinstance(data, unicode):
+        if not isinstance(data, six.text_type):
              data = data.decode('utf-8')
  
          data = data.replace(u'\ufeff', '')
  
          try:
              parser = etree.XMLParser(remove_blank_text=False)
              data = data.decode('utf-8')
  
          data = data.replace(u'\ufeff', '')
  
          try:
              parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+            tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
  
              return cls(tree, *args, **kwargs)
  
              return cls(tree, *args, **kwargs)
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+        except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
              raise ParseError(e)
  
      def swap_endlines(self):
              raise ParseError(e)
  
      def swap_endlines(self):
@@ -139,7 +141,7 @@ class WLDocument(object):
  
      def serialize(self):
          self.update_dc()
  
      def serialize(self):
          self.update_dc()
-        return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
+        return etree.tostring(self.edoc, encoding='unicode', pretty_print=True)
  
      def merge_chunks(self, chunk_dict):
          unmerged = []
  
      def merge_chunks(self, chunk_dict):
          unmerged = []
@@ -150,7 +152,7 @@ class WLDocument(object):
                  node = self.edoc.xpath(xpath)[0]
                  repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
                  node.getparent().replace(node, repl)
                  node = self.edoc.xpath(xpath)[0]
                  repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
                  node.getparent().replace(node, repl)
-            except Exception, e:
+            except Exception as e:
                  unmerged.append(repr((key, xpath, e)))
  
          return unmerged
                  unmerged.append(repr((key, xpath, e)))
  
          return unmerged
@@ -220,7 +222,7 @@ class WLDocument(object):
          if output_dir_path:
              save_path = output_dir_path
              if make_author_dir:
          if output_dir_path:
              save_path = output_dir_path
              if make_author_dir:
-                save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
+                save_path = os.path.join(save_path, six.text_type(self.book_info.author).encode('utf-8'))
              save_path = os.path.join(save_path, self.book_info.url.slug)
              if ext:
                  save_path += '.%s' % ext
              save_path = os.path.join(save_path, self.book_info.url.slug)
              if ext:
                  save_path += '.%s' % ext
diff --git a/librarian/partners.py b/librarian/partners.py

index 33198f7..671cf4d 100644 (file)
--- a/librarian/partners.py
+++ b/librarian/partners.py
@@ -11,9 +11,10 @@ along with custom cover images etc.
  
  New partners shouldn't be added here, but in the partners repository.
  """
  
  New partners shouldn't be added here, but in the partners repository.
  """
+from __future__ import print_function, unicode_literals
  
  from librarian import packagers, cover
  
  from librarian import packagers, cover
-from util import makedirs
+from .util import makedirs
  
  
  class GandalfEpub(packagers.EpubPackager):
  
  
  class GandalfEpub(packagers.EpubPackager):
@@ -79,7 +80,7 @@ class Virtualo(packagers.Packager):
          try:
              for main_input in input_filenames:
                  if verbose:
          try:
              for main_input in input_filenames:
                  if verbose:
-                    print main_input
+                    print(main_input)
                  path, fname = os.path.realpath(main_input).rsplit('/', 1)
                  provider = DirDocProvider(path)
                  slug, ext = os.path.splitext(fname)
                  path, fname = os.path.realpath(main_input).rsplit('/', 1)
                  provider = DirDocProvider(path)
                  slug, ext = os.path.splitext(fname)
@@ -110,13 +111,13 @@ class Virtualo(packagers.Packager):
                  doc.save_output_file(
                      doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
                      output_path=outfile_sample)
                  doc.save_output_file(
                      doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
                      output_path=outfile_sample)
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
+        except ParseError as e:
+            print('%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e.message
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e.message
-            }
+            })
  
          xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
  
          xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
-        xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
+        xml_file.write(etree.tostring(xml, pretty_print=True, encoding='unicode').encode('utf-8'))
          xml_file.close()
          xml_file.close()
diff --git a/librarian/pdf.py b/librarian/pdf.py

index d67bddf..e6d897d 100644 (file)
--- a/librarian/pdf.py
+++ b/librarian/pdf.py
@@ -9,11 +9,11 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
+
  import os
  import os.path
  import shutil
  import os
  import os.path
  import shutil
-from StringIO import StringIO
  from tempfile import mkdtemp, NamedTemporaryFile
  import re
  from copy import deepcopy
  from tempfile import mkdtemp, NamedTemporaryFile
  import re
  from copy import deepcopy
@@ -23,6 +23,7 @@ from itertools import chain
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
+import six
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
@@ -57,7 +58,7 @@ def insert_tags(doc, split_re, tagname, exclude=None):
  
      >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
      >>> insert_tags(t, re.compile('-'), 'd')
  
      >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
      >>> insert_tags(t, re.compile('-'), 'd')
-    >>> print etree.tostring(t)
+    >>> print(etree.tostring(t, encoding='unicode'))
      <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
      """
  
      <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
      """
  
@@ -196,11 +197,11 @@ def package_available(package, args='', verbose=False):
      tempdir = mkdtemp('-wl2pdf-test')
      fpath = os.path.join(tempdir, 'test.tex')
      f = open(fpath, 'w')
      tempdir = mkdtemp('-wl2pdf-test')
      fpath = os.path.join(tempdir, 'test.tex')
      f = open(fpath, 'w')
-    f.write(r"""
-        \documentclass{wl}
-        \usepackage[%s]{%s}
-        \begin{document}
-        \end{document}
+    f.write("""
+        \\documentclass{wl}
+        \\usepackage[%s]{%s}
+        \\begin{document}
+        \\end{document}
          """ % (args, package))
      f.close()
      if verbose:
          """ % (args, package))
      f.close()
      if verbose:
@@ -306,8 +307,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          del document  # no longer needed large object :)
  
          tex_path = os.path.join(temp, 'doc.tex')
          del document  # no longer needed large object :)
  
          tex_path = os.path.join(temp, 'doc.tex')
-        fout = open(tex_path, 'w')
-        process(StringIO(texml), fout, 'utf-8')
+        fout = open(tex_path, 'wb')
+        process(six.BytesIO(texml), fout, 'utf-8')
          fout.close()
          del texml
  
          fout.close()
          del texml
  
@@ -329,7 +330,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
  
          # some things work better when compiled twice
          # (table of contents, [line numbers - disabled])
  
          # some things work better when compiled twice
          # (table of contents, [line numbers - disabled])
-        for run in xrange(2):
+        for run in range(2):
              if verbose:
                  p = call(['xelatex', tex_path])
              else:
              if verbose:
                  p = call(['xelatex', tex_path])
              else:
@@ -346,7 +347,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          shutil.rmtree(temp)
          return OutputFile.from_filename(output_file.name)
  
          shutil.rmtree(temp)
          return OutputFile.from_filename(output_file.name)
  
-    except (XMLSyntaxError, XSLTApplyError), e:
+    except (XMLSyntaxError, XSLTApplyError) as e:
          raise ParseError(e)
  
  
          raise ParseError(e)
  
  
@@ -361,14 +362,14 @@ def load_including_children(wldoc=None, provider=None, uri=None):
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
-        text = etree.tostring(wldoc.edoc, encoding=unicode)
+        text = etree.tostring(wldoc.edoc, encoding='unicode')
          provider = wldoc.provider
      else:
          raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
  
          provider = wldoc.provider
      else:
          raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
  
-    text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
+    text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
  
  
-    document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
+    document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider)
      document.swap_endlines()
  
      for child_uri in document.book_info.parts:
      document.swap_endlines()
  
      for child_uri in document.book_info.parts:
diff --git a/librarian/picture.py b/librarian/picture.py

index 1aa1d07..d255f55 100644 (file)
--- a/librarian/picture.py
+++ b/librarian/picture.py
@@ -1,14 +1,16 @@
  # -*- coding: utf-8 -*-
  # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
  from operator import and_
  
  from operator import and_
  
-from dcparser import Field, WorkInfo, DCNS
+from .dcparser import Field, WorkInfo, DCNS
  from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
  from xml.parsers.expat import ExpatError
  from os import path
  from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
  from xml.parsers.expat import ExpatError
  from os import path
-from StringIO import StringIO
  from lxml import etree
  from lxml.etree import (XMLSyntaxError, XSLTApplyError, Element)
  import re
  from lxml import etree
  from lxml.etree import (XMLSyntaxError, XSLTApplyError, Element)
  import re
+import six
  
  
  class WLPictureURI(WLURI):
  
  
  class WLPictureURI(WLURI):
@@ -99,14 +101,14 @@ class WLPicture(object):
          self.frame = None
  
      @classmethod
          self.frame = None
  
      @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
  
          # first, prepare for parsing
  
      @classmethod
      def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
  
          # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
+        if isinstance(xmlfile, six.text_type):
              file = open(xmlfile, 'rb')
              try:
                  data = file.read()
              file = open(xmlfile, 'rb')
              try:
                  data = file.read()
@@ -115,7 +117,7 @@ class WLPicture(object):
          else:
              data = xmlfile.read()
  
          else:
              data = xmlfile.read()
  
-        if not isinstance(data, unicode):
+        if not isinstance(data, six.text_type):
              data = data.decode('utf-8')
  
          data = data.replace(u'\ufeff', '')
              data = data.decode('utf-8')
  
          data = data.replace(u'\ufeff', '')
@@ -126,12 +128,12 @@ class WLPicture(object):
  
          try:
              parser = etree.XMLParser(remove_blank_text=False)
  
          try:
              parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+            tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
  
              me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
              me.load_frame_info()
              return me
  
              me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
              me.load_frame_info()
              return me
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+        except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
              raise ParseError(e)
  
      @property
              raise ParseError(e)
  
      @property
@@ -184,7 +186,7 @@ class WLPicture(object):
              pd['coords'] = coords
  
              def want_unicode(x):
              pd['coords'] = coords
  
              def want_unicode(x):
-                if not isinstance(x, unicode):
+                if not isinstance(x, six.text_type):
                      return x.decode('utf-8')
                  else:
                      return x
                      return x.decode('utf-8')
                  else:
                      return x
diff --git a/librarian/sponsor.py b/librarian/sponsor.py

index c9bc35b..1374cda 100644 (file)
--- a/librarian/sponsor.py
+++ b/librarian/sponsor.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import get_resource
  
  
  from librarian import get_resource
  
  
diff --git a/librarian/text.py b/librarian/text.py

index 4064849..7ba6d29 100644 (file)
--- a/librarian/text.py
+++ b/librarian/text.py
@@ -3,10 +3,13 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  import copy
  from librarian import functions, OutputFile
  from lxml import etree
  import os
  import copy
  from librarian import functions, OutputFile
  from lxml import etree
  import os
+import six
  
  
  functions.reg_substitute_entities()
  
  
  functions.reg_substitute_entities()
@@ -103,7 +106,7 @@ def transform(wldoc, flags=None, **options):
              'description': description,
              'url': url,
              'license_description': license_description,
              'description': description,
              'url': url,
              'license_description': license_description,
-            'text': unicode(result),
+            'text': six.text_type(result),
              'source': source,
              'contributors': contributors,
              'funders': funders,
              'source': source,
              'contributors': contributors,
              'funders': funders,
@@ -111,5 +114,5 @@ def transform(wldoc, flags=None, **options):
              'isbn': isbn,
          }).encode('utf-8')
      else:
              'isbn': isbn,
          }).encode('utf-8')
      else:
-        result = unicode(result).encode('utf-8')
-    return OutputFile.from_string("\r\n".join(result.splitlines()) + "\r\n")
+        result = six.text_type(result).encode('utf-8')
+    return OutputFile.from_bytes(b"\r\n".join(result.splitlines()) + b"\r\n")
diff --git a/librarian/util.py b/librarian/util.py

index 0886fd5..c302084 100644 (file)
--- a/librarian/util.py
+++ b/librarian/util.py
@@ -2,6 +2,8 @@
  # by Paul Winkler 
  # http://code.activestate.com/recipes/81611-roman-numerals/
  # PSFL (GPL compatible)
  # by Paul Winkler 
  # http://code.activestate.com/recipes/81611-roman-numerals/
  # PSFL (GPL compatible)
+from __future__ import print_function, unicode_literals
+
  import os
  
  
  import os
  
  
@@ -18,11 +20,11 @@ def int_to_roman(input):
      Traceback (most recent call last):
      ValueError: Argument must be between 1 and 3999
  
      Traceback (most recent call last):
      ValueError: Argument must be between 1 and 3999
  
-    >>> int_to_roman(1.5)
+    >>> int_to_roman(1.5)  # doctest: +IGNORE_EXCEPTION_DETAIL
      Traceback (most recent call last):
      TypeError: expected integer, got <type 'float'>
  
      Traceback (most recent call last):
      TypeError: expected integer, got <type 'float'>
  
-    >>> for i in range(1, 21): print int_to_roman(i)
+    >>> for i in range(1, 21): print(int_to_roman(i))
      ...
      I
      II
      ...
      I
      II
@@ -44,15 +46,15 @@ def int_to_roman(input):
      XVIII
      XIX
      XX
      XVIII
      XIX
      XX
-    >>> print int_to_roman(2000)
+    >>> print(int_to_roman(2000))
      MM
      MM
-    >>> print int_to_roman(1999)
+    >>> print(int_to_roman(1999))
      MCMXCIX
      """
      if type(input) != type(1):
      MCMXCIX
      """
      if type(input) != type(1):
-        raise TypeError, "expected integer, got %s" % type(input)
+        raise TypeError("expected integer, got %s" % type(input))
      if not 0 < input < 4000:
      if not 0 < input < 4000:
-        raise ValueError, "Argument must be between 1 and 3999"    
+        raise ValueError("Argument must be between 1 and 3999")
      ints = (1000, 900,  500, 400, 100,  90, 50,  40, 10,  9,    5,  4,    1)
      nums = ('M',  'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
      result = ""
      ints = (1000, 900,  500, 400, 100,  90, 50,  40, 10,  9,    5,  4,    1)
      nums = ('M',  'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
      result = ""
@@ -66,17 +68,17 @@ def roman_to_int(input):
      """
      Convert a roman numeral to an integer.
      
      """
      Convert a roman numeral to an integer.
      
-    >>> r = range(1, 4000)
+    >>> r = list(range(1, 4000))
      >>> nums = [int_to_roman(i) for i in r]
      >>> ints = [roman_to_int(n) for n in nums]
      >>> nums = [int_to_roman(i) for i in r]
      >>> ints = [roman_to_int(n) for n in nums]
-    >>> print r == ints
+    >>> print(r == ints)
      1
  
      >>> roman_to_int('VVVIV')
      Traceback (most recent call last):
       ...
      ValueError: input is not a valid roman numeral: VVVIV
      1
  
      >>> roman_to_int('VVVIV')
      Traceback (most recent call last):
       ...
      ValueError: input is not a valid roman numeral: VVVIV
-    >>> roman_to_int(1)
+    >>> roman_to_int(1)  # doctest: +IGNORE_EXCEPTION_DETAIL
      Traceback (most recent call last):
       ...
      TypeError: expected string, got <type 'int'>
      Traceback (most recent call last):
       ...
      TypeError: expected string, got <type 'int'>
@@ -90,14 +92,14 @@ def roman_to_int(input):
      ValueError: input is not a valid roman numeral: IL
      """
      if type(input) != type(""):
      ValueError: input is not a valid roman numeral: IL
      """
      if type(input) != type(""):
-        raise TypeError, "expected string, got %s" % type(input)
+        raise TypeError("expected string, got %s" % type(input))
      input = input.upper()
      nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I']
      ints = [1000, 500, 100, 50,  10,  5,    1]
      places = []
      for c in input:
          if not c in nums:
      input = input.upper()
      nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I']
      ints = [1000, 500, 100, 50,  10,  5,    1]
      places = []
      for c in input:
          if not c in nums:
-            raise ValueError, "input is not a valid roman numeral: %s" % input
+            raise ValueError("input is not a valid roman numeral: %s" % input)
      for i in range(len(input)):
          c = input[i]
          value = ints[nums.index(c)]
      for i in range(len(input)):
          c = input[i]
          value = ints[nums.index(c)]
@@ -116,9 +118,9 @@ def roman_to_int(input):
      if int_to_roman(sum) == input:
          return sum
      else:
      if int_to_roman(sum) == input:
          return sum
      else:
-        raise ValueError, 'input is not a valid roman numeral: %s' % input
+        raise ValueError('input is not a valid roman numeral: %s' % input)
  
  
  def makedirs(path):
      if not os.path.isdir(path):
  
  
  def makedirs(path):
      if not os.path.isdir(path):
-        os.makedirs(path)
-\ No newline at end of file
+        os.makedirs(path)
diff --git a/scripts/book2cover b/scripts/book2cover

index 444563c..a81fc63 100755 (executable)
--- a/scripts/book2cover
+++ b/scripts/book2cover
@@ -4,8 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from StringIO import StringIO
-from librarian import OutputFile
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2epub b/scripts/book2epub

index 7a7a41d..5b906b9 100755 (executable)
--- a/scripts/book2epub
+++ b/scripts/book2epub
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2fb2 b/scripts/book2fb2

index 584ae99..de4615b 100755 (executable)
--- a/scripts/book2fb2
+++ b/scripts/book2fb2
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything
  
  
  from librarian.book2anything import Book2Anything
  
  
diff --git a/scripts/book2html b/scripts/book2html

index 2c1d04e..f6d459d 100755 (executable)
--- a/scripts/book2html
+++ b/scripts/book2html
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2mobi b/scripts/book2mobi

index b283309..b0d0686 100755 (executable)
--- a/scripts/book2mobi
+++ b/scripts/book2mobi
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2partner b/scripts/book2partner

index f1892bb..8982354 100755 (executable)
--- a/scripts/book2partner
+++ b/scripts/book2partner
@@ -4,20 +4,15 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
+from collections import OrderedDict
  import inspect
  import optparse
  import os
  import sys
  
  from librarian import packagers
  import inspect
  import optparse
  import os
  import sys
  
  from librarian import packagers
-try:
-    from collections import OrderedDict
-except ImportError:
-    try:
-        from django.utils.datastructures import SortedDict
-        OrderedDict = SortedDict
-    except ImportError:
-        OrderedDict = dict
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
@@ -64,12 +59,12 @@ if __name__ == '__main__':
              if inspect.isclass(package) and issubclass(package, packagers.Packager):
                  packages[package_name] = package
      if not packages:
              if inspect.isclass(package) and issubclass(package, packagers.Packager):
                  packages[package_name] = package
      if not packages:
-        print 'No packages found!'
+        print('No packages found!')
  
      if options.list_packages:
  
      if options.list_packages:
-        print 'Available packages:'
+        print('Available packages:')
          for package_name, package in packages.items():
          for package_name, package in packages.items():
-            print ' ', package_name
+            print(' ', package_name)
          exit(0)
  
      if len(input_filenames) < 1 or not options.packages:
          exit(0)
  
      if len(input_filenames) < 1 or not options.packages:
@@ -79,6 +74,6 @@ if __name__ == '__main__':
      used_packages = [packages[p] for p in options.packages.split(',')]
      for package in used_packages:
          if options.verbose:
      used_packages = [packages[p] for p in options.packages.split(',')]
      for package in used_packages:
          if options.verbose:
-            print 'Package:', package.__name__
+            print('Package:', package.__name__)
          package.prepare(input_filenames,
              options.output_dir, options.verbose, options.overwrite)
          package.prepare(input_filenames,
              options.output_dir, options.verbose, options.overwrite)
diff --git a/scripts/book2pdf b/scripts/book2pdf

index ccb5fac..3c363f1 100755 (executable)
--- a/scripts/book2pdf
+++ b/scripts/book2pdf
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2txt b/scripts/book2txt

index c706a07..0e84ac9 100755 (executable)
--- a/scripts/book2txt
+++ b/scripts/book2txt
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  from librarian.parser import WLDocument
  
  from librarian.book2anything import Book2Anything, Option
  from librarian.parser import WLDocument
  
diff --git a/scripts/bookfragments b/scripts/bookfragments

index 0d94497..b283297 100755 (executable)
--- a/scripts/bookfragments
+++ b/scripts/bookfragments
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  import optparse
  
  import os
  import optparse
  
@@ -29,14 +31,14 @@ if __name__ == '__main__':
      # Do some real work
      for input_filename in input_filenames:
          if options.verbose:
      # Do some real work
      for input_filename in input_filenames:
          if options.verbose:
-            print input_filename
+            print(input_filename)
  
          output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
  
          closed_fragments, open_fragments = html.extract_fragments(input_filename)
  
          for fragment_id in open_fragments:
  
          output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
  
          closed_fragments, open_fragments = html.extract_fragments(input_filename)
  
          for fragment_id in open_fragments:
-            print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
+            print('%s:warning:unclosed fragment #%s' % (input_filename, fragment_id))
  
          output_file = open(output_filename, 'w')
          output_file.write("""
  
          output_file = open(output_filename, 'w')
          output_file.write("""
diff --git a/scripts/fn_qualifiers_list_from_redmine.py b/scripts/fn_qualifiers_list_from_redmine.py

old mode 100644 (file)

new mode 100755 (executable)

index 020b119..66b00cc
--- a/scripts/fn_qualifiers_list_from_redmine.py
+++ b/scripts/fn_qualifiers_list_from_redmine.py
@@ -5,16 +5,17 @@
  This scripts reads the table of footnote qualifiers from Redmine
  and produces contents of fn_qualifiers.py – a list of valid qualifiers.
  """
  This scripts reads the table of footnote qualifiers from Redmine
  and produces contents of fn_qualifiers.py – a list of valid qualifiers.
  """
+from __future__ import print_function, unicode_literals
  
  from lxml import etree
  
  from lxml import etree
-from urllib2 import urlopen
+from six.moves.urllib.request import urlopen
  
  url = 'http://redmine.nowoczesnapolska.org.pl/projects/wl-publikacje/wiki/Lista_skr%C3%B3t%C3%B3w'
  
  parser = etree.HTMLParser()
  tree = etree.parse(urlopen(url), parser)
  
  
  url = 'http://redmine.nowoczesnapolska.org.pl/projects/wl-publikacje/wiki/Lista_skr%C3%B3t%C3%B3w'
  
  parser = etree.HTMLParser()
  tree = etree.parse(urlopen(url), parser)
  
-print """\
+print("""\
  # -*- coding: utf-8
  \"""
  List of standard footnote qualifiers.
  # -*- coding: utf-8
  \"""
  List of standard footnote qualifiers.
@@ -24,12 +25,12 @@ do not edit it.
  from __future__ import unicode_literals
  
  
  from __future__ import unicode_literals
  
  
-FN_QUALIFIERS = {""".encode('utf-8')
+FN_QUALIFIERS = {""")
  
  for td in tree.findall('//td'):
  
  for td in tree.findall('//td'):
-    print ("    '%s': '%s'," % (
+    print(("    '%s': '%s'," % (
          td[0].text.replace('\\', '\\\\').replace("'", "\\'"),
          td[0].tail.strip(' -').replace('\\', '\\\\').replace("'", "\\'")
          td[0].text.replace('\\', '\\\\').replace("'", "\\'"),
          td[0].tail.strip(' -').replace('\\', '\\\\').replace("'", "\\'")
-    )).encode('utf-8')
+    )))
  
  
-print """    }""".encode('utf-8')
+print("""    }""")
diff --git a/scripts/genslugs b/scripts/genslugs

index a234096..9745b68 100755 (executable)
--- a/scripts/genslugs
+++ b/scripts/genslugs
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  import optparse
  
  import os
  import optparse
  
@@ -36,13 +38,13 @@ if __name__ == '__main__':
      # Do some real work
      for input_filename in input_filenames:
          if options.verbose:
      # Do some real work
      for input_filename in input_filenames:
          if options.verbose:
-            print input_filename
+            print(input_filename)
  
          doc = etree.parse(input_filename)
          try:
              title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text
          except AttributeError:
  
          doc = etree.parse(input_filename)
          try:
              title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text
          except AttributeError:
-            print '%s:error:Book title not found. Skipping.' % input_filename
+            print('%s:error:Book title not found. Skipping.' % input_filename)
              continue
  
          parent = ''
              continue
  
          parent = ''
@@ -52,14 +54,14 @@ if __name__ == '__main__':
          except AttributeError:
              pass
          except IndexError:
          except AttributeError:
              pass
          except IndexError:
-            print '%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url)
+            print('%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url))
  
          book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url')
          if book_url is None:
              book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description')
              book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url')
          if not options.force and book_url.text.startswith('http://'):
  
          book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url')
          if book_url is None:
              book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description')
              book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url')
          if not options.force and book_url.text.startswith('http://'):
-            print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text)
+            print('%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text))
              continue
  
          book_url.text = BOOK_URL + slughifi(parent + title)[:60]
              continue
  
          book_url.text = BOOK_URL + slughifi(parent + title)[:60]
diff --git a/setup.py b/setup.py

index 10abe6e..b391f0c 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
  #
  import os
  import os.path
  #
  import os
  import os.path
-from distutils.core import setup
+from setuptools import setup
  
  def whole_tree(prefix, path):
      files = []
  
  def whole_tree(prefix, path):
      files = []
@@ -21,7 +21,7 @@ def whole_tree(prefix, path):
  
  setup(
      name='librarian',
  
  setup(
      name='librarian',
-    version='1.6',
+    version='1.7',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
@@ -29,13 +29,15 @@ setup(
      maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
      url='http://github.com/fnp/librarian',
      packages=['librarian', 'librarian.embeds'],
      maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
      url='http://github.com/fnp/librarian',
      packages=['librarian', 'librarian.embeds'],
-    package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
+    package_data={'librarian': ['xslt/*.xslt', 'xslt/*.xml', 'epub/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
                                  whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res') +
                                  whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
      include_package_data=True,
      install_requires=[
                                  whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res') +
                                  whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
      include_package_data=True,
      install_requires=[
-        'lxml>=2.2',
+        'lxml>=2.2,<=4.3',
          'Pillow',
          'Pillow',
+        'six',
+        'texml',
      ],
      scripts=['scripts/book2html',
               'scripts/book2txt',
      ],
      scripts=['scripts/book2html',
               'scripts/book2txt',
@@ -47,5 +49,4 @@ setup(
               'scripts/book2cover',
               'scripts/bookfragments',
               'scripts/genslugs'],
               'scripts/book2cover',
               'scripts/bookfragments',
               'scripts/genslugs'],
-    tests_require=['nose>=0.11', 'coverage>=3.0.1'],
  )
  )
diff --git a/tests/files/dcparser/andersen_brzydkie_kaczatko.out b/tests/files/dcparser/andersen_brzydkie_kaczatko.out

index c0fb00b..9f07b39 100644 (file)
--- a/tests/files/dcparser/andersen_brzydkie_kaczatko.out
+++ b/tests/files/dcparser/andersen_brzydkie_kaczatko.out
@@ -1,5 +1,5 @@
  {
  {
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko',
      'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925',
      'author': u'Andersen, Hans Christian',
      'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko',
      'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925',
      'author': u'Andersen, Hans Christian',
diff --git a/tests/files/dcparser/biedrzycki_akslop.out b/tests/files/dcparser/biedrzycki_akslop.out

index a7eeffe..588a4b7 100644 (file)
--- a/tests/files/dcparser/biedrzycki_akslop.out
+++ b/tests/files/dcparser/biedrzycki_akslop.out
@@ -1,6 +1,6 @@
  {
      'editors': [u'Sekuła, Aleksandra'],
  {
      'editors': [u'Sekuła, Aleksandra'],
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
      'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
      'author': u'Biedrzycki, Miłosz',
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
      'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
      'author': u'Biedrzycki, Miłosz',
diff --git a/tests/files/dcparser/kochanowski_piesn7.out b/tests/files/dcparser/kochanowski_piesn7.out

index b3eba1e..96198a3 100644 (file)
--- a/tests/files/dcparser/kochanowski_piesn7.out
+++ b/tests/files/dcparser/kochanowski_piesn7.out
@@ -1,5 +1,5 @@
  {
  {
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
      'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
      'author': u'Kochanowski, Jan',
      'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
      'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
      'author': u'Kochanowski, Jan',
diff --git a/tests/files/dcparser/mickiewicz_rybka.out b/tests/files/dcparser/mickiewicz_rybka.out

index a35f935..f3c76c0 100644 (file)
--- a/tests/files/dcparser/mickiewicz_rybka.out
+++ b/tests/files/dcparser/mickiewicz_rybka.out
@@ -1,6 +1,6 @@
  {
      'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'],
  {
      'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'],
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
      'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
      'author': u'Mickiewicz, Adam',
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
      'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
      'author': u'Mickiewicz, Adam',
diff --git a/tests/files/dcparser/sofokles_antygona.out b/tests/files/dcparser/sofokles_antygona.out

index d934602..477988f 100644 (file)
--- a/tests/files/dcparser/sofokles_antygona.out
+++ b/tests/files/dcparser/sofokles_antygona.out
@@ -1,6 +1,6 @@
  {
      'editors': [u'Sekuła, Aleksandra'],
  {
      'editors': [u'Sekuła, Aleksandra'],
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
      'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
      'author': u'Sofokles',
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
      'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
      'author': u'Sofokles',
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.fb2 b/tests/files/text/asnyk_miedzy_nami_expected.fb2

new file mode 100644 (file)

index 0000000..b9e4e13
--- /dev/null
+++ b/tests/files/text/asnyk_miedzy_nami_expected.fb2
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:wl="http://wolnelektury.pl/functions" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:l="http://www.w3.org/1999/xlink">
+  <body>
+    <title>
+      <p>Adam Asnyk</p>
+      <p>Między nami nic nie było</p>
+    </title>
+    <epigraph>
+      <p>
+                                       Utwór opracowany został w ramach projektu
+                                               <a l:href="http://www.wolnelektury.pl/">Wolne Lektury</a>
+                                       przez <a l:href="http://www.nowoczesnapolska.org.pl/">fundację
+                                               Nowoczesna Polska</a>.
+                               </p>
+    </epigraph>
+    <section>
+      <poem>
+        <stanza>
+          <v>Między nami nic nie było!</v>
+          <v>Żadnych zwierzeń, wyznań żadnych!</v>
+          <v>Nic nas z sobą nie łączyło —</v>
+          <v>Prócz wiosennych marzeń zdradnych;</v>
+        </stanza>
+        <stanza>
+          <v>Prócz tych woni, barw i blasków,</v>
+          <v>Unoszących się w przestrzeni;</v>
+          <v>Prócz szumiących śpiewem lasków</v>
+          <v>I tej świeżej łąk zieleni;</v>
+        </stanza>
+        <stanza>
+          <v>Prócz tych kaskad i potoków,</v>
+          <v>Zraszających każdy parów,</v>
+          <v>Prócz girlandy tęcz, obłoków,</v>
+          <v>Prócz natury słodkich czarów;</v>
+        </stanza>
+        <stanza>
+          <v>Prócz tych wspólnych, jasnych zdrojów,</v>
+          <v>Z których serce zachwyt piło;</v>
+          <v>Prócz pierwiosnków i powojów,—</v>
+          <v>Między nami nic nie było!</v>
+        </stanza>
+      </poem>
+    </section>
+  </body>
+  <body name="notes"/>
+</FictionBook>
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt

index 3942928..92cc1bd 100644 (file)
--- a/tests/files/text/asnyk_miedzy_nami_expected.txt
+++ b/tests/files/text/asnyk_miedzy_nami_expected.txt
@@ -37,6 +37,8 @@ Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domen
  \r
  Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898\r
  \r
  \r
  Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898\r
  \r
+Wydawca: Fundacja Nowoczesna Polska\r
+\r
  Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.\r
  \r
  Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.\r
  Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.\r
  \r
  Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.\r
diff --git a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt

new file mode 100644 (file)

index 0000000..cac61d8
--- /dev/null
+++ b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
@@ -0,0 +1,22 @@
+\r
+\r
+Między nami nic nie było!\r
+Żadnych zwierzeń, wyznań żadnych!\r
+Nic nas z sobą nie łączyło —\r
+Prócz wiosennych marzeń zdradnych;\r
+\r
+Prócz tych woni, barw i blasków,\r
+Unoszących się w przestrzeni;\r
+Prócz szumiących śpiewem lasków\r
+I tej świeżej łąk zieleni;\r
+\r
+Prócz tych kaskad i potoków,\r
+Zraszających każdy parów,\r
+Prócz girlandy tęcz, obłoków,\r
+Prócz natury słodkich czarów;\r
+\r
+Prócz tych wspólnych, jasnych zdrojów,\r
+Z których serce zachwyt piło;\r
+Prócz pierwiosnków i powojów,—\r
+Między nami nic nie było!\r
+\r
diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py

index cab5b1c..4dab764 100644 (file)
--- a/tests/test_dcparser.py
+++ b/tests/test_dcparser.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import dcparser
  from lxml import etree
  from nose.tools import *
  from librarian import dcparser
  from lxml import etree
  from nose.tools import *
@@ -13,9 +15,9 @@ from datetime import date
  
  
  def check_dcparser(xml_file, result_file):
  
  
  def check_dcparser(xml_file, result_file):
-    xml = file(xml_file).read()
+    xml = open(xml_file, 'rb').read()
      result = codecs.open(result_file, encoding='utf-8').read()
      result = codecs.open(result_file, encoding='utf-8').read()
-    info = dcparser.BookInfo.from_string(xml).to_dict()
+    info = dcparser.BookInfo.from_bytes(xml).to_dict()
      should_be = eval(result)
      for key in should_be:
          assert_equals(info[key], should_be[key])
      should_be = eval(result)
      for key in should_be:
          assert_equals(info[key], should_be[key])
@@ -28,13 +30,13 @@ def test_dcparser():
  
  
  def check_serialize(xml_file):
  
  
  def check_serialize(xml_file):
-    xml = file(xml_file).read()
-    info = dcparser.BookInfo.from_string(xml)
+    xml = open(xml_file, 'rb').read()
+    info = dcparser.BookInfo.from_bytes(xml)
  
      # serialize
  
      # serialize
-    serialized = etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8')
+    serialized = etree.tostring(info.to_etree(), encoding='unicode').encode('utf-8')
      # then parse again
      # then parse again
-    info_bis = dcparser.BookInfo.from_string(serialized)
+    info_bis = dcparser.BookInfo.from_bytes(serialized)
  
      # check if they are the same
      for key in vars(info):
  
      # check if they are the same
      for key in vars(info):
@@ -49,7 +51,7 @@ def test_serialize():
  
  
  def test_asdate():
  
  
  def test_asdate():
-    assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 03))
+    assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 3))
      assert_equals(dcparser.as_date(u"2011"), date(2011, 1, 1))
      assert_equals(dcparser.as_date(u"2 poł. XIX w."), date(1950, 1, 1))
      assert_equals(dcparser.as_date(u"XVII w., l. 20"), date(1720, 1, 1))
      assert_equals(dcparser.as_date(u"2011"), date(2011, 1, 1))
      assert_equals(dcparser.as_date(u"2 poł. XIX w."), date(1950, 1, 1))
      assert_equals(dcparser.as_date(u"XVII w., l. 20"), date(1720, 1, 1))
diff --git a/tests/test_epub.py b/tests/test_epub.py

index 720fec6..4ac874a 100644 (file)
--- a/tests/test_epub.py
+++ b/tests/test_epub.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from zipfile import ZipFile
  from lxml import html
  from nose.tools import *
  from zipfile import ZipFile
  from lxml import html
  from nose.tools import *
@@ -30,3 +32,13 @@ def test_transform():
                  u'Opracowanie redakcyjne i przypisy: '
                  u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
      assert_true(editors_attribution)
                  u'Opracowanie redakcyjne i przypisy: '
                  u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
      assert_true(editors_attribution)
+
+
+def test_transform_hyphenate():
+    epub = WLDocument.from_file(
+            get_fixture('text', 'asnyk_zbior.xml'),
+            provider=DirDocProvider(get_fixture('text', ''))
+        ).as_epub(
+            flags=['without_fonts'],
+            hyphenate=True
+        ).get_file()
diff --git a/tests/test_fb2.py b/tests/test_fb2.py

new file mode 100644 (file)

index 0000000..2b8de67
--- /dev/null
+++ b/tests/test_fb2.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals
+
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
+from nose.tools import *
+from .utils import get_fixture
+
+
+def test_transform():
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.fb2')
+
+    text = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_fb2().get_bytes()
+
+    assert_equal(text, open(expected_output_file_path, 'rb').read())
+
diff --git a/tests/test_html.py b/tests/test_html.py

index a0de630..d77d8fe 100644 (file)
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -3,10 +3,12 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import NoDublinCore
  from librarian.parser import WLDocument
  from nose.tools import *
  from librarian import NoDublinCore
  from librarian.parser import WLDocument
  from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_transform():
  
  
  def test_transform():
@@ -14,9 +16,9 @@ def test_transform():
  
      html = WLDocument.from_file(
              get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
  
      html = WLDocument.from_file(
              get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
-        ).as_html().get_string()
+        ).as_html().get_bytes()
  
  
-    assert_equal(html, file(expected_output_file_path).read())
+    assert_equal(html, open(expected_output_file_path, 'rb').read())
  
  
  @raises(NoDublinCore)
  
  
  @raises(NoDublinCore)
@@ -35,7 +37,7 @@ def test_passing_parse_dublincore_to_transform():
  
  
  def test_empty():
  
  
  def test_empty():
-    assert not WLDocument.from_string(
-            '<utwor />',
+    assert not WLDocument.from_bytes(
+            b'<utwor />',
              parse_dublincore=False,
          ).as_html()
              parse_dublincore=False,
          ).as_html()
diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py

index 234f297..410577c 100644 (file)
--- a/tests/test_html_annotations.py
+++ b/tests/test_html_annotations.py
@@ -21,73 +21,73 @@ def test_annotations():
  
          ('<pe/>', (
              'pe',
  
          ('<pe/>', (
              'pe',
-            [], 
-            '',
-            '<p></p>'
+            [],
+            '[przypis edytorski]',
+            '<p> [przypis edytorski]</p>'
              ),
              'Empty footnote'),
  
          ('<pr>Definiendum --- definiens.</pr>', (
              'pr',
              ),
              'Empty footnote'),
  
          ('<pr>Definiendum --- definiens.</pr>', (
              'pr',
-            [], 
-            'Definiendum \u2014 definiens.', 
-            '<p>Definiendum \u2014 definiens.</p>'
+            [],
+            'Definiendum \u2014 definiens. [przypis redakcyjny]',
+            '<p>Definiendum \u2014 definiens. [przypis redakcyjny]</p>'
              ),
              'Plain footnote.'),
  
          ('<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>', (
              'pt',
              ),
              'Plain footnote.'),
  
          ('<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>', (
              'pt',
-            [], 
-            'Definiendum \u2014 definiens.', 
-            '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>'
+            [],
+            'Definiendum \u2014 definiens. [przypis tłumacza]',
+            '<p><em class="foreign-word">Definiendum</em> \u2014 definiens. [przypis tłumacza]</p>'
              ),
              'Standard footnote.'),
  
          ('<pr>Definiendum (łac.) --- definiens.</pr>', (
              'pr',
              ),
              'Standard footnote.'),
  
          ('<pr>Definiendum (łac.) --- definiens.</pr>', (
              'pr',
-            ['łac.'], 
-            'Definiendum (łac.) \u2014 definiens.', 
-            '<p>Definiendum (łac.) \u2014 definiens.</p>'
+            ['łac.'],
+            'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]',
+            '<p>Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]</p>'
              ),
              'Plain footnote with qualifier'),
  
          ('<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>', (
              'pe',
              ),
              'Plain footnote with qualifier'),
  
          ('<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>', (
              'pe',
-            ['łac.'], 
-            'Definiendum (łac.) \u2014 definiens.', 
-            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens.</p>'
+            ['łac.'],
+            'Definiendum (łac.) \u2014 definiens. [przypis edytorski]',
+            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens. [przypis edytorski]</p>'
              ),
              'Standard footnote with qualifier.'),
  
          ('<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>', (
              'pt',
              ),
              'Standard footnote with qualifier.'),
  
          ('<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>', (
              'pt',
-            ['daw.'], 
-            'Definiendum (daw.) \u2014 definiens.', 
-            '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>'
+            ['daw.'],
+            'Definiendum (daw.) \u2014 definiens. [przypis tłumacza]',
+            '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens. [przypis tłumacza]</p>'
              ),
              'Standard footnote with leading whitespace and qualifier.'),
  
          ('<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>', (
              'pr',
              ),
              'Standard footnote with leading whitespace and qualifier.'),
  
          ('<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>', (
              'pr',
-            ['łac.'], 
-            'Definiendum (łac.) \u2014 definiens.', 
-            '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>'
+            ['łac.'],
+            'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]',
+            '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>. [przypis redakcyjny]</p>'
              ),
              'Plain footnote with qualifier and some emphasis.'),
  
          ('<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>', (
              'pe',
              ['łac.'],
              ),
              'Plain footnote with qualifier and some emphasis.'),
  
          ('<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>', (
              'pe',
              ['łac.'],
-            'Definiendum (łac.) \u2014 definiens.',
-            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>'
+            'Definiendum (łac.) \u2014 definiens. [przypis edytorski]',
+            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>. [przypis edytorski]</p>'
              ),
              'Standard footnote with qualifier and some emphasis.'),
  
          ('<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>', (
              'pe',
              ['łac.'],
              ),
              'Standard footnote with qualifier and some emphasis.'),
  
          ('<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>', (
              'pe',
              ['łac.'],
-            'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
-            '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
+            'Definiendum (łac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]',
+            '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]</p>',
              ),
              'Footnote with a second parentheses and mdash.'),
  
              ),
              'Footnote with a second parentheses and mdash.'),
  
@@ -96,9 +96,9 @@ def test_annotations():
              'pe',
              ['daw.', 'niem.'],
              'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
              'pe',
              ['daw.', 'niem.'],
              'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
-            'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
+            'szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]',
              '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) '
              '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) '
-            '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>'
+            '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]</p>'
              ),
              'Footnote with multiple and qualifiers and emphasis.'),
  
              ),
              'Footnote with multiple and qualifiers and emphasis.'),
  
@@ -106,7 +106,9 @@ def test_annotations():
  
      xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join(
          t[0] for t in annotations)
  
      xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join(
          t[0] for t in annotations)
-    html = WLDocument.from_string(xml_src, parse_dublincore=False).as_html().get_file()
+    html = WLDocument.from_bytes(
+        xml_src.encode('utf-8'),
+        parse_dublincore=False).as_html().get_file()
      res_annotations = list(extract_annotations(html))
  
      for i, (src, expected, name) in enumerate(annotations):
      res_annotations = list(extract_annotations(html))
  
      for i, (src, expected, name) in enumerate(annotations):
diff --git a/tests/test_html_fragments.py b/tests/test_html_fragments.py

index 3e87a9e..16057bc 100644 (file)
--- a/tests/test_html_fragments.py
+++ b/tests/test_html_fragments.py
@@ -3,9 +3,11 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.html import extract_fragments
  from nose.tools import *
  from librarian.html import extract_fragments
  from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_fragments():
  
  
  def test_fragments():
@@ -14,5 +16,5 @@ def test_fragments():
      closed_fragments, open_fragments = extract_fragments(
          get_fixture('text', 'asnyk_miedzy_nami_expected.html'))
      assert not open_fragments
      closed_fragments, open_fragments = extract_fragments(
          get_fixture('text', 'asnyk_miedzy_nami_expected.html'))
      assert not open_fragments
-    fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in closed_fragments.values())
-    assert_equal(fragments_text, file(expected_output_file_path).read().decode('utf-8'))
+    fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in sorted(closed_fragments.values(), key=lambda f: f.id))
+    assert_equal(fragments_text, open(expected_output_file_path, 'rb').read().decode('utf-8'))
diff --git a/tests/test_mobi.py b/tests/test_mobi.py

new file mode 100644 (file)

index 0000000..3b29e72
--- /dev/null
+++ b/tests/test_mobi.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals
+
+from zipfile import ZipFile
+from lxml import html
+from nose.tools import *
+from librarian import DirDocProvider
+from librarian.parser import WLDocument
+from tests.utils import get_fixture
+
+
+def test_transform():
+    mobi = WLDocument.from_file(
+            get_fixture('text', 'asnyk_zbior.xml'),
+            provider=DirDocProvider(get_fixture('text', ''))
+        ).as_mobi(converter_path='true').get_file()
diff --git a/tests/test_pdf.py b/tests/test_pdf.py

index 5b2dba1..98d1fa6 100644 (file)
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -3,11 +3,14 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
+import re
  from tempfile import NamedTemporaryFile
  from nose.tools import *
  from librarian import DirDocProvider
  from librarian.parser import WLDocument
  from tempfile import NamedTemporaryFile
  from nose.tools import *
  from librarian import DirDocProvider
  from librarian.parser import WLDocument
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_transform():
  
  
  def test_transform():
@@ -17,9 +20,8 @@ def test_transform():
              get_fixture('text', 'asnyk_zbior.xml'),
              provider=DirDocProvider(get_fixture('text', ''))
          ).as_pdf(save_tex=temp.name)
              get_fixture('text', 'asnyk_zbior.xml'),
              provider=DirDocProvider(get_fixture('text', ''))
          ).as_pdf(save_tex=temp.name)
-    tex = open(temp.name).read().decode('utf-8')
-    print tex
+    tex = open(temp.name, 'rb').read().decode('utf-8')
  
      # Check contributor list.
  
      # Check contributor list.
-    editors = re.search(ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
+    editors = re.search(r'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
      assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
      assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
diff --git a/tests/test_picture.py b/tests/test_picture.py

index 00b03ce..f97609b 100644 (file)
--- a/tests/test_picture.py
+++ b/tests/test_picture.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import picture, dcparser
  from tests.utils import get_all_fixtures, get_fixture
  from os import path
  from librarian import picture, dcparser
  from tests.utils import get_all_fixtures, get_fixture
  from os import path
@@ -46,7 +48,6 @@ def test_picture_parts():
      motifs = set()
      names = set()
  
      motifs = set()
      names = set()
  
-    print parts
      for p in parts:
          for m in p['themes']:
              motifs.add(m)
      for p in parts:
          for m in p['themes']:
              motifs.add(m)
diff --git a/tests/test_text.py b/tests/test_text.py

index 70dfb60..14c728f 100644 (file)
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -3,10 +3,12 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import NoDublinCore
  from librarian.parser import WLDocument
  from nose.tools import *
  from librarian import NoDublinCore
  from librarian.parser import WLDocument
  from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_transform():
  
  
  def test_transform():
@@ -14,9 +16,19 @@ def test_transform():
  
      text = WLDocument.from_file(
              get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
  
      text = WLDocument.from_file(
              get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
-        ).as_text().get_string()
+        ).as_text().get_bytes()
+
+    assert_equal(text, open(expected_output_file_path, 'rb').read())
+
+
+def test_transform_raw():
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
+
+    text = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_text(flags=['raw-text']).get_bytes()
  
  
-    assert_equal(text, file(expected_output_file_path).read())
+    assert_equal(text, open(expected_output_file_path, 'rb').read())
  
  
  @raises(NoDublinCore)
  
  
  @raises(NoDublinCore)
diff --git a/tests/utils.py b/tests/utils.py

index fc87532..7da206c 100644 (file)
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -3,7 +3,6 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
  from os.path import realpath, join, dirname
  import glob
  
  from os.path import realpath, join, dirname
  import glob
  
diff --git a/tox.ini b/tox.ini

new file mode 100644 (file)

index 0000000..5b28a3b
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,28 @@
+[tox]
+envlist =
+    clean,
+    py{27,34,35,36,37},
+    stats
+
+[testenv]
+deps =
+    nose
+    coverage
+passenv = HOME  ; Needed to find locally installed fonts when testing PDF production.
+commands =
+    nosetests --with-coverage --cover-package=librarian -d --with-doctest --with-xunit --exe
+install_command = pip install --extra-index-url https://py.mdrn.pl/simple {packages}
+
+[testenv:clean]
+basepython = python2
+commands =
+    coverage erase
+deps = coverage
+
+[testenv:stats]
+basepython = python2
+commands =
+    coverage report
+    coverage html
+deps = coverage
+
author	Radek Czajka <rczajka@rczajka.pl>
	Wed, 27 Feb 2019 09:13:41 +0000 (10:13 +0100)
committer	Radek Czajka <rczajka@rczajka.pl>
	Wed, 27 Feb 2019 19:35:56 +0000 (20:35 +0100)
.gitignore		patch \| blob \| history
AUTHORS.md		patch \| blob \| history
CHANGELOG.md	[new file with mode: 0644]	patch \| blob
MANIFEST.in	[new file with mode: 0644]	patch \| blob
README.md		patch \| blob \| history
librarian/__init__.py		patch \| blob \| history
librarian/book2anything.py		patch \| blob \| history
librarian/cover.py		patch \| blob \| history
librarian/dcparser.py		patch \| blob \| history
librarian/embeds/__init__.py		patch \| blob \| history
librarian/embeds/latex.py		patch \| blob \| history
librarian/embeds/mathml.py		patch \| blob \| history
librarian/epub.py		patch \| blob \| history
librarian/fb2.py		patch \| blob \| history
librarian/functions.py		patch \| blob \| history
librarian/html.py		patch \| blob \| history
librarian/hyphenator.py		patch \| blob \| history
librarian/mobi.py		patch \| blob \| history
librarian/packagers.py		patch \| blob \| history
librarian/parser.py		patch \| blob \| history
librarian/partners.py		patch \| blob \| history
librarian/pdf.py		patch \| blob \| history
librarian/picture.py		patch \| blob \| history
librarian/sponsor.py		patch \| blob \| history
librarian/text.py		patch \| blob \| history
librarian/util.py		patch \| blob \| history
scripts/book2cover		patch \| blob \| history
scripts/book2epub		patch \| blob \| history
scripts/book2fb2		patch \| blob \| history
scripts/book2html		patch \| blob \| history
scripts/book2mobi		patch \| blob \| history
scripts/book2partner		patch \| blob \| history
scripts/book2pdf		patch \| blob \| history
scripts/book2txt		patch \| blob \| history
scripts/bookfragments		patch \| blob \| history
scripts/fn_qualifiers_list_from_redmine.py	[changed mode: 0644->0755]	patch \| blob \| history
scripts/genslugs		patch \| blob \| history
setup.py		patch \| blob \| history
tests/files/dcparser/andersen_brzydkie_kaczatko.out		patch \| blob \| history
tests/files/dcparser/biedrzycki_akslop.out		patch \| blob \| history
tests/files/dcparser/kochanowski_piesn7.out		patch \| blob \| history
tests/files/dcparser/mickiewicz_rybka.out		patch \| blob \| history
tests/files/dcparser/sofokles_antygona.out		patch \| blob \| history
tests/files/text/asnyk_miedzy_nami_expected.fb2	[new file with mode: 0644]	patch \| blob
tests/files/text/asnyk_miedzy_nami_expected.txt		patch \| blob \| history
tests/files/text/asnyk_miedzy_nami_expected_raw.txt	[new file with mode: 0644]	patch \| blob
tests/test_dcparser.py		patch \| blob \| history
tests/test_epub.py		patch \| blob \| history
tests/test_fb2.py	[new file with mode: 0644]	patch \| blob
tests/test_html.py		patch \| blob \| history
tests/test_html_annotations.py		patch \| blob \| history
tests/test_html_fragments.py		patch \| blob \| history
tests/test_mobi.py	[new file with mode: 0644]	patch \| blob
tests/test_pdf.py		patch \| blob \| history
tests/test_picture.py		patch \| blob \| history
tests/test_text.py		patch \| blob \| history
tests/utils.py		patch \| blob \| history
tox.ini	[new file with mode: 0644]	patch \| blob