Python 3.4-3.7 support;

author Radek Czajka <rczajka@rczajka.pl>

Wed, 27 Feb 2019 09:13:41 +0000 (10:13 +0100)

committer Radek Czajka <rczajka@rczajka.pl>

Wed, 27 Feb 2019 19:35:56 +0000 (20:35 +0100)
author Radek Czajka <rczajka@rczajka.pl>
Wed, 27 Feb 2019 09:13:41 +0000 (10:13 +0100)
committer Radek Czajka <rczajka@rczajka.pl>
Wed, 27 Feb 2019 19:35:56 +0000 (20:35 +0100)
diff --git a/.gitignore b/.gitignore

index b6c0f8a..0660acf 100755 (executable)
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ build
  .project
  .pydevproject
  .settings
+/.tox
+/nosetests.xml
+/htmlcov
diff --git a/AUTHORS.md b/AUTHORS.md

index 70fe140..2eab59f 100644 (file)
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -1,9 +1,17 @@
  Authors
  -------
  
-Originally written by Marek Stępniowski <marek@stepniowski>
-       
-Later contributions:
+List of people who have contributed to the project, in chronological order:
+
+* Marek Stępniowski
+* Łukasz Rekucki
+* Radek Czajka
+* Łukasz Anwajler
+* Adam Twardoch
+* Marcin Koziej
+* Michał Górny
+* Aleksander Łukasz
+* Robert Błaut
+* Jan Szejko
+
  
-   * Łukasz Rekucki <lrekucki@gmail.com>
-   * Radek Czajka <radek.czajka@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md

new file mode 100644 (file)

index 0000000..dbc3209
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,20 @@
+# Change Log
+
+This document records all notable changes to Librarian.
+
+## 1.7 (2019-02-27)
+
+### Added
+- Python 3.4+ support, in addition to the existing Python 2.7 support.
+- `converter_path` argument in `mobi.transform`.
+- Proper packaging info.
+- This changelog.
+- Tox configuration for tests.
+
+### Changed
+- `from_bytes` methods replaced all `from_string` methods,
+   i.e. on: OutputFile, WorkInfo, BookInfo, WLDocument, WLPicture.
+- `get_bytes` replaced `get_string` on OutputFile.
+
+### Removed
+- Shims for Python < 2.7.
diff --git a/MANIFEST.in b/MANIFEST.in

new file mode 100644 (file)

index 0000000..af6efac
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,15 @@
+include *.md
+include LICENSE
+include NOTICE
+include tox.ini
+recursive-include scripts *.py *.css
+recursive-include tests *.py *.xml *.html *.out *.txt *.jpeg
+include librarian/xslt/*.xslt
+include librarian/xslt/*.xml
+include librarian/epub/*
+include librarian/pdf/*
+include librarian/fb2/*
+include librarian/fonts/*
+graft librarian/res
+graft librarian/font-optimizer
+
diff --git a/README.md b/README.md

index c0e13e9..dea2381 100644 (file)
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@ License
  
    ![AGPL Logo](http://www.gnu.org/graphics/agplv3-155x51.png)
  
-    Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska <fundacja@nowoczesnapolska.org.pl>
+    Copyright © 2008-2019 Fundacja Nowoczesna Polska <fundacja@nowoczesnapolska.org.pl>
  
-    For full list of contributors see AUTHORS section at the end.
+    For the full list of contributors, see the AUTHORS file.
  
      This program is free software: you can redistribute it and/or modify
      it under the terms of the GNU Affero General Public License as published by
@@ -29,10 +29,12 @@ other formats, which are more suitable for presentation.
  
  Currently we support:
  
- * HTML4, XHTML 1.0
+ * HTML4, XHTML 1.0 (?)
   * Plain text
   * EPUB (XHTML based)
+ * MOBI
   * print-ready PDF
+ * FB2
  
  Other features:
  
@@ -84,13 +86,3 @@ To convert a file to PDF:
  To extract book fragments marked as "theme":
  
      bookfragments file1.xml [file2.xml ...]
-
-
-Authors
--------
-Originally written by Marek Stępniowski <marek@stepniowski.com>
-       
-Later contributions:
-
- * Łukasz Rekucki <lrekucki@gmail.com>
- * Radek Czajka <radek.czajka@gmail.com>
-\ No newline at end of file
diff --git a/librarian/__init__.py b/librarian/__init__.py

index 9a9e23e..119b6b1 100644 (file)
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -3,28 +3,28 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
  
  import os
  import re
  import shutil
+from tempfile import NamedTemporaryFile
  import urllib
-
-from util import makedirs
+from lxml import etree
+import six
+from six.moves.urllib.request import FancyURLopener
+from .util import makedirs
  
  
+@six.python_2_unicode_compatible
  class UnicodeException(Exception):
      def __str__(self):
-        """ Dirty workaround for Python Unicode handling problems. """
-        return unicode(self).encode('utf-8')
-
-    def __unicode__(self):
          """ Dirty workaround for Python Unicode handling problems. """
          args = self.args[0] if len(self.args) == 1 else self.args
          try:
-            message = unicode(args)
+            message = six.text_type(args)
          except UnicodeDecodeError:
-            message = unicode(args, encoding='utf-8', errors='ignore')
+            message = six.text_type(args, encoding='utf-8', errors='ignore')
          return message
  
  class ParseError(UnicodeException):
@@ -79,6 +79,7 @@ PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")
  WLNS = EmptyNamespace()
  
  
+@six.python_2_unicode_compatible
  class WLURI(object):
      """Represents a WL URI. Extracts slug from it."""
      slug = None
@@ -88,7 +89,7 @@ class WLURI(object):
              '(?P<slug>[-a-z0-9]+)/?$')
  
      def __init__(self, uri):
-        uri = unicode(uri)
+        uri = six.text_type(uri)
          self.uri = uri
          self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
  
@@ -104,16 +105,13 @@ class WLURI(object):
      def from_slug(cls, slug):
          """Contructs an URI from slug.
  
-        >>> WLURI.from_slug('a-slug').uri
-        u'http://wolnelektury.pl/katalog/lektura/a-slug/'
+        >>> print(WLURI.from_slug('a-slug').uri)
+        http://wolnelektury.pl/katalog/lektura/a-slug/
  
          """
          uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
          return cls(uri)
  
-    def __unicode__(self):
-        return self.uri
-
      def __str__(self):
          return self.uri
  
@@ -146,11 +144,10 @@ class DirDocProvider(DocProvider):
  
      def by_slug(self, slug):
          fname = slug + '.xml'
-        return open(os.path.join(self.dir, fname))
+        return open(os.path.join(self.dir, fname), 'rb')
  
  
-import lxml.etree as etree
-import dcparser
+from . import dcparser
  
  DEFAULT_BOOKINFO = dcparser.BookInfo(
          { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
@@ -175,14 +172,14 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
  def xinclude_forURI(uri):
      e = etree.Element(XINS("include"))
      e.set("href", uri)
-    return etree.tostring(e, encoding=unicode)
+    return etree.tostring(e, encoding='unicode')
  
  def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
      """Wrap the text within the minimal XML structure with a DC template."""
      bookinfo.created_at = creation_date
  
      dcstring = etree.tostring(bookinfo.to_etree(), \
-        method='xml', encoding=unicode, pretty_print=True)
+        method='xml', encoding='unicode', pretty_print=True)
  
      return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
          u'\n</plain-text>\n</utwor>'
@@ -192,7 +189,7 @@ def serialize_raw(element):
      b = u'' + (element.text or '')
  
      for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding=unicode,
+        e = etree.tostring(child, method='xml', encoding='unicode',
                  pretty_print=True)
          b += e
  
@@ -212,7 +209,7 @@ def get_resource(path):
  class OutputFile(object):
      """Represents a file returned by one of the converters."""
  
-    _string = None
+    _bytes = None
      _filename = None
  
      def __del__(self):
@@ -220,14 +217,14 @@ class OutputFile(object):
              os.unlink(self._filename)
  
      def __nonzero__(self):
-        return self._string is not None or self._filename is not None
+        return self._bytes is not None or self._filename is not None
  
      @classmethod
-    def from_string(cls, string):
+    def from_bytes(cls, bytestring):
          """Converter returns contents of a file as a string."""
  
          instance = cls()
-        instance._string = string
+        instance._bytes = bytestring
          return instance
  
      @classmethod
@@ -238,33 +235,31 @@ class OutputFile(object):
          instance._filename = filename
          return instance
  
-    def get_string(self):
-        """Get file's contents as a string."""
+    def get_bytes(self):
+        """Get file's contents as a bytestring."""
  
          if self._filename is not None:
-            with open(self._filename) as f:
+            with open(self._filename, 'rb') as f:
                  return f.read()
          else:
-            return self._string
+            return self._bytes
  
      def get_file(self):
          """Get file as a file-like object."""
  
-        if self._string is not None:
-            from StringIO import StringIO
-            return StringIO(self._string)
+        if self._bytes is not None:
+            return six.BytesIO(self._bytes)
          elif self._filename is not None:
-            return open(self._filename)
+            return open(self._filename, 'rb')
  
      def get_filename(self):
          """Get file as a fs path."""
  
          if self._filename is not None:
              return self._filename
-        elif self._string is not None:
-            from tempfile import NamedTemporaryFile
+        elif self._bytes is not None:
              temp = NamedTemporaryFile(prefix='librarian-', delete=False)
-            temp.write(self._string)
+            temp.write(self._bytes)
              temp.close()
              self._filename = temp.name
              return self._filename
@@ -279,6 +274,6 @@ class OutputFile(object):
          shutil.copy(self.get_filename(), path)
  
  
-class URLOpener(urllib.FancyURLopener):
+class URLOpener(FancyURLopener):
      version = 'FNP Librarian (http://github.com/fnp/librarian)'
  urllib._urlopener = URLOpener()
diff --git a/librarian/book2anything.py b/librarian/book2anything.py

index 0da3b61..948d9fd 100755 (executable)
--- a/librarian/book2anything.py
+++ b/librarian/book2anything.py
@@ -4,9 +4,11 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os.path
  import optparse
-
+import six
  from librarian import DirDocProvider, ParseError
  from librarian.parser import WLDocument
  from librarian.cover import make_cover
@@ -102,7 +104,10 @@ class Book2Anything(object):
          try:
              for main_input in input_filenames:
                  if options.verbose:
-                    print main_input
+                    print(main_input)
+
+            if isinstance(main_input, six.binary_type):
+                main_input = main_input.decode('utf-8')
  
              # Where to find input?
              if cls.uses_provider:
@@ -126,9 +131,9 @@ class Book2Anything(object):
  
              doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext)
  
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
+        except ParseError as e:
+            print('%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e
-            }
+            })
diff --git a/librarian/cover.py b/librarian/cover.py

index 29e24c8..09c8071 100644 (file)
--- a/librarian/cover.py
+++ b/librarian/cover.py
@@ -3,9 +3,11 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  import re
  from PIL import Image, ImageFont, ImageDraw, ImageFilter
-from StringIO import StringIO
+from six import BytesIO
  from librarian import get_resource, OutputFile, URLOpener
  
  
@@ -69,7 +71,7 @@ class TextBox(object):
                  line_width = self.draw.textsize(line, font=font)[0]
              line = line.strip() + ' '
  
-            pos_x = (self.max_width - line_width) / 2
+            pos_x = (self.max_width - line_width) // 2
  
              if shadow_color:
                  self.shadow_draw.text(
@@ -144,7 +146,7 @@ class Cover(object):
          if format is not None:
              self.format = format
          if width and height:
-            self.height = height * self.width / width
+            self.height = int(round(height * self.width / width))
          scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
          if scale >= 1:
              self.scale = scale
@@ -171,8 +173,8 @@ class Cover(object):
          # WL logo
          if metr.logo_width:
              logo = Image.open(get_resource('res/wl-logo.png'))
-            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
-            img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom))
+            logo = logo.resize((metr.logo_width, int(round(logo.size[1] * metr.logo_width / logo.size[0]))))
+            img.paste(logo, ((metr.width - metr.logo_width) // 2, img.size[1] - logo.size[1] - metr.logo_bottom))
  
          top = metr.author_top
          tbox = TextBox(
@@ -223,9 +225,9 @@ class Cover(object):
          return self.final_image().save(*args, **default_kwargs)
  
      def output_file(self, *args, **kwargs):
-        imgstr = StringIO()
+        imgstr = BytesIO()
          self.save(imgstr, *args, **kwargs)
-        return OutputFile.from_string(imgstr.getvalue())
+        return OutputFile.from_bytes(imgstr.getvalue())
  
  
  class WLCover(Cover):
@@ -347,9 +349,9 @@ class WLCover(Cover):
          elif self.box_position == 'bottom':
              box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
          else:   # Middle.
-            box_top = (metr.height - box_img.size[1]) / 2
+            box_top = (metr.height - box_img.size[1]) // 2
  
-        box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2
+        box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) // 2
  
          # Draw the white box.
          ImageDraw.Draw(img).rectangle(
@@ -389,17 +391,17 @@ class WLCover(Cover):
              if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
                  resized = (
                      trg_size[0],
-                    src.size[1] * trg_size[0] / src.size[0]
+                    int(round(src.size[1] * trg_size[0] / src.size[0]))
                  )
-                cut = (resized[1] - trg_size[1]) / 2
+                cut = (resized[1] - trg_size[1]) // 2
                  src = src.resize(resized, Image.ANTIALIAS)
                  src = src.crop((0, cut, src.size[0], src.size[1] - cut))
              else:
                  resized = (
-                    src.size[0] * trg_size[1] / src.size[1],
+                    int(round(src.size[0] * trg_size[1] / src.size[1])),
                      trg_size[1],
                  )
-                cut = (resized[0] - trg_size[0]) / 2
+                cut = (resized[0] - trg_size[0]) // 2
                  src = src.resize(resized, Image.ANTIALIAS)
                  src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
  
@@ -448,11 +450,11 @@ class LogoWLCover(WLCover):
          img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
  
          cursor = metr.width - metr.gradient_logo_margin_right
-        logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2
+        logo_top = int(metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2)
          for logo_path in self.gradient_logos[::-1]:
              logo = Image.open(get_resource(logo_path))
              logo = logo.resize(
-                (logo.size[0] * metr.gradient_logo_height / logo.size[1], metr.gradient_logo_height),
+                (int(round(logo.size[0] * metr.gradient_logo_height / logo.size[1])), metr.gradient_logo_height),
                  Image.ANTIALIAS)
              cursor -= logo.size[0]
              img.paste(logo, (cursor, logo_top), mask=logo)
diff --git a/librarian/dcparser.py b/librarian/dcparser.py

index f8dfaf9..eeb750a 100644 (file)
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -3,10 +3,14 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from xml.parsers.expat import ExpatError
  from datetime import date
+from functools import total_ordering
  import time
  import re
+import six
  from librarian.util import roman_to_int
  
  from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
@@ -16,7 +20,7 @@ import lxml.etree as etree  # ElementTree API using libxml2
  from lxml.etree import XMLSyntaxError
  
  
-class TextPlus(unicode):
+class TextPlus(six.text_type):
      pass
  
  
@@ -27,6 +31,8 @@ class DatePlus(date):
  # ==============
  # = Converters =
  # ==============
+@six.python_2_unicode_compatible
+@total_ordering
  class Person(object):
      """Single person with last name and a list of first names."""
      def __init__(self, last_name, *first_names):
@@ -55,13 +61,13 @@ class Person(object):
      def __eq__(self, right):
          return self.last_name == right.last_name and self.first_names == right.first_names
  
-    def __cmp__(self, other):
-        return cmp((self.last_name, self.first_names), (other.last_name, other.first_names))
+    def __lt__(self, other):
+        return (self.last_name, self.first_names) < (other.last_name, other.first_names)
  
      def __hash__(self):
          return hash((self.last_name, self.first_names))
  
-    def __unicode__(self):
+    def __str__(self):
          if len(self.first_names) > 0:
              return '%s, %s' % (self.last_name, ' '.join(self.first_names))
          else:
@@ -83,7 +89,7 @@ for now we will translate this to some single date losing information of course.
      """
      try:
          # check out the "N. poł X w." syntax
-        if isinstance(text, str):
+        if isinstance(text, six.binary_type):
              text = text.decode("utf-8")
  
          century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
@@ -94,7 +100,7 @@ for now we will translate this to some single date losing information of course.
          if m:
              half = m.group(1)
              decade = m.group(3)
-            century = roman_to_int(str(m.group(2)))
+            century = roman_to_int(m.group(2))
              if half is not None:
                  if decade is not None:
                      raise ValueError("Bad date format. Cannot specify both half and decade of century")
@@ -114,7 +120,7 @@ for now we will translate this to some single date losing information of course.
              raise ValueError
  
          return DatePlus(t[0], t[1], t[2])
-    except ValueError, e:
+    except ValueError as e:
          raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
  
  
@@ -123,7 +129,7 @@ def as_person(text):
  
  
  def as_unicode(text):
-    if isinstance(text, unicode):
+    if isinstance(text, six.text_type):
          return text
      else:
          return TextPlus(text.decode('utf-8'))
@@ -174,7 +180,7 @@ class Field(object):
                  if hasattr(val[0], 'lang'):
                      setattr(nv, 'lang', val[0].lang)
                  return nv
-        except ValueError, e:
+        except ValueError as e:
              raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
  
      def validate(self, fdict, fallbacks=None, strict=False):
@@ -221,9 +227,7 @@ class DCInfo(type):
          return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
  
  
-class WorkInfo(object):
-    __metaclass__ = DCInfo
-
+class WorkInfo(six.with_metaclass(DCInfo, object)):
      FIELDS = (
          Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
          Field(DCNS('title'), 'title'),
@@ -255,9 +259,8 @@ class WorkInfo(object):
      )
  
      @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        from StringIO import StringIO
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
@@ -282,9 +285,9 @@ class WorkInfo(object):
  
              # extract data from the element and make the info
              return cls.from_element(desc_tag, *args, **kwargs)
-        except XMLSyntaxError, e:
+        except XMLSyntaxError as e:
              raise ParseError(e)
-        except ExpatError, e:
+        except ExpatError as e:
              raise ParseError(e)
  
      @classmethod
@@ -306,7 +309,7 @@ class WorkInfo(object):
              fv = field_dict.get(e.tag, [])
              if e.text is not None:
                  text = e.text
-                if not isinstance(text, unicode):
+                if not isinstance(text, six.text_type):
                      text = text.decode('utf-8')
                  val = TextPlus(text)
                  val.lang = e.attrib.get(XMLNS('lang'), lang)
@@ -394,11 +397,11 @@ class WorkInfo(object):
                      for x in v:
                          e = etree.Element(field.uri)
                          if x is not None:
-                            e.text = unicode(x)
+                            e.text = six.text_type(x)
                          description.append(e)
                  else:
                      e = etree.Element(field.uri)
-                    e.text = unicode(v)
+                    e.text = six.text_type(v)
                      description.append(e)
  
          return root
@@ -413,9 +416,9 @@ class WorkInfo(object):
                  if field.multiple:
                      if len(v) == 0:
                          continue
-                    v = [unicode(x) for x in v if x is not None]
+                    v = [six.text_type(x) for x in v if x is not None]
                  else:
-                    v = unicode(v)
+                    v = six.text_type(v)
  
                  dc[field.name] = {'uri': field.uri, 'value': v}
          rdf['fields'] = dc
@@ -430,15 +433,15 @@ class WorkInfo(object):
                  if field.multiple:
                      if len(v) == 0:
                          continue
-                    v = [unicode(x) for x in v if x is not None]
+                    v = [six.text_type(x) for x in v if x is not None]
                  else:
-                    v = unicode(v)
+                    v = six.text_type(v)
                  result[field.name] = v
  
              if field.salias:
                  v = getattr(self, field.salias)
                  if v is not None:
-                    result[field.salias] = unicode(v)
+                    result[field.salias] = six.text_type(v)
  
          return result
  
diff --git a/librarian/embeds/__init__.py b/librarian/embeds/__init__.py

index 3b1abdb..fa74530 100644 (file)
--- a/librarian/embeds/__init__.py
+++ b/librarian/embeds/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  import importlib
  from lxml import etree
  
diff --git a/librarian/embeds/latex.py b/librarian/embeds/latex.py

index 0201d08..8425d03 100644 (file)
--- a/librarian/embeds/latex.py
+++ b/librarian/embeds/latex.py
@@ -1,4 +1,6 @@
  # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
  import os
  import shutil
  from subprocess import call, PIPE
@@ -10,14 +12,14 @@ from . import DataEmbed, create_embed, downgrades_to
  class LaTeX(DataEmbed):
      @downgrades_to('image/png')
      def to_png(self):
-        tmpl = open(get_resource('res/embeds/latex/template.tex')).read().decode('utf-8')
+        tmpl = open(get_resource('res/embeds/latex/template.tex'), 'rb').read().decode('utf-8')
          tempdir = mkdtemp('-librarian-embed-latex')
          fpath = os.path.join(tempdir, 'doc.tex')
-        with open(fpath, 'w') as f:
+        with open(fpath, 'wb') as f:
              f.write((tmpl % {'code': self.data}).encode('utf-8'))
          call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
          call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
               os.path.join(tempdir, 'doc.png')])
-        pngdata = open(os.path.join(tempdir, 'doc.png')).read()
+        pngdata = open(os.path.join(tempdir, 'doc.png'), 'rb').read()
          shutil.rmtree(tempdir)
          return create_embed('image/png', data=pngdata)
diff --git a/librarian/embeds/mathml.py b/librarian/embeds/mathml.py

index dd78f05..bd58baf 100644 (file)
--- a/librarian/embeds/mathml.py
+++ b/librarian/embeds/mathml.py
@@ -1,5 +1,8 @@
  # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
  from lxml import etree
+import six
  from librarian import get_resource
  from . import TreeEmbed, create_embed, downgrades_to
  
@@ -9,4 +12,4 @@ class MathML(TreeEmbed):
      def to_latex(self):
          xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
          output = self.tree.xslt(xslt)
-        return create_embed('application/x-latex', data=unicode(output))
+        return create_embed('application/x-latex', data=six.text_type(output))
diff --git a/librarian/epub.py b/librarian/epub.py

index 333b56f..e9670d5 100644 (file)
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -3,13 +3,13 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
  
  import os
  import os.path
  import re
  import subprocess
-from StringIO import StringIO
+from six import BytesIO
  from copy import deepcopy
  from mimetypes import guess_type
  
@@ -30,7 +30,7 @@ functions.reg_lang_code_3to2()
  
  
  def squeeze_whitespace(s):
-    return re.sub(r'\s+', ' ', s)
+    return re.sub(b'\\s+', b' ', s)
  
  
  def set_hyph_language(source_tree):
@@ -38,7 +38,7 @@ def set_hyph_language(source_tree):
          result = ''
          text = ''.join(text)
          with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f:
+            for line in f.read().decode('latin1').split('\n'):
                  list = line.strip().split('|')
                  if list[0] == text:
                      result = list[2]
@@ -77,12 +77,12 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph):
  def inner_xml(node):
      """ returns node's text and children as a string
  
-    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
+    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
      x<b>y</b>z
      """
  
      nt = node.text if node.text is not None else ''
-    return ''.join([nt] + [etree.tostring(child) for child in node])
+    return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
  
  
  def set_inner_xml(node, text):
@@ -90,7 +90,7 @@ def set_inner_xml(node, text):
  
      >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
      >>> set_inner_xml(e, 'x<b>y</b>z')
-    >>> print etree.tostring(e)
+    >>> print(etree.tostring(e, encoding='unicode'))
      <a>x<b>y</b>z</a>
      """
  
@@ -102,7 +102,7 @@ def set_inner_xml(node, text):
  def node_name(node):
      """ Find out a node's name
  
-    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
+    >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
      XYZ
      """
  
@@ -122,7 +122,7 @@ def xslt(xml, sheet, **kwargs):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
          transform = etree.XSLT(etree.parse(xsltf))
-        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
          return transform(xml, **params)
  
  
@@ -172,8 +172,8 @@ class Stanza(object):
  
      >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
      >>> Stanza(s).versify()
-    >>> print etree.tostring(s)
-    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
+    >>> print(etree.tostring(s, encoding='unicode'))
+    <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
      y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
  
      """
@@ -325,8 +325,8 @@ class TOC(object):
          return "\n".join(texts)
  
      def html(self):
-        with open(get_resource('epub/toc.html')) as f:
-            t = unicode(f.read(), 'utf-8')
+        with open(get_resource('epub/toc.html'), 'rb') as f:
+            t = f.read().decode('utf-8')
          return t % self.html_part()
  
  
@@ -546,16 +546,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
      mime.compress_type = zipfile.ZIP_STORED
-    mime.extra = ''
-    zip.writestr(mime, 'application/epub+zip')
+    mime.extra = b''
+    zip.writestr(mime, b'application/epub+zip')
      zip.writestr(
          'META-INF/container.xml',
-        '<?xml version="1.0" ?>'
-        '<container version="1.0" '
-        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
-        '<rootfiles><rootfile full-path="OPS/content.opf" '
-        'media-type="application/oebps-package+xml" />'
-        '</rootfiles></container>'
+        b'<?xml version="1.0" ?>'
+        b'<container version="1.0" '
+        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+        b'<rootfiles><rootfile full-path="OPS/content.opf" '
+        b'media-type="application/oebps-package+xml" />'
+        b'</rootfiles></container>'
      )
      zip.write(get_resource('res/wl-logo-small.png'),
                os.path.join('OPS', 'logo_wolnelektury.png'))
@@ -569,7 +569,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
          if cover is True:
              cover = make_cover
  
-        cover_file = StringIO()
+        cover_file = BytesIO()
          bound_cover = cover(document.book_info)
          bound_cover.save(cover_file)
          cover_name = 'cover.%s' % bound_cover.ext()
@@ -602,12 +602,12 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
      annotations = etree.Element('annotations')
  
      toc_file = etree.fromstring(
-        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
-        '"-//NISO//DTD ncx 2005-1//EN" '
-        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
-        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
-        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
-        '</navMap></ncx>'
+        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+        b'"-//NISO//DTD ncx 2005-1//EN" '
+        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+        b'</navMap></ncx>'
      )
      nav_map = toc_file[-1]
  
@@ -645,7 +645,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
          '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="support" />'))
-    html_string = open(get_resource('epub/support.html')).read()
+    html_string = open(get_resource('epub/support.html'), 'rb').read()
      chars.update(used_chars(etree.fromstring(html_string)))
      zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
  
@@ -679,7 +679,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                                os.path.join(tmpdir, fname)]
              env = {"PERL_USE_UNSAFE_INC": "1"}
              if verbose:
-                print "Running font-optimizer"
+                print("Running font-optimizer")
                  subprocess.check_call(optimizer_call, env=env)
              else:
                  dev_null = open(os.devnull, 'w')
diff --git a/librarian/fb2.py b/librarian/fb2.py

index 25a4c1f..6dd1c35 100644 (file)
--- a/librarian/fb2.py
+++ b/librarian/fb2.py
@@ -3,9 +3,12 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  import os.path
  from copy import deepcopy
  from lxml import etree
+import six
  
  from librarian import functions, OutputFile
  from .epub import replace_by_verse
@@ -62,6 +65,6 @@ def transform(wldoc, verbose=False,
  
      result = document.transform(style)
  
-    return OutputFile.from_string(unicode(result).encode('utf-8'))
+    return OutputFile.from_bytes(six.text_type(result).encode('utf-8'))
  
  # vim:et
diff --git a/librarian/functions.py b/librarian/functions.py

index 75e2911..e5a47d6 100644 (file)
--- a/librarian/functions.py
+++ b/librarian/functions.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from lxml import etree
  import re
  
@@ -112,7 +114,7 @@ def reg_lang_code_3to2():
          result = ''
          text = ''.join(text)
          with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f:
+            for line in f.read().decode('latin1').split('\n'):
                  list = line.strip().split('|')
                  if list[0] == text:
                      result = list[2]
diff --git a/librarian/html.py b/librarian/html.py

index a566f71..67f0061 100644 (file)
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -3,9 +3,10 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  import re
-import cStringIO
  import copy
  
  from lxml import etree
@@ -13,6 +14,8 @@ from librarian import XHTMLNS, ParseError, OutputFile
  from librarian import functions
  
  from lxml.etree import XMLSyntaxError, XSLTApplyError
+import six
+
  
  functions.reg_substitute_entities()
  functions.reg_person_name()
@@ -33,11 +36,10 @@ def html_has_content(text):
  
  
  def transform_abstrakt(abstrakt_element):
-    from cStringIO import StringIO
      style_filename = get_stylesheet('legacy')
      style = etree.parse(style_filename)
      xml = etree.tostring(abstrakt_element)
-    document = etree.parse(StringIO(xml.replace('abstrakt', 'dlugi_cytat')))  # HACK
+    document = etree.parse(six.BytesIO(xml.replace('abstrakt', 'dlugi_cytat')))  # HACK
      result = document.xslt(style)
      html = re.sub('<a name="sec[0-9]*"/>', '', etree.tostring(result))
      return re.sub('</?blockquote[^>]*>', '', html)
@@ -77,16 +79,17 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None):
              add_table_of_themes(result.getroot())
              add_table_of_contents(result.getroot())
  
-            return OutputFile.from_string(etree.tostring(
+            return OutputFile.from_bytes(etree.tostring(
                  result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
          else:
              return None
      except KeyError:
          raise ValueError("'%s' is not a valid stylesheet.")
-    except (XMLSyntaxError, XSLTApplyError), e:
+    except (XMLSyntaxError, XSLTApplyError) as e:
          raise ParseError(e)
  
  
+@six.python_2_unicode_compatible
  class Fragment(object):
      def __init__(self, id, themes):
          super(Fragment, self).__init__()
@@ -106,7 +109,7 @@ class Fragment(object):
                  try:
                      stack.pop()
                  except IndexError:
-                    print 'CLOSED NON-OPEN TAG:', element
+                    print('CLOSED NON-OPEN TAG:', element)
  
          stack.reverse()
          return self.events + stack
@@ -128,7 +131,7 @@ class Fragment(object):
  
          return ''.join(result)
  
-    def __unicode__(self):
+    def __str__(self):
          return self.to_string()
  
  
@@ -139,7 +142,7 @@ def extract_fragments(input_filename):
  
      # iterparse would die on a HTML document
      parser = etree.HTMLParser(encoding='utf-8')
-    buf = cStringIO.StringIO()
+    buf = six.BytesIO()
      buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
      buf.seek(0)
  
@@ -173,7 +176,7 @@ def extract_fragments(input_filename):
                  try:
                      fragment = open_fragments[element.get('fid')]
                  except KeyError:
-                    print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
+                    print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
                  else:
                      closed_fragments[fragment.id] = fragment
                      del open_fragments[fragment.id]
@@ -207,7 +210,7 @@ def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None
              link_text = prefix
          anchor = etree.Element('a', href='#%s' % prefix)
          anchor.set('class', 'anchor')
-        anchor.text = unicode(link_text)
+        anchor.text = six.text_type(link_text)
          parent.insert(index, anchor)
  
      if with_target:
@@ -247,7 +250,7 @@ def raw_printable_text(element):
      for e in working.findall('a'):
          if e.get('class') in ('annotation', 'theme-begin'):
              e.text = ''
-    return etree.tostring(working, method='text', encoding=unicode).strip()
+    return etree.tostring(working, method='text', encoding='unicode').strip()
  
  
  def add_table_of_contents(root):
@@ -300,7 +303,7 @@ def add_table_of_themes(root):
          theme_names = [s.strip() for s in fragment.text.split(',')]
          for theme_name in theme_names:
              book_themes.setdefault(theme_name, []).append(fragment.get('name'))
-    book_themes = book_themes.items()
+    book_themes = list(book_themes.items())
      book_themes.sort(key=lambda s: sortify(s[0]))
      themes_div = etree.Element('div', id="themes")
      themes_ol = etree.SubElement(themes_div, 'ol')
@@ -326,7 +329,7 @@ def extract_annotations(html_path):
      parser = etree.HTMLParser(encoding='utf-8')
      tree = etree.parse(html_path, parser)
      footnotes = tree.find('//*[@id="footnotes"]')
-    re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
+    re_qualifier = re.compile(r'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
      if footnotes is not None:
          for footnote in footnotes.findall('div'):
              fn_type = footnote.get('class').split('-')[1]
@@ -335,8 +338,8 @@ def extract_annotations(html_path):
              footnote.text = None
              if len(footnote) and footnote[-1].tail == '\n':
                  footnote[-1].tail = None
-            text_str = etree.tostring(footnote, method='text', encoding=unicode).strip()
-            html_str = etree.tostring(footnote, method='html', encoding=unicode).strip()
+            text_str = etree.tostring(footnote, method='text', encoding='unicode').strip()
+            html_str = etree.tostring(footnote, method='html', encoding='unicode').strip()
  
              match = re_qualifier.match(text_str)
              if match:
diff --git a/librarian/hyphenator.py b/librarian/hyphenator.py

index 18d402b..aa5b4c3 100644 (file)
--- a/librarian/hyphenator.py
+++ b/librarian/hyphenator.py
@@ -14,6 +14,7 @@ info@wilbertberendsen.nl
  License: LGPL.
  
  """
+from __future__ import print_function, unicode_literals
  
  import sys
  import re
@@ -235,5 +236,5 @@ if __name__ == "__main__":
      h = Hyphenator(dict_file, left=1, right=1)
  
      for i in h(word):
-        print i
+        print(i)
  
diff --git a/librarian/mobi.py b/librarian/mobi.py

index c3c8f28..6f1f5d6 100644 (file)
--- a/librarian/mobi.py
+++ b/librarian/mobi.py
@@ -3,6 +3,7 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
  
  from copy import deepcopy
  import os
@@ -13,13 +14,16 @@ from librarian import OutputFile
  
  
  def transform(wldoc, verbose=False, sample=None, cover=None,
-              use_kindlegen=False, flags=None, hyphenate=True, ilustr_path=''):
+              use_kindlegen=False, flags=None, hyphenate=True, ilustr_path='',
+              converter_path=None):
      """ produces a MOBI file
  
      wldoc: a WLDocument
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover factory overriding default
      flags: less-advertising,
+    converter_path: override path to MOBI converter,
+      either ebook-convert or kindlegen
      """
  
      document = deepcopy(wldoc)
@@ -40,10 +44,12 @@ def transform(wldoc, verbose=False, sample=None, cover=None,
  
      if use_kindlegen:
          output_file_basename = os.path.basename(output_file.name)
-        subprocess.check_call(['kindlegen', '-c2', epub.get_filename(),
-                              '-o', output_file_basename], **kwargs)
+        subprocess.check_call([converter_path or 'kindlegen',
+                               '-c2', epub.get_filename(),
+                               '-o', output_file_basename], **kwargs)
      else:
-        subprocess.check_call(['ebook-convert', epub.get_filename(),
+        subprocess.check_call([converter_path or 'ebook-convert',
+                               epub.get_filename(),
                                 output_file.name, '--no-inline-toc',
                                 '--mobi-file-type=both',
                                 '--mobi-ignore-margins'], **kwargs)
diff --git a/librarian/packagers.py b/librarian/packagers.py

index f57a983..b3f5548 100644 (file)
--- a/librarian/packagers.py
+++ b/librarian/packagers.py
@@ -3,11 +3,13 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  from librarian import pdf, epub, mobi, DirDocProvider, ParseError
  from librarian.parser import WLDocument
  
-from util import makedirs
+from .util import makedirs
  
  
  class Packager(object):
@@ -39,14 +41,14 @@ class Packager(object):
          try:
              for main_input in input_filenames:
                  if verbose:
-                    print main_input
+                    print(main_input)
                  cls.prepare_file(main_input, output_dir, verbose, overwrite)
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
+        except ParseError as e:
+            print('%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e.message
-            }
+            })
  
  
  class EpubPackager(Packager):
diff --git a/librarian/parser.py b/librarian/parser.py

index 43cb0a9..73ddd52 100644 (file)
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
  from librarian import RDFNS
  from librarian.cover import make_cover
@@ -14,7 +16,7 @@ from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  import os
  import re
-from StringIO import StringIO
+import six
  
  
  class WLDocument(object):
@@ -45,14 +47,14 @@ class WLDocument(object):
              self.book_info = None
  
      @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, *args, **kwargs):
  
          # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
+        if isinstance(xmlfile, six.string_types):  # any native path string, as basestring did on Py2
              file = open(xmlfile, 'rb')
              try:
                  data = file.read()
@@ -61,17 +63,17 @@ class WLDocument(object):
          else:
              data = xmlfile.read()
  
-        if not isinstance(data, unicode):
+        if not isinstance(data, six.text_type):
              data = data.decode('utf-8')
  
          data = data.replace(u'\ufeff', '')
  
          try:
              parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+            tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
  
              return cls(tree, *args, **kwargs)
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+        except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
              raise ParseError(e)
  
      def swap_endlines(self):
@@ -139,7 +141,7 @@ class WLDocument(object):
  
      def serialize(self):
          self.update_dc()
-        return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
+        return etree.tostring(self.edoc, encoding='unicode', pretty_print=True)
  
      def merge_chunks(self, chunk_dict):
          unmerged = []
@@ -150,7 +152,7 @@ class WLDocument(object):
                  node = self.edoc.xpath(xpath)[0]
                  repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
                  node.getparent().replace(node, repl)
-            except Exception, e:
+            except Exception as e:
                  unmerged.append(repr((key, xpath, e)))
  
          return unmerged
@@ -220,7 +222,8 @@ class WLDocument(object):
          if output_dir_path:
              save_path = output_dir_path
              if make_author_dir:
-                save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
+                author_dir = six.text_type(self.book_info.author)  # Py3 os.path.join cannot mix str and bytes
+                save_path = os.path.join(save_path, author_dir if six.PY3 else author_dir.encode('utf-8'))
              save_path = os.path.join(save_path, self.book_info.url.slug)
              if ext:
                  save_path += '.%s' % ext
diff --git a/librarian/partners.py b/librarian/partners.py

index 33198f7..671cf4d 100644 (file)
--- a/librarian/partners.py
+++ b/librarian/partners.py
@@ -11,9 +11,10 @@ along with custom cover images etc.
  
  New partners shouldn't be added here, but in the partners repository.
  """
+from __future__ import print_function, unicode_literals
  
  from librarian import packagers, cover
-from util import makedirs
+from .util import makedirs
  
  
  class GandalfEpub(packagers.EpubPackager):
@@ -79,7 +80,7 @@ class Virtualo(packagers.Packager):
          try:
              for main_input in input_filenames:
                  if verbose:
-                    print main_input
+                    print(main_input)
                  path, fname = os.path.realpath(main_input).rsplit('/', 1)
                  provider = DirDocProvider(path)
                  slug, ext = os.path.splitext(fname)
@@ -110,13 +111,13 @@ class Virtualo(packagers.Packager):
                  doc.save_output_file(
                      doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
                      output_path=outfile_sample)
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
+        except ParseError as e:
+            print('%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
                  'name': e.__class__.__name__,
                  'message': e.message
-            }
+            })
  
-        xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
-        xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
+        xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'wb')  # binary: payload is UTF-8 bytes
+        xml_file.write(etree.tostring(xml, pretty_print=True, encoding='unicode').encode('utf-8'))
          xml_file.close()
diff --git a/librarian/pdf.py b/librarian/pdf.py

index d67bddf..e6d897d 100644 (file)
--- a/librarian/pdf.py
+++ b/librarian/pdf.py
@@ -9,11 +9,11 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
+
  import os
  import os.path
  import shutil
-from StringIO import StringIO
  from tempfile import mkdtemp, NamedTemporaryFile
  import re
  from copy import deepcopy
@@ -23,6 +23,7 @@ from itertools import chain
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
+import six
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
@@ -57,7 +58,7 @@ def insert_tags(doc, split_re, tagname, exclude=None):
  
      >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
      >>> insert_tags(t, re.compile('-'), 'd')
-    >>> print etree.tostring(t)
+    >>> print(etree.tostring(t, encoding='unicode'))
      <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
      """
  
@@ -196,11 +197,11 @@ def package_available(package, args='', verbose=False):
      tempdir = mkdtemp('-wl2pdf-test')
      fpath = os.path.join(tempdir, 'test.tex')
      f = open(fpath, 'w')
-    f.write(r"""
-        \documentclass{wl}
-        \usepackage[%s]{%s}
-        \begin{document}
-        \end{document}
+    f.write("""
+        \\documentclass{wl}
+        \\usepackage[%s]{%s}
+        \\begin{document}
+        \\end{document}
          """ % (args, package))
      f.close()
      if verbose:
@@ -306,8 +307,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          del document  # no longer needed large object :)
  
          tex_path = os.path.join(temp, 'doc.tex')
-        fout = open(tex_path, 'w')
-        process(StringIO(texml), fout, 'utf-8')
+        fout = open(tex_path, 'wb')
+        process(six.BytesIO(texml), fout, 'utf-8')
          fout.close()
          del texml
  
@@ -329,7 +330,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
  
          # some things work better when compiled twice
          # (table of contents, [line numbers - disabled])
-        for run in xrange(2):
+        for run in range(2):
              if verbose:
                  p = call(['xelatex', tex_path])
              else:
@@ -346,7 +347,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          shutil.rmtree(temp)
          return OutputFile.from_filename(output_file.name)
  
-    except (XMLSyntaxError, XSLTApplyError), e:
+    except (XMLSyntaxError, XSLTApplyError) as e:
          raise ParseError(e)
  
  
@@ -361,14 +362,14 @@ def load_including_children(wldoc=None, provider=None, uri=None):
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
-        text = etree.tostring(wldoc.edoc, encoding=unicode)
+        text = etree.tostring(wldoc.edoc, encoding='unicode')
          provider = wldoc.provider
      else:
          raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
  
-    text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
+    text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
  
-    document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
+    document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider)
      document.swap_endlines()
  
      for child_uri in document.book_info.parts:
diff --git a/librarian/picture.py b/librarian/picture.py

index 1aa1d07..d255f55 100644 (file)
--- a/librarian/picture.py
+++ b/librarian/picture.py
@@ -1,14 +1,16 @@
  # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
  from operator import and_
  
-from dcparser import Field, WorkInfo, DCNS
+from .dcparser import Field, WorkInfo, DCNS
  from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
  from xml.parsers.expat import ExpatError
  from os import path
-from StringIO import StringIO
  from lxml import etree
  from lxml.etree import (XMLSyntaxError, XSLTApplyError, Element)
  import re
+import six
  
  
  class WLPictureURI(WLURI):
@@ -99,14 +101,14 @@ class WLPicture(object):
          self.frame = None
  
      @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
  
      @classmethod
      def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
  
          # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
+        if isinstance(xmlfile, six.string_types):  # any native path string, as basestring did on Py2
              file = open(xmlfile, 'rb')
              try:
                  data = file.read()
@@ -115,7 +117,7 @@ class WLPicture(object):
          else:
              data = xmlfile.read()
  
-        if not isinstance(data, unicode):
+        if not isinstance(data, six.text_type):
              data = data.decode('utf-8')
  
          data = data.replace(u'\ufeff', '')
@@ -126,12 +128,12 @@ class WLPicture(object):
  
          try:
              parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+            tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
  
              me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
              me.load_frame_info()
              return me
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+        except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
              raise ParseError(e)
  
      @property
@@ -184,7 +186,7 @@ class WLPicture(object):
              pd['coords'] = coords
  
              def want_unicode(x):
-                if not isinstance(x, unicode):
+                if not isinstance(x, six.text_type):
                      return x.decode('utf-8')
                  else:
                      return x
diff --git a/librarian/sponsor.py b/librarian/sponsor.py

index c9bc35b..1374cda 100644 (file)
--- a/librarian/sponsor.py
+++ b/librarian/sponsor.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import get_resource
  
  
diff --git a/librarian/text.py b/librarian/text.py

index 4064849..7ba6d29 100644 (file)
--- a/librarian/text.py
+++ b/librarian/text.py
@@ -3,10 +3,13 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  import copy
  from librarian import functions, OutputFile
  from lxml import etree
  import os
+import six
  
  
  functions.reg_substitute_entities()
@@ -103,7 +106,7 @@ def transform(wldoc, flags=None, **options):
              'description': description,
              'url': url,
              'license_description': license_description,
-            'text': unicode(result),
+            'text': six.text_type(result),
              'source': source,
              'contributors': contributors,
              'funders': funders,
@@ -111,5 +114,5 @@ def transform(wldoc, flags=None, **options):
              'isbn': isbn,
          }).encode('utf-8')
      else:
-        result = unicode(result).encode('utf-8')
-    return OutputFile.from_string("\r\n".join(result.splitlines()) + "\r\n")
+        result = six.text_type(result).encode('utf-8')
+    return OutputFile.from_bytes(b"\r\n".join(result.splitlines()) + b"\r\n")
diff --git a/librarian/util.py b/librarian/util.py

index 0886fd5..c302084 100644 (file)
--- a/librarian/util.py
+++ b/librarian/util.py
@@ -2,6 +2,8 @@
  # by Paul Winkler 
  # http://code.activestate.com/recipes/81611-roman-numerals/
  # PSFL (GPL compatible)
+from __future__ import print_function, unicode_literals
+
  import os
  
  
@@ -18,11 +20,11 @@ def int_to_roman(input):
      Traceback (most recent call last):
      ValueError: Argument must be between 1 and 3999
  
-    >>> int_to_roman(1.5)
+    >>> int_to_roman(1.5)  # doctest: +IGNORE_EXCEPTION_DETAIL
      Traceback (most recent call last):
      TypeError: expected integer, got <type 'float'>
  
-    >>> for i in range(1, 21): print int_to_roman(i)
+    >>> for i in range(1, 21): print(int_to_roman(i))
      ...
      I
      II
@@ -44,15 +46,15 @@ def int_to_roman(input):
      XVIII
      XIX
      XX
-    >>> print int_to_roman(2000)
+    >>> print(int_to_roman(2000))
      MM
-    >>> print int_to_roman(1999)
+    >>> print(int_to_roman(1999))
      MCMXCIX
      """
      if type(input) != type(1):
-        raise TypeError, "expected integer, got %s" % type(input)
+        raise TypeError("expected integer, got %s" % type(input))
      if not 0 < input < 4000:
-        raise ValueError, "Argument must be between 1 and 3999"    
+        raise ValueError("Argument must be between 1 and 3999")
      ints = (1000, 900,  500, 400, 100,  90, 50,  40, 10,  9,    5,  4,    1)
      nums = ('M',  'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
      result = ""
@@ -66,17 +68,17 @@ def roman_to_int(input):
      """
      Convert a roman numeral to an integer.
      
-    >>> r = range(1, 4000)
+    >>> r = list(range(1, 4000))
      >>> nums = [int_to_roman(i) for i in r]
      >>> ints = [roman_to_int(n) for n in nums]
-    >>> print r == ints
+    >>> print(r == ints)
      1
  
      >>> roman_to_int('VVVIV')
      Traceback (most recent call last):
       ...
      ValueError: input is not a valid roman numeral: VVVIV
-    >>> roman_to_int(1)
+    >>> roman_to_int(1)  # doctest: +IGNORE_EXCEPTION_DETAIL
      Traceback (most recent call last):
       ...
      TypeError: expected string, got <type 'int'>
@@ -90,14 +92,14 @@ def roman_to_int(input):
      ValueError: input is not a valid roman numeral: IL
      """
      if type(input) != type(""):
-        raise TypeError, "expected string, got %s" % type(input)
+        raise TypeError("expected string, got %s" % type(input))
      input = input.upper()
      nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I']
      ints = [1000, 500, 100, 50,  10,  5,    1]
      places = []
      for c in input:
          if not c in nums:
-            raise ValueError, "input is not a valid roman numeral: %s" % input
+            raise ValueError("input is not a valid roman numeral: %s" % input)
      for i in range(len(input)):
          c = input[i]
          value = ints[nums.index(c)]
@@ -116,9 +118,9 @@ def roman_to_int(input):
      if int_to_roman(sum) == input:
          return sum
      else:
-        raise ValueError, 'input is not a valid roman numeral: %s' % input
+        raise ValueError('input is not a valid roman numeral: %s' % input)
  
  
  def makedirs(path):
      if not os.path.isdir(path):
-        os.makedirs(path)
-\ No newline at end of file
+        os.makedirs(path)
diff --git a/scripts/book2cover b/scripts/book2cover

index 444563c..a81fc63 100755 (executable)
--- a/scripts/book2cover
+++ b/scripts/book2cover
@@ -4,8 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from StringIO import StringIO
-from librarian import OutputFile
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2epub b/scripts/book2epub

index 7a7a41d..5b906b9 100755 (executable)
--- a/scripts/book2epub
+++ b/scripts/book2epub
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2fb2 b/scripts/book2fb2

index 584ae99..de4615b 100755 (executable)
--- a/scripts/book2fb2
+++ b/scripts/book2fb2
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything
  
  
diff --git a/scripts/book2html b/scripts/book2html

index 2c1d04e..f6d459d 100755 (executable)
--- a/scripts/book2html
+++ b/scripts/book2html
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2mobi b/scripts/book2mobi

index b283309..b0d0686 100755 (executable)
--- a/scripts/book2mobi
+++ b/scripts/book2mobi
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2partner b/scripts/book2partner

index f1892bb..8982354 100755 (executable)
--- a/scripts/book2partner
+++ b/scripts/book2partner
@@ -4,20 +4,15 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
+from collections import OrderedDict
  import inspect
  import optparse
  import os
  import sys
  
  from librarian import packagers
-try:
-    from collections import OrderedDict
-except ImportError:
-    try:
-        from django.utils.datastructures import SortedDict
-        OrderedDict = SortedDict
-    except ImportError:
-        OrderedDict = dict
  
  
  if __name__ == '__main__':
@@ -64,12 +59,12 @@ if __name__ == '__main__':
              if inspect.isclass(package) and issubclass(package, packagers.Packager):
                  packages[package_name] = package
      if not packages:
-        print 'No packages found!'
+        print('No packages found!')
  
      if options.list_packages:
-        print 'Available packages:'
+        print('Available packages:')
          for package_name, package in packages.items():
-            print ' ', package_name
+            print(' ', package_name)
          exit(0)
  
      if len(input_filenames) < 1 or not options.packages:
@@ -79,6 +74,6 @@ if __name__ == '__main__':
      used_packages = [packages[p] for p in options.packages.split(',')]
      for package in used_packages:
          if options.verbose:
-            print 'Package:', package.__name__
+            print('Package:', package.__name__)
          package.prepare(input_filenames,
              options.output_dir, options.verbose, options.overwrite)
diff --git a/scripts/book2pdf b/scripts/book2pdf

index ccb5fac..3c363f1 100755 (executable)
--- a/scripts/book2pdf
+++ b/scripts/book2pdf
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  
  
diff --git a/scripts/book2txt b/scripts/book2txt

index c706a07..0e84ac9 100755 (executable)
--- a/scripts/book2txt
+++ b/scripts/book2txt
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.book2anything import Book2Anything, Option
  from librarian.parser import WLDocument
  
diff --git a/scripts/bookfragments b/scripts/bookfragments

index 0d94497..b283297 100755 (executable)
--- a/scripts/bookfragments
+++ b/scripts/bookfragments
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  import optparse
  
@@ -29,14 +31,14 @@ if __name__ == '__main__':
      # Do some real work
      for input_filename in input_filenames:
          if options.verbose:
-            print input_filename
+            print(input_filename)
  
          output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
  
          closed_fragments, open_fragments = html.extract_fragments(input_filename)
  
          for fragment_id in open_fragments:
-            print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
+            print('%s:warning:unclosed fragment #%s' % (input_filename, fragment_id))
  
          output_file = open(output_filename, 'w')
          output_file.write("""
diff --git a/scripts/fn_qualifiers_list_from_redmine.py b/scripts/fn_qualifiers_list_from_redmine.py

old mode 100644 (file)

new mode 100755 (executable)

index 020b119..66b00cc
--- a/scripts/fn_qualifiers_list_from_redmine.py
+++ b/scripts/fn_qualifiers_list_from_redmine.py
@@ -5,16 +5,17 @@
  This script reads the table of footnote qualifiers from Redmine
  and produces contents of fn_qualifiers.py – a list of valid qualifiers.
  """
+from __future__ import print_function, unicode_literals
  
  from lxml import etree
-from urllib2 import urlopen
+from six.moves.urllib.request import urlopen
  
  url = 'http://redmine.nowoczesnapolska.org.pl/projects/wl-publikacje/wiki/Lista_skr%C3%B3t%C3%B3w'
  
  parser = etree.HTMLParser()
  tree = etree.parse(urlopen(url), parser)
  
-print """\
+print("""\
  # -*- coding: utf-8
  \"""
  List of standard footnote qualifiers.
@@ -24,12 +25,12 @@ do not edit it.
  from __future__ import unicode_literals
  
  
-FN_QUALIFIERS = {""".encode('utf-8')
+FN_QUALIFIERS = {""")
  
  for td in tree.findall('//td'):
-    print ("    '%s': '%s'," % (
+    print(("    '%s': '%s'," % (
          td[0].text.replace('\\', '\\\\').replace("'", "\\'"),
          td[0].tail.strip(' -').replace('\\', '\\\\').replace("'", "\\'")
-    )).encode('utf-8')
+    )))
  
-print """    }""".encode('utf-8')
+print("""    }""")
diff --git a/scripts/genslugs b/scripts/genslugs

index a234096..9745b68 100755 (executable)
--- a/scripts/genslugs
+++ b/scripts/genslugs
@@ -4,6 +4,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import print_function, unicode_literals
+
  import os
  import optparse
  
@@ -36,13 +38,13 @@ if __name__ == '__main__':
      # Do some real work
      for input_filename in input_filenames:
          if options.verbose:
-            print input_filename
+            print(input_filename)
  
          doc = etree.parse(input_filename)
          try:
              title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text
          except AttributeError:
-            print '%s:error:Book title not found. Skipping.' % input_filename
+            print('%s:error:Book title not found. Skipping.' % input_filename)
              continue
  
          parent = ''
@@ -52,14 +54,14 @@ if __name__ == '__main__':
          except AttributeError:
              pass
          except IndexError:
-            print '%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url)
+            print('%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url))
  
          book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url')
          if book_url is None:
              book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description')
              book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url')
          if not options.force and book_url.text.startswith('http://'):
-            print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text)
+            print('%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text))
              continue
  
          book_url.text = BOOK_URL + slughifi(parent + title)[:60]
diff --git a/setup.py b/setup.py

index 10abe6e..b391f0c 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
  #
  import os
  import os.path
-from distutils.core import setup
+from setuptools import setup
  
  def whole_tree(prefix, path):
      files = []
@@ -21,7 +21,7 @@ def whole_tree(prefix, path):
  
  setup(
      name='librarian',
-    version='1.6',
+    version='1.7',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
@@ -29,13 +29,15 @@ setup(
      maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
      url='http://github.com/fnp/librarian',
      packages=['librarian', 'librarian.embeds'],
-    package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
+    package_data={'librarian': ['xslt/*.xslt', 'xslt/*.xml', 'epub/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
                                  whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res') +
                                  whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
      include_package_data=True,
      install_requires=[
-        'lxml>=2.2',
+        'lxml>=2.2,<=4.3',
          'Pillow',
+        'six',
+        'texml',
      ],
      scripts=['scripts/book2html',
               'scripts/book2txt',
@@ -47,5 +49,4 @@ setup(
               'scripts/book2cover',
               'scripts/bookfragments',
               'scripts/genslugs'],
-    tests_require=['nose>=0.11', 'coverage>=3.0.1'],
  )
diff --git a/tests/files/dcparser/andersen_brzydkie_kaczatko.out b/tests/files/dcparser/andersen_brzydkie_kaczatko.out

index c0fb00b..9f07b39 100644 (file)
--- a/tests/files/dcparser/andersen_brzydkie_kaczatko.out
+++ b/tests/files/dcparser/andersen_brzydkie_kaczatko.out
@@ -1,5 +1,5 @@
  {
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko',
      'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925',
      'author': u'Andersen, Hans Christian',
diff --git a/tests/files/dcparser/biedrzycki_akslop.out b/tests/files/dcparser/biedrzycki_akslop.out

index a7eeffe..588a4b7 100644 (file)
--- a/tests/files/dcparser/biedrzycki_akslop.out
+++ b/tests/files/dcparser/biedrzycki_akslop.out
@@ -1,6 +1,6 @@
  {
      'editors': [u'Sekuła, Aleksandra'],
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
      'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
      'author': u'Biedrzycki, Miłosz',
diff --git a/tests/files/dcparser/kochanowski_piesn7.out b/tests/files/dcparser/kochanowski_piesn7.out

index b3eba1e..96198a3 100644 (file)
--- a/tests/files/dcparser/kochanowski_piesn7.out
+++ b/tests/files/dcparser/kochanowski_piesn7.out
@@ -1,5 +1,5 @@
  {
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
      'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
      'author': u'Kochanowski, Jan',
diff --git a/tests/files/dcparser/mickiewicz_rybka.out b/tests/files/dcparser/mickiewicz_rybka.out

index a35f935..f3c76c0 100644 (file)
--- a/tests/files/dcparser/mickiewicz_rybka.out
+++ b/tests/files/dcparser/mickiewicz_rybka.out
@@ -1,6 +1,6 @@
  {
      'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'],
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
      'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
      'author': u'Mickiewicz, Adam',
diff --git a/tests/files/dcparser/sofokles_antygona.out b/tests/files/dcparser/sofokles_antygona.out

index d934602..477988f 100644 (file)
--- a/tests/files/dcparser/sofokles_antygona.out
+++ b/tests/files/dcparser/sofokles_antygona.out
@@ -1,6 +1,6 @@
  {
      'editors': [u'Sekuła, Aleksandra'],
-    'publisher': u'Fundacja Nowoczesna Polska',
+    'publisher': [u'Fundacja Nowoczesna Polska'],
      'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
      'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
      'author': u'Sofokles',
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.fb2 b/tests/files/text/asnyk_miedzy_nami_expected.fb2

new file mode 100644 (file)

index 0000000..b9e4e13
--- /dev/null
+++ b/tests/files/text/asnyk_miedzy_nami_expected.fb2
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:wl="http://wolnelektury.pl/functions" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:l="http://www.w3.org/1999/xlink">
+  <body>
+    <title>
+      <p>Adam Asnyk</p>
+      <p>Między nami nic nie było</p>
+    </title>
+    <epigraph>
+      <p>
+                                       Utwór opracowany został w ramach projektu
+                                               <a l:href="http://www.wolnelektury.pl/">Wolne Lektury</a>
+                                       przez <a l:href="http://www.nowoczesnapolska.org.pl/">fundację
+                                               Nowoczesna Polska</a>.
+                               </p>
+    </epigraph>
+    <section>
+      <poem>
+        <stanza>
+          <v>Między nami nic nie było!</v>
+          <v>Żadnych zwierzeń, wyznań żadnych!</v>
+          <v>Nic nas z sobą nie łączyło —</v>
+          <v>Prócz wiosennych marzeń zdradnych;</v>
+        </stanza>
+        <stanza>
+          <v>Prócz tych woni, barw i blasków,</v>
+          <v>Unoszących się w przestrzeni;</v>
+          <v>Prócz szumiących śpiewem lasków</v>
+          <v>I tej świeżej łąk zieleni;</v>
+        </stanza>
+        <stanza>
+          <v>Prócz tych kaskad i potoków,</v>
+          <v>Zraszających każdy parów,</v>
+          <v>Prócz girlandy tęcz, obłoków,</v>
+          <v>Prócz natury słodkich czarów;</v>
+        </stanza>
+        <stanza>
+          <v>Prócz tych wspólnych, jasnych zdrojów,</v>
+          <v>Z których serce zachwyt piło;</v>
+          <v>Prócz pierwiosnków i powojów,—</v>
+          <v>Między nami nic nie było!</v>
+        </stanza>
+      </poem>
+    </section>
+  </body>
+  <body name="notes"/>
+</FictionBook>
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt

index 3942928..92cc1bd 100644 (file)
--- a/tests/files/text/asnyk_miedzy_nami_expected.txt
+++ b/tests/files/text/asnyk_miedzy_nami_expected.txt
@@ -37,6 +37,8 @@ Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domen
  \r
  Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898\r
  \r
+Wydawca: Fundacja Nowoczesna Polska\r
+\r
  Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.\r
  \r
  Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.\r
diff --git a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt

new file mode 100644 (file)

index 0000000..cac61d8
--- /dev/null
+++ b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
@@ -0,0 +1,22 @@
+\r
+\r
+Między nami nic nie było!\r
+Żadnych zwierzeń, wyznań żadnych!\r
+Nic nas z sobą nie łączyło —\r
+Prócz wiosennych marzeń zdradnych;\r
+\r
+Prócz tych woni, barw i blasków,\r
+Unoszących się w przestrzeni;\r
+Prócz szumiących śpiewem lasków\r
+I tej świeżej łąk zieleni;\r
+\r
+Prócz tych kaskad i potoków,\r
+Zraszających każdy parów,\r
+Prócz girlandy tęcz, obłoków,\r
+Prócz natury słodkich czarów;\r
+\r
+Prócz tych wspólnych, jasnych zdrojów,\r
+Z których serce zachwyt piło;\r
+Prócz pierwiosnków i powojów,—\r
+Między nami nic nie było!\r
+\r
diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py

index cab5b1c..4dab764 100644 (file)
--- a/tests/test_dcparser.py
+++ b/tests/test_dcparser.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import dcparser
  from lxml import etree
  from nose.tools import *
@@ -13,9 +15,9 @@ from datetime import date
  
  
  def check_dcparser(xml_file, result_file):
-    xml = file(xml_file).read()
+    xml = open(xml_file, 'rb').read()
      result = codecs.open(result_file, encoding='utf-8').read()
-    info = dcparser.BookInfo.from_string(xml).to_dict()
+    info = dcparser.BookInfo.from_bytes(xml).to_dict()
      should_be = eval(result)
      for key in should_be:
          assert_equals(info[key], should_be[key])
@@ -28,13 +30,13 @@ def test_dcparser():
  
  
  def check_serialize(xml_file):
-    xml = file(xml_file).read()
-    info = dcparser.BookInfo.from_string(xml)
+    xml = open(xml_file, 'rb').read()
+    info = dcparser.BookInfo.from_bytes(xml)
  
      # serialize
-    serialized = etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8')
+    serialized = etree.tostring(info.to_etree(), encoding='unicode').encode('utf-8')
      # then parse again
-    info_bis = dcparser.BookInfo.from_string(serialized)
+    info_bis = dcparser.BookInfo.from_bytes(serialized)
  
      # check if they are the same
      for key in vars(info):
@@ -49,7 +51,7 @@ def test_serialize():
  
  
  def test_asdate():
-    assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 03))
+    assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 3))
      assert_equals(dcparser.as_date(u"2011"), date(2011, 1, 1))
      assert_equals(dcparser.as_date(u"2 poł. XIX w."), date(1950, 1, 1))
      assert_equals(dcparser.as_date(u"XVII w., l. 20"), date(1720, 1, 1))
diff --git a/tests/test_epub.py b/tests/test_epub.py

index 720fec6..4ac874a 100644 (file)
--- a/tests/test_epub.py
+++ b/tests/test_epub.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from zipfile import ZipFile
  from lxml import html
  from nose.tools import *
@@ -30,3 +32,13 @@ def test_transform():
                  u'Opracowanie redakcyjne i przypisy: '
                  u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
      assert_true(editors_attribution)
+
+
+def test_transform_hyphenate():
+    epub = WLDocument.from_file(
+            get_fixture('text', 'asnyk_zbior.xml'),
+            provider=DirDocProvider(get_fixture('text', ''))
+        ).as_epub(
+            flags=['without_fonts'],
+            hyphenate=True
+        ).get_file()
diff --git a/tests/test_fb2.py b/tests/test_fb2.py

new file mode 100644 (file)

index 0000000..2b8de67
--- /dev/null
+++ b/tests/test_fb2.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals
+
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
+from nose.tools import *
+from .utils import get_fixture
+
+
+def test_transform():
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.fb2')
+
+    text = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_fb2().get_bytes()
+
+    assert_equal(text, open(expected_output_file_path, 'rb').read())
+
diff --git a/tests/test_html.py b/tests/test_html.py

index a0de630..d77d8fe 100644 (file)
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -3,10 +3,12 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import NoDublinCore
  from librarian.parser import WLDocument
  from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_transform():
@@ -14,9 +16,9 @@ def test_transform():
  
      html = WLDocument.from_file(
              get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
-        ).as_html().get_string()
+        ).as_html().get_bytes()
  
-    assert_equal(html, file(expected_output_file_path).read())
+    assert_equal(html, open(expected_output_file_path, 'rb').read())
  
  
  @raises(NoDublinCore)
@@ -35,7 +37,7 @@ def test_passing_parse_dublincore_to_transform():
  
  
  def test_empty():
-    assert not WLDocument.from_string(
-            '<utwor />',
+    assert not WLDocument.from_bytes(
+            b'<utwor />',
              parse_dublincore=False,
          ).as_html()
diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py

index 234f297..410577c 100644 (file)
--- a/tests/test_html_annotations.py
+++ b/tests/test_html_annotations.py
@@ -21,73 +21,73 @@ def test_annotations():
  
          ('<pe/>', (
              'pe',
-            [], 
-            '',
-            '<p></p>'
+            [],
+            '[przypis edytorski]',
+            '<p> [przypis edytorski]</p>'
              ),
              'Empty footnote'),
  
          ('<pr>Definiendum --- definiens.</pr>', (
              'pr',
-            [], 
-            'Definiendum \u2014 definiens.', 
-            '<p>Definiendum \u2014 definiens.</p>'
+            [],
+            'Definiendum \u2014 definiens. [przypis redakcyjny]',
+            '<p>Definiendum \u2014 definiens. [przypis redakcyjny]</p>'
              ),
              'Plain footnote.'),
  
          ('<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>', (
              'pt',
-            [], 
-            'Definiendum \u2014 definiens.', 
-            '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>'
+            [],
+            'Definiendum \u2014 definiens. [przypis tłumacza]',
+            '<p><em class="foreign-word">Definiendum</em> \u2014 definiens. [przypis tłumacza]</p>'
              ),
              'Standard footnote.'),
  
          ('<pr>Definiendum (łac.) --- definiens.</pr>', (
              'pr',
-            ['łac.'], 
-            'Definiendum (łac.) \u2014 definiens.', 
-            '<p>Definiendum (łac.) \u2014 definiens.</p>'
+            ['łac.'],
+            'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]',
+            '<p>Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]</p>'
              ),
              'Plain footnote with qualifier'),
  
          ('<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>', (
              'pe',
-            ['łac.'], 
-            'Definiendum (łac.) \u2014 definiens.', 
-            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens.</p>'
+            ['łac.'],
+            'Definiendum (łac.) \u2014 definiens. [przypis edytorski]',
+            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens. [przypis edytorski]</p>'
              ),
              'Standard footnote with qualifier.'),
  
          ('<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>', (
              'pt',
-            ['daw.'], 
-            'Definiendum (daw.) \u2014 definiens.', 
-            '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>'
+            ['daw.'],
+            'Definiendum (daw.) \u2014 definiens. [przypis tłumacza]',
+            '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens. [przypis tłumacza]</p>'
              ),
              'Standard footnote with leading whitespace and qualifier.'),
  
          ('<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>', (
              'pr',
-            ['łac.'], 
-            'Definiendum (łac.) \u2014 definiens.', 
-            '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>'
+            ['łac.'],
+            'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]',
+            '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>. [przypis redakcyjny]</p>'
              ),
              'Plain footnote with qualifier and some emphasis.'),
  
          ('<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>', (
              'pe',
              ['łac.'],
-            'Definiendum (łac.) \u2014 definiens.',
-            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>'
+            'Definiendum (łac.) \u2014 definiens. [przypis edytorski]',
+            '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>. [przypis edytorski]</p>'
              ),
              'Standard footnote with qualifier and some emphasis.'),
  
          ('<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>', (
              'pe',
              ['łac.'],
-            'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
-            '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
+            'Definiendum (łac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]',
+            '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]</p>',
              ),
              'Footnote with a second parentheses and mdash.'),
  
@@ -96,9 +96,9 @@ def test_annotations():
              'pe',
              ['daw.', 'niem.'],
              'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
-            'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
+            'szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]',
              '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) '
-            '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>'
+            '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]</p>'
              ),
              'Footnote with multiple and qualifiers and emphasis.'),
  
@@ -106,7 +106,9 @@ def test_annotations():
  
      xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join(
          t[0] for t in annotations)
-    html = WLDocument.from_string(xml_src, parse_dublincore=False).as_html().get_file()
+    html = WLDocument.from_bytes(
+        xml_src.encode('utf-8'),
+        parse_dublincore=False).as_html().get_file()
      res_annotations = list(extract_annotations(html))
  
      for i, (src, expected, name) in enumerate(annotations):
diff --git a/tests/test_html_fragments.py b/tests/test_html_fragments.py

index 3e87a9e..16057bc 100644 (file)
--- a/tests/test_html_fragments.py
+++ b/tests/test_html_fragments.py
@@ -3,9 +3,11 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian.html import extract_fragments
  from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_fragments():
@@ -14,5 +16,5 @@ def test_fragments():
      closed_fragments, open_fragments = extract_fragments(
          get_fixture('text', 'asnyk_miedzy_nami_expected.html'))
      assert not open_fragments
-    fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in closed_fragments.values())
-    assert_equal(fragments_text, file(expected_output_file_path).read().decode('utf-8'))
+    fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in sorted(closed_fragments.values(), key=lambda f: f.id))
+    assert_equal(fragments_text, open(expected_output_file_path, 'rb').read().decode('utf-8'))
diff --git a/tests/test_mobi.py b/tests/test_mobi.py

new file mode 100644 (file)

index 0000000..3b29e72
--- /dev/null
+++ b/tests/test_mobi.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals
+
+from zipfile import ZipFile
+from lxml import html
+from nose.tools import *
+from librarian import DirDocProvider
+from librarian.parser import WLDocument
+from tests.utils import get_fixture
+
+
+def test_transform():
+    mobi = WLDocument.from_file(
+            get_fixture('text', 'asnyk_zbior.xml'),
+            provider=DirDocProvider(get_fixture('text', ''))
+        ).as_mobi(converter_path='true').get_file()
diff --git a/tests/test_pdf.py b/tests/test_pdf.py

index 5b2dba1..98d1fa6 100644 (file)
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -3,11 +3,14 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
+import re
  from tempfile import NamedTemporaryFile
  from nose.tools import *
  from librarian import DirDocProvider
  from librarian.parser import WLDocument
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_transform():
@@ -17,9 +20,8 @@ def test_transform():
              get_fixture('text', 'asnyk_zbior.xml'),
              provider=DirDocProvider(get_fixture('text', ''))
          ).as_pdf(save_tex=temp.name)
-    tex = open(temp.name).read().decode('utf-8')
-    print tex
+    tex = open(temp.name, 'rb').read().decode('utf-8')
  
      # Check contributor list.
-    editors = re.search(ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
+    editors = re.search(r'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
      assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
diff --git a/tests/test_picture.py b/tests/test_picture.py

index 00b03ce..f97609b 100644 (file)
--- a/tests/test_picture.py
+++ b/tests/test_picture.py
@@ -3,6 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import picture, dcparser
  from tests.utils import get_all_fixtures, get_fixture
  from os import path
@@ -46,7 +48,6 @@ def test_picture_parts():
      motifs = set()
      names = set()
  
-    print parts
      for p in parts:
          for m in p['themes']:
              motifs.add(m)
diff --git a/tests/test_text.py b/tests/test_text.py

index 70dfb60..14c728f 100644 (file)
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -3,10 +3,12 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from __future__ import unicode_literals
+
  from librarian import NoDublinCore
  from librarian.parser import WLDocument
  from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
  
  
  def test_transform():
@@ -14,9 +16,19 @@ def test_transform():
  
      text = WLDocument.from_file(
              get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
-        ).as_text().get_string()
+        ).as_text().get_bytes()
+
+    assert_equal(text, open(expected_output_file_path, 'rb').read())
+
+
+def test_transform_raw():
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
+
+    text = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_text(flags=['raw-text']).get_bytes()
  
-    assert_equal(text, file(expected_output_file_path).read())
+    assert_equal(text, open(expected_output_file_path, 'rb').read())
  
  
  @raises(NoDublinCore)
diff --git a/tests/utils.py b/tests/utils.py

index fc87532..7da206c 100644 (file)
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -3,7 +3,6 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
  from os.path import realpath, join, dirname
  import glob
  
diff --git a/tox.ini b/tox.ini

new file mode 100644 (file)

index 0000000..5b28a3b
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,28 @@
+[tox]
+envlist =
+    clean,
+    py{27,34,35,36,37},
+    stats
+
+[testenv]
+deps =
+    nose
+    coverage
+passenv = HOME  ; Needed to find locally installed fonts when testing PDF production.
+commands =
+    nosetests --with-coverage --cover-package=librarian -d --with-doctest --with-xunit --exe
+install_command = pip install --extra-index-url https://py.mdrn.pl/simple {packages}
+
+[testenv:clean]
+basepython = python2
+commands =
+    coverage erase
+deps = coverage
+
+[testenv:stats]
+basepython = python2
+commands =
+    coverage report
+    coverage html
+deps = coverage
+
author	Radek Czajka <rczajka@rczajka.pl>
	Wed, 27 Feb 2019 09:13:41 +0000 (10:13 +0100)
committer	Radek Czajka <rczajka@rczajka.pl>
	Wed, 27 Feb 2019 19:35:56 +0000 (20:35 +0100)
.gitignore		patch \| blob \| history
AUTHORS.md		patch \| blob \| history
CHANGELOG.md	[new file with mode: 0644]	patch \| blob
MANIFEST.in	[new file with mode: 0644]	patch \| blob
README.md		patch \| blob \| history
librarian/__init__.py		patch \| blob \| history
librarian/book2anything.py		patch \| blob \| history
librarian/cover.py		patch \| blob \| history
librarian/dcparser.py		patch \| blob \| history
librarian/embeds/__init__.py		patch \| blob \| history
librarian/embeds/latex.py		patch \| blob \| history
librarian/embeds/mathml.py		patch \| blob \| history
librarian/epub.py		patch \| blob \| history
librarian/fb2.py		patch \| blob \| history
librarian/functions.py		patch \| blob \| history
librarian/html.py		patch \| blob \| history
librarian/hyphenator.py		patch \| blob \| history
librarian/mobi.py		patch \| blob \| history
librarian/packagers.py		patch \| blob \| history
librarian/parser.py		patch \| blob \| history
librarian/partners.py		patch \| blob \| history
librarian/pdf.py		patch \| blob \| history
librarian/picture.py		patch \| blob \| history
librarian/sponsor.py		patch \| blob \| history
librarian/text.py		patch \| blob \| history
librarian/util.py		patch \| blob \| history
scripts/book2cover		patch \| blob \| history
scripts/book2epub		patch \| blob \| history
scripts/book2fb2		patch \| blob \| history
scripts/book2html		patch \| blob \| history
scripts/book2mobi		patch \| blob \| history
scripts/book2partner		patch \| blob \| history
scripts/book2pdf		patch \| blob \| history
scripts/book2txt		patch \| blob \| history
scripts/bookfragments		patch \| blob \| history
scripts/fn_qualifiers_list_from_redmine.py	[changed mode: 0644->0755]	patch \| blob \| history
scripts/genslugs		patch \| blob \| history
setup.py		patch \| blob \| history
tests/files/dcparser/andersen_brzydkie_kaczatko.out		patch \| blob \| history
tests/files/dcparser/biedrzycki_akslop.out		patch \| blob \| history
tests/files/dcparser/kochanowski_piesn7.out		patch \| blob \| history
tests/files/dcparser/mickiewicz_rybka.out		patch \| blob \| history
tests/files/dcparser/sofokles_antygona.out		patch \| blob \| history
tests/files/text/asnyk_miedzy_nami_expected.fb2	[new file with mode: 0644]	patch \| blob
tests/files/text/asnyk_miedzy_nami_expected.txt		patch \| blob \| history
tests/files/text/asnyk_miedzy_nami_expected_raw.txt	[new file with mode: 0644]	patch \| blob
tests/test_dcparser.py		patch \| blob \| history
tests/test_epub.py		patch \| blob \| history
tests/test_fb2.py	[new file with mode: 0644]	patch \| blob
tests/test_html.py		patch \| blob \| history
tests/test_html_annotations.py		patch \| blob \| history
tests/test_html_fragments.py		patch \| blob \| history
tests/test_mobi.py	[new file with mode: 0644]	patch \| blob
tests/test_pdf.py		patch \| blob \| history
tests/test_picture.py		patch \| blob \| history
tests/test_text.py		patch \| blob \| history
tests/utils.py		patch \| blob \| history
tox.ini	[new file with mode: 0644]	patch \| blob