From 4e329824f40367945de11d3647396859092f5c2c Mon Sep 17 00:00:00 2001
From: Radek Czajka
Date: Wed, 27 Feb 2019 10:13:41 +0100
Subject: [PATCH] Python 3.4-3.7 support; Added converter_path argument in
mobi.transform; Replaced all `from_string` and `get_string` methods with
`from_bytes` and `get_bytes`; Fixed packaging, added a changelog, updated and
added more tests, added Tox config. Version bump.
---
.gitignore | 3 +
AUTHORS.md | 18 +++--
CHANGELOG.md | 20 ++++++
MANIFEST.in | 15 ++++
README.md | 18 ++---
librarian/__init__.py | 69 +++++++++----------
librarian/book2anything.py | 15 ++--
librarian/cover.py | 32 +++++----
librarian/dcparser.py | 53 +++++++-------
librarian/embeds/__init__.py | 2 +
librarian/embeds/latex.py | 8 ++-
librarian/embeds/mathml.py | 5 +-
librarian/epub.py | 60 ++++++++--------
librarian/fb2.py | 5 +-
librarian/functions.py | 4 +-
librarian/html.py | 33 +++++----
librarian/hyphenator.py | 3 +-
librarian/mobi.py | 14 ++--
librarian/packagers.py | 12 ++--
librarian/parser.py | 22 +++---
librarian/partners.py | 13 ++--
librarian/pdf.py | 31 +++++----
librarian/picture.py | 20 +++---
librarian/sponsor.py | 2 +
librarian/text.py | 9 ++-
librarian/util.py | 28 ++++----
scripts/book2cover | 4 +-
scripts/book2epub | 2 +
scripts/book2fb2 | 2 +
scripts/book2html | 2 +
scripts/book2mobi | 2 +
scripts/book2partner | 19 ++---
scripts/book2pdf | 2 +
scripts/book2txt | 2 +
scripts/bookfragments | 6 +-
scripts/fn_qualifiers_list_from_redmine.py | 13 ++--
scripts/genslugs | 10 +--
setup.py | 11 +--
.../dcparser/andersen_brzydkie_kaczatko.out | 2 +-
tests/files/dcparser/biedrzycki_akslop.out | 2 +-
tests/files/dcparser/kochanowski_piesn7.out | 2 +-
tests/files/dcparser/mickiewicz_rybka.out | 2 +-
tests/files/dcparser/sofokles_antygona.out | 2 +-
.../files/text/asnyk_miedzy_nami_expected.fb2 | 46 +++++++++++++
.../files/text/asnyk_miedzy_nami_expected.txt | 2 +
.../text/asnyk_miedzy_nami_expected_raw.txt | 22 ++++++
tests/test_dcparser.py | 16 +++--
tests/test_epub.py | 12 ++++
tests/test_fb2.py | 22 ++++++
tests/test_html.py | 12 ++--
tests/test_html_annotations.py | 58 ++++++++--------
tests/test_html_fragments.py | 8 ++-
tests/test_mobi.py | 20 ++++++
tests/test_pdf.py | 10 +--
tests/test_picture.py | 3 +-
tests/test_text.py | 18 ++++-
tests/utils.py | 1 -
tox.ini | 28 ++++++++
58 files changed, 572 insertions(+), 305 deletions(-)
create mode 100644 CHANGELOG.md
create mode 100644 MANIFEST.in
mode change 100644 => 100755 scripts/fn_qualifiers_list_from_redmine.py
create mode 100644 tests/files/text/asnyk_miedzy_nami_expected.fb2
create mode 100644 tests/files/text/asnyk_miedzy_nami_expected_raw.txt
create mode 100644 tests/test_fb2.py
create mode 100644 tests/test_mobi.py
create mode 100644 tox.ini
diff --git a/.gitignore b/.gitignore
index b6c0f8a..0660acf 100755
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ build
.project
.pydevproject
.settings
+/.tox
+/nosetests.xml
+/htmlcov
diff --git a/AUTHORS.md b/AUTHORS.md
index 70fe140..2eab59f 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -1,9 +1,17 @@
Authors
-------
-Originally written by Marek Stępniowski
-
-Later contributions:
+List of people who have contributed to the project, in chronological order:
+
+* Marek Stępniowski
+* Łukasz Rekucki
+* Radek Czajka
+* Łukasz Anwajler
+* Adam Twardoch
+* Marcin Koziej
+* Michał Górny
+* Aleksander Łukasz
+* Robert Błaut
+* Jan Szejko
+
- * Łukasz Rekucki
- * Radek Czajka
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..dbc3209
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,20 @@
+# Change Log
+
+This document records all notable changes to Librarian.
+
+## 1.7 (2019-02-27)
+
+### Added
+- Python 3.4+ support, in addition to the existing Python 2.7 support.
+- `converter_path` argument in `mobi.transform`.
+- Proper packaging info.
+- This changelog.
+- Tox configuration for tests.
+
+### Changed
+- `from_bytes` methods replaced all `from_string` methods,
+ i.e. on: OutputFile, WorkInfo, BookInfo, WLDocument, WLPicture.
+- `get_bytes` replaced `get_string` on OutputFile.
+
+### Removed
+- Shims for Python < 2.7.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..af6efac
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,15 @@
+include *.md
+include LICENSE
+include NOTICE
+include tox.ini
+recursive-include scripts *.py *.css
+recursive-include tests *.py *.xml *.html *.out *.txt *.jpeg
+include librarian/xslt/*.xslt
+include librarian/xslt/*.xml
+include librarian/epub/*
+include librarian/pdf/*
+include librarian/fb2/*
+include librarian/fonts/*
+graft librarian/res
+graft librarian/font-optimizer
+
diff --git a/README.md b/README.md
index c0e13e9..dea2381 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@ License
![AGPL Logo](http://www.gnu.org/graphics/agplv3-155x51.png)
- Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska
+ Copyright © 2008-2019 Fundacja Nowoczesna Polska
- For full list of contributors see AUTHORS section at the end.
+ For the full list of contributors, see the AUTHORS.md file.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
@@ -29,10 +29,12 @@ other formats, which are more suitable for presentation.
Currently we support:
- * HTML4, XHTML 1.0
+ * HTML4, XHTML 1.0 (?)
* Plain text
* EPUB (XHTML based)
+ * MOBI
* print-ready PDF
+ * FB2
Other features:
@@ -84,13 +86,3 @@ To convert a file to PDF:
To extract book fragments marked as "theme":
bookfragments file1.xml [file2.xml ...]
-
-
-Authors
--------
-Originally written by Marek Stępniowski
-
-Later contributions:
-
- * Łukasz Rekucki
- * Radek Czajka
\ No newline at end of file
diff --git a/librarian/__init__.py b/librarian/__init__.py
index 9a9e23e..119b6b1 100644
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -3,28 +3,28 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
import os
import re
import shutil
+from tempfile import NamedTemporaryFile
import urllib
-
-from util import makedirs
+from lxml import etree
+import six
+from six.moves.urllib.request import FancyURLopener
+from .util import makedirs
+@six.python_2_unicode_compatible
class UnicodeException(Exception):
def __str__(self):
- """ Dirty workaround for Python Unicode handling problems. """
- return unicode(self).encode('utf-8')
-
- def __unicode__(self):
""" Dirty workaround for Python Unicode handling problems. """
args = self.args[0] if len(self.args) == 1 else self.args
try:
- message = unicode(args)
+ message = six.text_type(args)
except UnicodeDecodeError:
- message = unicode(args, encoding='utf-8', errors='ignore')
+ message = six.text_type(args, encoding='utf-8', errors='ignore')
return message
class ParseError(UnicodeException):
@@ -79,6 +79,7 @@ PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")
WLNS = EmptyNamespace()
+@six.python_2_unicode_compatible
class WLURI(object):
"""Represents a WL URI. Extracts slug from it."""
slug = None
@@ -88,7 +89,7 @@ class WLURI(object):
'(?P[-a-z0-9]+)/?$')
def __init__(self, uri):
- uri = unicode(uri)
+ uri = six.text_type(uri)
self.uri = uri
self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
@@ -104,16 +105,13 @@ class WLURI(object):
def from_slug(cls, slug):
"""Contructs an URI from slug.
- >>> WLURI.from_slug('a-slug').uri
- u'http://wolnelektury.pl/katalog/lektura/a-slug/'
+ >>> print(WLURI.from_slug('a-slug').uri)
+ http://wolnelektury.pl/katalog/lektura/a-slug/
"""
uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
return cls(uri)
- def __unicode__(self):
- return self.uri
-
def __str__(self):
return self.uri
@@ -146,11 +144,10 @@ class DirDocProvider(DocProvider):
def by_slug(self, slug):
fname = slug + '.xml'
- return open(os.path.join(self.dir, fname))
+ return open(os.path.join(self.dir, fname), 'rb')
-import lxml.etree as etree
-import dcparser
+from . import dcparser
DEFAULT_BOOKINFO = dcparser.BookInfo(
{ RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
@@ -175,14 +172,14 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
def xinclude_forURI(uri):
e = etree.Element(XINS("include"))
e.set("href", uri)
- return etree.tostring(e, encoding=unicode)
+ return etree.tostring(e, encoding='unicode')
def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
"""Wrap the text within the minimal XML structure with a DC template."""
bookinfo.created_at = creation_date
dcstring = etree.tostring(bookinfo.to_etree(), \
- method='xml', encoding=unicode, pretty_print=True)
+ method='xml', encoding='unicode', pretty_print=True)
return u'\n' + dcstring + u'\n\n' + ocrtext + \
u'\n\n'
@@ -192,7 +189,7 @@ def serialize_raw(element):
b = u'' + (element.text or '')
for child in element.iterchildren():
- e = etree.tostring(child, method='xml', encoding=unicode,
+ e = etree.tostring(child, method='xml', encoding='unicode',
pretty_print=True)
b += e
@@ -212,7 +209,7 @@ def get_resource(path):
class OutputFile(object):
"""Represents a file returned by one of the converters."""
- _string = None
+ _bytes = None
_filename = None
def __del__(self):
@@ -220,14 +217,14 @@ class OutputFile(object):
os.unlink(self._filename)
def __nonzero__(self):
- return self._string is not None or self._filename is not None
+ return self._bytes is not None or self._filename is not None
@classmethod
- def from_string(cls, string):
+ def from_bytes(cls, bytestring):
"""Converter returns contents of a file as a string."""
instance = cls()
- instance._string = string
+ instance._bytes = bytestring
return instance
@classmethod
@@ -238,33 +235,31 @@ class OutputFile(object):
instance._filename = filename
return instance
- def get_string(self):
- """Get file's contents as a string."""
+ def get_bytes(self):
+ """Get file's contents as a bytestring."""
if self._filename is not None:
- with open(self._filename) as f:
+ with open(self._filename, 'rb') as f:
return f.read()
else:
- return self._string
+ return self._bytes
def get_file(self):
"""Get file as a file-like object."""
- if self._string is not None:
- from StringIO import StringIO
- return StringIO(self._string)
+ if self._bytes is not None:
+ return six.BytesIO(self._bytes)
elif self._filename is not None:
- return open(self._filename)
+ return open(self._filename, 'rb')
def get_filename(self):
"""Get file as a fs path."""
if self._filename is not None:
return self._filename
- elif self._string is not None:
- from tempfile import NamedTemporaryFile
+ elif self._bytes is not None:
temp = NamedTemporaryFile(prefix='librarian-', delete=False)
- temp.write(self._string)
+ temp.write(self._bytes)
temp.close()
self._filename = temp.name
return self._filename
@@ -279,6 +274,6 @@ class OutputFile(object):
shutil.copy(self.get_filename(), path)
-class URLOpener(urllib.FancyURLopener):
+class URLOpener(FancyURLopener):
version = 'FNP Librarian (http://github.com/fnp/librarian)'
urllib._urlopener = URLOpener()
diff --git a/librarian/book2anything.py b/librarian/book2anything.py
index 0da3b61..948d9fd 100755
--- a/librarian/book2anything.py
+++ b/librarian/book2anything.py
@@ -4,9 +4,11 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import print_function, unicode_literals
+
import os.path
import optparse
-
+import six
from librarian import DirDocProvider, ParseError
from librarian.parser import WLDocument
from librarian.cover import make_cover
@@ -102,7 +104,10 @@ class Book2Anything(object):
try:
for main_input in input_filenames:
if options.verbose:
- print main_input
+ print(main_input)
+
+ if isinstance(main_input, six.binary_type):
+ main_input = main_input.decode('utf-8')
# Where to find input?
if cls.uses_provider:
@@ -126,9 +131,9 @@ class Book2Anything(object):
doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext)
- except ParseError, e:
- print '%(file)s:%(name)s:%(message)s' % {
+ except ParseError as e:
+ print('%(file)s:%(name)s:%(message)s' % {
'file': main_input,
'name': e.__class__.__name__,
'message': e
- }
+ })
diff --git a/librarian/cover.py b/librarian/cover.py
index 29e24c8..09c8071 100644
--- a/librarian/cover.py
+++ b/librarian/cover.py
@@ -3,9 +3,11 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
import re
from PIL import Image, ImageFont, ImageDraw, ImageFilter
-from StringIO import StringIO
+from six import BytesIO
from librarian import get_resource, OutputFile, URLOpener
@@ -69,7 +71,7 @@ class TextBox(object):
line_width = self.draw.textsize(line, font=font)[0]
line = line.strip() + ' '
- pos_x = (self.max_width - line_width) / 2
+ pos_x = (self.max_width - line_width) // 2
if shadow_color:
self.shadow_draw.text(
@@ -144,7 +146,7 @@ class Cover(object):
if format is not None:
self.format = format
if width and height:
- self.height = height * self.width / width
+ self.height = int(round(height * self.width / width))
scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
if scale >= 1:
self.scale = scale
@@ -171,8 +173,8 @@ class Cover(object):
# WL logo
if metr.logo_width:
logo = Image.open(get_resource('res/wl-logo.png'))
- logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
- img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom))
+ logo = logo.resize((metr.logo_width, int(round(logo.size[1] * metr.logo_width / logo.size[0]))))
+ img.paste(logo, ((metr.width - metr.logo_width) // 2, img.size[1] - logo.size[1] - metr.logo_bottom))
top = metr.author_top
tbox = TextBox(
@@ -223,9 +225,9 @@ class Cover(object):
return self.final_image().save(*args, **default_kwargs)
def output_file(self, *args, **kwargs):
- imgstr = StringIO()
+ imgstr = BytesIO()
self.save(imgstr, *args, **kwargs)
- return OutputFile.from_string(imgstr.getvalue())
+ return OutputFile.from_bytes(imgstr.getvalue())
class WLCover(Cover):
@@ -347,9 +349,9 @@ class WLCover(Cover):
elif self.box_position == 'bottom':
box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
else: # Middle.
- box_top = (metr.height - box_img.size[1]) / 2
+ box_top = (metr.height - box_img.size[1]) // 2
- box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2
+ box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) // 2
# Draw the white box.
ImageDraw.Draw(img).rectangle(
@@ -389,17 +391,17 @@ class WLCover(Cover):
if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
resized = (
trg_size[0],
- src.size[1] * trg_size[0] / src.size[0]
+ int(round(src.size[1] * trg_size[0] / src.size[0]))
)
- cut = (resized[1] - trg_size[1]) / 2
+ cut = (resized[1] - trg_size[1]) // 2
src = src.resize(resized, Image.ANTIALIAS)
src = src.crop((0, cut, src.size[0], src.size[1] - cut))
else:
resized = (
- src.size[0] * trg_size[1] / src.size[1],
+ int(round(src.size[0] * trg_size[1] / src.size[1])),
trg_size[1],
)
- cut = (resized[0] - trg_size[0]) / 2
+ cut = (resized[0] - trg_size[0]) // 2
src = src.resize(resized, Image.ANTIALIAS)
src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
@@ -448,11 +450,11 @@ class LogoWLCover(WLCover):
img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
cursor = metr.width - metr.gradient_logo_margin_right
- logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2
+ logo_top = int(metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2)
for logo_path in self.gradient_logos[::-1]:
logo = Image.open(get_resource(logo_path))
logo = logo.resize(
- (logo.size[0] * metr.gradient_logo_height / logo.size[1], metr.gradient_logo_height),
+ (int(round(logo.size[0] * metr.gradient_logo_height / logo.size[1])), metr.gradient_logo_height),
Image.ANTIALIAS)
cursor -= logo.size[0]
img.paste(logo, (cursor, logo_top), mask=logo)
diff --git a/librarian/dcparser.py b/librarian/dcparser.py
index f8dfaf9..eeb750a 100644
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -3,10 +3,14 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from xml.parsers.expat import ExpatError
from datetime import date
+from functools import total_ordering
import time
import re
+import six
from librarian.util import roman_to_int
from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
@@ -16,7 +20,7 @@ import lxml.etree as etree # ElementTree API using libxml2
from lxml.etree import XMLSyntaxError
-class TextPlus(unicode):
+class TextPlus(six.text_type):
pass
@@ -27,6 +31,8 @@ class DatePlus(date):
# ==============
# = Converters =
# ==============
+@six.python_2_unicode_compatible
+@total_ordering
class Person(object):
"""Single person with last name and a list of first names."""
def __init__(self, last_name, *first_names):
@@ -55,13 +61,13 @@ class Person(object):
def __eq__(self, right):
return self.last_name == right.last_name and self.first_names == right.first_names
- def __cmp__(self, other):
- return cmp((self.last_name, self.first_names), (other.last_name, other.first_names))
+ def __lt__(self, other):
+ return (self.last_name, self.first_names) < (other.last_name, other.first_names)
def __hash__(self):
return hash((self.last_name, self.first_names))
- def __unicode__(self):
+ def __str__(self):
if len(self.first_names) > 0:
return '%s, %s' % (self.last_name, ' '.join(self.first_names))
else:
@@ -83,7 +89,7 @@ for now we will translate this to some single date losing information of course.
"""
try:
# check out the "N. poł X w." syntax
- if isinstance(text, str):
+ if isinstance(text, six.binary_type):
text = text.decode("utf-8")
century_format = u"(?:([12]) *poÅ[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
@@ -94,7 +100,7 @@ for now we will translate this to some single date losing information of course.
if m:
half = m.group(1)
decade = m.group(3)
- century = roman_to_int(str(m.group(2)))
+ century = roman_to_int(m.group(2))
if half is not None:
if decade is not None:
raise ValueError("Bad date format. Cannot specify both half and decade of century")
@@ -114,7 +120,7 @@ for now we will translate this to some single date losing information of course.
raise ValueError
return DatePlus(t[0], t[1], t[2])
- except ValueError, e:
+ except ValueError as e:
raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
@@ -123,7 +129,7 @@ def as_person(text):
def as_unicode(text):
- if isinstance(text, unicode):
+ if isinstance(text, six.text_type):
return text
else:
return TextPlus(text.decode('utf-8'))
@@ -174,7 +180,7 @@ class Field(object):
if hasattr(val[0], 'lang'):
setattr(nv, 'lang', val[0].lang)
return nv
- except ValueError, e:
+ except ValueError as e:
raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
def validate(self, fdict, fallbacks=None, strict=False):
@@ -221,9 +227,7 @@ class DCInfo(type):
return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
-class WorkInfo(object):
- __metaclass__ = DCInfo
-
+class WorkInfo(six.with_metaclass(DCInfo, object)):
FIELDS = (
Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
Field(DCNS('title'), 'title'),
@@ -255,9 +259,8 @@ class WorkInfo(object):
)
@classmethod
- def from_string(cls, xml, *args, **kwargs):
- from StringIO import StringIO
- return cls.from_file(StringIO(xml), *args, **kwargs)
+ def from_bytes(cls, xml, *args, **kwargs):
+ return cls.from_file(six.BytesIO(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
@@ -282,9 +285,9 @@ class WorkInfo(object):
# extract data from the element and make the info
return cls.from_element(desc_tag, *args, **kwargs)
- except XMLSyntaxError, e:
+ except XMLSyntaxError as e:
raise ParseError(e)
- except ExpatError, e:
+ except ExpatError as e:
raise ParseError(e)
@classmethod
@@ -306,7 +309,7 @@ class WorkInfo(object):
fv = field_dict.get(e.tag, [])
if e.text is not None:
text = e.text
- if not isinstance(text, unicode):
+ if not isinstance(text, six.text_type):
text = text.decode('utf-8')
val = TextPlus(text)
val.lang = e.attrib.get(XMLNS('lang'), lang)
@@ -394,11 +397,11 @@ class WorkInfo(object):
for x in v:
e = etree.Element(field.uri)
if x is not None:
- e.text = unicode(x)
+ e.text = six.text_type(x)
description.append(e)
else:
e = etree.Element(field.uri)
- e.text = unicode(v)
+ e.text = six.text_type(v)
description.append(e)
return root
@@ -413,9 +416,9 @@ class WorkInfo(object):
if field.multiple:
if len(v) == 0:
continue
- v = [unicode(x) for x in v if x is not None]
+ v = [six.text_type(x) for x in v if x is not None]
else:
- v = unicode(v)
+ v = six.text_type(v)
dc[field.name] = {'uri': field.uri, 'value': v}
rdf['fields'] = dc
@@ -430,15 +433,15 @@ class WorkInfo(object):
if field.multiple:
if len(v) == 0:
continue
- v = [unicode(x) for x in v if x is not None]
+ v = [six.text_type(x) for x in v if x is not None]
else:
- v = unicode(v)
+ v = six.text_type(v)
result[field.name] = v
if field.salias:
v = getattr(self, field.salias)
if v is not None:
- result[field.salias] = unicode(v)
+ result[field.salias] = six.text_type(v)
return result
diff --git a/librarian/embeds/__init__.py b/librarian/embeds/__init__.py
index 3b1abdb..fa74530 100644
--- a/librarian/embeds/__init__.py
+++ b/librarian/embeds/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import importlib
from lxml import etree
diff --git a/librarian/embeds/latex.py b/librarian/embeds/latex.py
index 0201d08..8425d03 100644
--- a/librarian/embeds/latex.py
+++ b/librarian/embeds/latex.py
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
import os
import shutil
from subprocess import call, PIPE
@@ -10,14 +12,14 @@ from . import DataEmbed, create_embed, downgrades_to
class LaTeX(DataEmbed):
@downgrades_to('image/png')
def to_png(self):
- tmpl = open(get_resource('res/embeds/latex/template.tex')).read().decode('utf-8')
+ tmpl = open(get_resource('res/embeds/latex/template.tex'), 'rb').read().decode('utf-8')
tempdir = mkdtemp('-librarian-embed-latex')
fpath = os.path.join(tempdir, 'doc.tex')
- with open(fpath, 'w') as f:
+ with open(fpath, 'wb') as f:
f.write((tmpl % {'code': self.data}).encode('utf-8'))
call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
os.path.join(tempdir, 'doc.png')])
- pngdata = open(os.path.join(tempdir, 'doc.png')).read()
+ pngdata = open(os.path.join(tempdir, 'doc.png'), 'rb').read()
shutil.rmtree(tempdir)
return create_embed('image/png', data=pngdata)
diff --git a/librarian/embeds/mathml.py b/librarian/embeds/mathml.py
index dd78f05..bd58baf 100644
--- a/librarian/embeds/mathml.py
+++ b/librarian/embeds/mathml.py
@@ -1,5 +1,8 @@
# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
from lxml import etree
+import six
from librarian import get_resource
from . import TreeEmbed, create_embed, downgrades_to
@@ -9,4 +12,4 @@ class MathML(TreeEmbed):
def to_latex(self):
xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
output = self.tree.xslt(xslt)
- return create_embed('application/x-latex', data=unicode(output))
+ return create_embed('application/x-latex', data=six.text_type(output))
diff --git a/librarian/epub.py b/librarian/epub.py
index 333b56f..e9670d5 100644
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -3,13 +3,13 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
import os
import os.path
import re
import subprocess
-from StringIO import StringIO
+from six import BytesIO
from copy import deepcopy
from mimetypes import guess_type
@@ -30,7 +30,7 @@ functions.reg_lang_code_3to2()
def squeeze_whitespace(s):
- return re.sub(r'\s+', ' ', s)
+ return re.sub(b'\\s+', b' ', s)
def set_hyph_language(source_tree):
@@ -38,7 +38,7 @@ def set_hyph_language(source_tree):
result = ''
text = ''.join(text)
with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
- for line in f:
+ for line in f.read().decode('latin1').split('\n'):
list = line.strip().split('|')
if list[0] == text:
result = list[2]
@@ -77,12 +77,12 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph):
def inner_xml(node):
""" returns node's text and children as a string
- >>> print inner_xml(etree.fromstring('xyz'))
+ >>> print(inner_xml(etree.fromstring('xyz')))
xyz
"""
nt = node.text if node.text is not None else ''
- return ''.join([nt] + [etree.tostring(child) for child in node])
+ return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
def set_inner_xml(node, text):
@@ -90,7 +90,7 @@ def set_inner_xml(node, text):
>>> e = etree.fromstring('bxx')
>>> set_inner_xml(e, 'xyz')
- >>> print etree.tostring(e)
+ >>> print(etree.tostring(e, encoding='unicode'))
xyz
"""
@@ -102,7 +102,7 @@ def set_inner_xml(node, text):
def node_name(node):
""" Find out a node's name
- >>> print node_name(etree.fromstring('XYZ'))
+ >>> print(node_name(etree.fromstring('XYZ')))
XYZ
"""
@@ -122,7 +122,7 @@ def xslt(xml, sheet, **kwargs):
xml = etree.ElementTree(xml)
with open(sheet) as xsltf:
transform = etree.XSLT(etree.parse(xsltf))
- params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+ params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
return transform(xml, **params)
@@ -172,8 +172,8 @@ class Stanza(object):
>>> s = etree.fromstring("a c c/\\nbx/\\nyc/ \\nd")
>>> Stanza(s).versify()
- >>> print etree.tostring(s)
- a c cbx/
+ >>> print(etree.tostring(s, encoding='unicode'))
+ a ccbx/
ycd
"""
@@ -325,8 +325,8 @@ class TOC(object):
return "\n".join(texts)
def html(self):
- with open(get_resource('epub/toc.html')) as f:
- t = unicode(f.read(), 'utf-8')
+ with open(get_resource('epub/toc.html'), 'rb') as f:
+ t = f.read().decode('utf-8')
return t % self.html_part()
@@ -546,16 +546,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
mime = zipfile.ZipInfo()
mime.filename = 'mimetype'
mime.compress_type = zipfile.ZIP_STORED
- mime.extra = ''
- zip.writestr(mime, 'application/epub+zip')
+ mime.extra = b''
+ zip.writestr(mime, b'application/epub+zip')
zip.writestr(
'META-INF/container.xml',
- ''
- ''
- ''
- ''
+ b''
+ b''
+ b''
+ b''
)
zip.write(get_resource('res/wl-logo-small.png'),
os.path.join('OPS', 'logo_wolnelektury.png'))
@@ -569,7 +569,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
if cover is True:
cover = make_cover
- cover_file = StringIO()
+ cover_file = BytesIO()
bound_cover = cover(document.book_info)
bound_cover.save(cover_file)
cover_name = 'cover.%s' % bound_cover.ext()
@@ -602,12 +602,12 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
annotations = etree.Element('annotations')
toc_file = etree.fromstring(
- ''
- ''
- ''
+ b''
+ b''
+ b''
)
nav_map = toc_file[-1]
@@ -645,7 +645,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
' '))
spine.append(etree.fromstring(
''))
- html_string = open(get_resource('epub/support.html')).read()
+ html_string = open(get_resource('epub/support.html'), 'rb').read()
chars.update(used_chars(etree.fromstring(html_string)))
zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
@@ -679,7 +679,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
os.path.join(tmpdir, fname)]
env = {"PERL_USE_UNSAFE_INC": "1"}
if verbose:
- print "Running font-optimizer"
+ print("Running font-optimizer")
subprocess.check_call(optimizer_call, env=env)
else:
dev_null = open(os.devnull, 'w')
diff --git a/librarian/fb2.py b/librarian/fb2.py
index 25a4c1f..6dd1c35 100644
--- a/librarian/fb2.py
+++ b/librarian/fb2.py
@@ -3,9 +3,12 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
import os.path
from copy import deepcopy
from lxml import etree
+import six
from librarian import functions, OutputFile
from .epub import replace_by_verse
@@ -62,6 +65,6 @@ def transform(wldoc, verbose=False,
result = document.transform(style)
- return OutputFile.from_string(unicode(result).encode('utf-8'))
+ return OutputFile.from_bytes(six.text_type(result).encode('utf-8'))
# vim:et
diff --git a/librarian/functions.py b/librarian/functions.py
index 75e2911..e5a47d6 100644
--- a/librarian/functions.py
+++ b/librarian/functions.py
@@ -3,6 +3,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from lxml import etree
import re
@@ -112,7 +114,7 @@ def reg_lang_code_3to2():
result = ''
text = ''.join(text)
with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
- for line in f:
+ for line in f.read().decode('latin1').split('\n'):
list = line.strip().split('|')
if list[0] == text:
result = list[2]
diff --git a/librarian/html.py b/librarian/html.py
index a566f71..67f0061 100644
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -3,9 +3,10 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import print_function, unicode_literals
+
import os
import re
-import cStringIO
import copy
from lxml import etree
@@ -13,6 +14,8 @@ from librarian import XHTMLNS, ParseError, OutputFile
from librarian import functions
from lxml.etree import XMLSyntaxError, XSLTApplyError
+import six
+
functions.reg_substitute_entities()
functions.reg_person_name()
@@ -33,11 +36,10 @@ def html_has_content(text):
def transform_abstrakt(abstrakt_element):
- from cStringIO import StringIO
style_filename = get_stylesheet('legacy')
style = etree.parse(style_filename)
xml = etree.tostring(abstrakt_element)
- document = etree.parse(StringIO(xml.replace('abstrakt', 'dlugi_cytat'))) # HACK
+ document = etree.parse(six.BytesIO(xml.replace('abstrakt', 'dlugi_cytat'))) # HACK
result = document.xslt(style)
html = re.sub('', '', etree.tostring(result))
return re.sub('?blockquote[^>]*>', '', html)
@@ -77,16 +79,17 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None):
add_table_of_themes(result.getroot())
add_table_of_contents(result.getroot())
- return OutputFile.from_string(etree.tostring(
+ return OutputFile.from_bytes(etree.tostring(
result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
else:
return None
except KeyError:
raise ValueError("'%s' is not a valid stylesheet.")
- except (XMLSyntaxError, XSLTApplyError), e:
+ except (XMLSyntaxError, XSLTApplyError) as e:
raise ParseError(e)
+@six.python_2_unicode_compatible
class Fragment(object):
def __init__(self, id, themes):
super(Fragment, self).__init__()
@@ -106,7 +109,7 @@ class Fragment(object):
try:
stack.pop()
except IndexError:
- print 'CLOSED NON-OPEN TAG:', element
+ print('CLOSED NON-OPEN TAG:', element)
stack.reverse()
return self.events + stack
@@ -128,7 +131,7 @@ class Fragment(object):
return ''.join(result)
- def __unicode__(self):
+ def __str__(self):
return self.to_string()
@@ -139,7 +142,7 @@ def extract_fragments(input_filename):
# iterparse would die on a HTML document
parser = etree.HTMLParser(encoding='utf-8')
- buf = cStringIO.StringIO()
+ buf = six.BytesIO()
buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
buf.seek(0)
@@ -173,7 +176,7 @@ def extract_fragments(input_filename):
try:
fragment = open_fragments[element.get('fid')]
except KeyError:
- print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
+ print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
else:
closed_fragments[fragment.id] = fragment
del open_fragments[fragment.id]
@@ -207,7 +210,7 @@ def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None
link_text = prefix
anchor = etree.Element('a', href='#%s' % prefix)
anchor.set('class', 'anchor')
- anchor.text = unicode(link_text)
+ anchor.text = six.text_type(link_text)
parent.insert(index, anchor)
if with_target:
@@ -247,7 +250,7 @@ def raw_printable_text(element):
for e in working.findall('a'):
if e.get('class') in ('annotation', 'theme-begin'):
e.text = ''
- return etree.tostring(working, method='text', encoding=unicode).strip()
+ return etree.tostring(working, method='text', encoding='unicode').strip()
def add_table_of_contents(root):
@@ -300,7 +303,7 @@ def add_table_of_themes(root):
theme_names = [s.strip() for s in fragment.text.split(',')]
for theme_name in theme_names:
book_themes.setdefault(theme_name, []).append(fragment.get('name'))
- book_themes = book_themes.items()
+ book_themes = list(book_themes.items())
book_themes.sort(key=lambda s: sortify(s[0]))
themes_div = etree.Element('div', id="themes")
themes_ol = etree.SubElement(themes_div, 'ol')
@@ -326,7 +329,7 @@ def extract_annotations(html_path):
parser = etree.HTMLParser(encoding='utf-8')
tree = etree.parse(html_path, parser)
footnotes = tree.find('//*[@id="footnotes"]')
- re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
+ re_qualifier = re.compile(r'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
if footnotes is not None:
for footnote in footnotes.findall('div'):
fn_type = footnote.get('class').split('-')[1]
@@ -335,8 +338,8 @@ def extract_annotations(html_path):
footnote.text = None
if len(footnote) and footnote[-1].tail == '\n':
footnote[-1].tail = None
- text_str = etree.tostring(footnote, method='text', encoding=unicode).strip()
- html_str = etree.tostring(footnote, method='html', encoding=unicode).strip()
+ text_str = etree.tostring(footnote, method='text', encoding='unicode').strip()
+ html_str = etree.tostring(footnote, method='html', encoding='unicode').strip()
match = re_qualifier.match(text_str)
if match:
diff --git a/librarian/hyphenator.py b/librarian/hyphenator.py
index 18d402b..aa5b4c3 100644
--- a/librarian/hyphenator.py
+++ b/librarian/hyphenator.py
@@ -14,6 +14,7 @@ info@wilbertberendsen.nl
License: LGPL.
"""
+from __future__ import print_function, unicode_literals
import sys
import re
@@ -235,5 +236,5 @@ if __name__ == "__main__":
h = Hyphenator(dict_file, left=1, right=1)
for i in h(word):
- print i
+ print(i)
diff --git a/librarian/mobi.py b/librarian/mobi.py
index c3c8f28..6f1f5d6 100644
--- a/librarian/mobi.py
+++ b/librarian/mobi.py
@@ -3,6 +3,7 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
from copy import deepcopy
import os
@@ -13,13 +14,16 @@ from librarian import OutputFile
def transform(wldoc, verbose=False, sample=None, cover=None,
- use_kindlegen=False, flags=None, hyphenate=True, ilustr_path=''):
+ use_kindlegen=False, flags=None, hyphenate=True, ilustr_path='',
+ converter_path=None):
""" produces a MOBI file
wldoc: a WLDocument
sample=n: generate sample e-book (with at least n paragraphs)
cover: a cover.Cover factory overriding default
flags: less-advertising,
+ converter_path: override path to MOBI converter,
+ either ebook-convert or kindlegen
"""
document = deepcopy(wldoc)
@@ -40,10 +44,12 @@ def transform(wldoc, verbose=False, sample=None, cover=None,
if use_kindlegen:
output_file_basename = os.path.basename(output_file.name)
- subprocess.check_call(['kindlegen', '-c2', epub.get_filename(),
- '-o', output_file_basename], **kwargs)
+ subprocess.check_call([converter_path or 'kindlegen',
+ '-c2', epub.get_filename(),
+ '-o', output_file_basename], **kwargs)
else:
- subprocess.check_call(['ebook-convert', epub.get_filename(),
+ subprocess.check_call([converter_path or 'ebook-convert',
+ epub.get_filename(),
output_file.name, '--no-inline-toc',
'--mobi-file-type=both',
'--mobi-ignore-margins'], **kwargs)
diff --git a/librarian/packagers.py b/librarian/packagers.py
index f57a983..b3f5548 100644
--- a/librarian/packagers.py
+++ b/librarian/packagers.py
@@ -3,11 +3,13 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import print_function, unicode_literals
+
import os
from librarian import pdf, epub, mobi, DirDocProvider, ParseError
from librarian.parser import WLDocument
-from util import makedirs
+from .util import makedirs
class Packager(object):
@@ -39,14 +41,14 @@ class Packager(object):
try:
for main_input in input_filenames:
if verbose:
- print main_input
+ print(main_input)
cls.prepare_file(main_input, output_dir, verbose, overwrite)
- except ParseError, e:
- print '%(file)s:%(name)s:%(message)s' % {
+ except ParseError as e:
+ print('%(file)s:%(name)s:%(message)s' % {
'file': main_input,
'name': e.__class__.__name__,
'message': e.message
- }
+ })
class EpubPackager(Packager):
diff --git a/librarian/parser.py b/librarian/parser.py
index 43cb0a9..73ddd52 100644
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,6 +3,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
from librarian import RDFNS
from librarian.cover import make_cover
@@ -14,7 +16,7 @@ from lxml.etree import XMLSyntaxError, XSLTApplyError
import os
import re
-from StringIO import StringIO
+import six
class WLDocument(object):
@@ -45,14 +47,14 @@ class WLDocument(object):
self.book_info = None
@classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
+ def from_bytes(cls, xml, *args, **kwargs):
+ return cls.from_file(six.BytesIO(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
# first, prepare for parsing
- if isinstance(xmlfile, basestring):
+ if isinstance(xmlfile, six.text_type):
file = open(xmlfile, 'rb')
try:
data = file.read()
@@ -61,17 +63,17 @@ class WLDocument(object):
else:
data = xmlfile.read()
- if not isinstance(data, unicode):
+ if not isinstance(data, six.text_type):
data = data.decode('utf-8')
data = data.replace(u'\ufeff', '')
try:
parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+ tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
return cls(tree, *args, **kwargs)
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+ except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
raise ParseError(e)
def swap_endlines(self):
@@ -139,7 +141,7 @@ class WLDocument(object):
def serialize(self):
self.update_dc()
- return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
+ return etree.tostring(self.edoc, encoding='unicode', pretty_print=True)
def merge_chunks(self, chunk_dict):
unmerged = []
@@ -150,7 +152,7 @@ class WLDocument(object):
node = self.edoc.xpath(xpath)[0]
repl = etree.fromstring(u"<%s>%s%s>" % (node.tag, data, node.tag))
node.getparent().replace(node, repl)
- except Exception, e:
+ except Exception as e:
unmerged.append(repr((key, xpath, e)))
return unmerged
@@ -220,7 +222,7 @@ class WLDocument(object):
if output_dir_path:
save_path = output_dir_path
if make_author_dir:
- save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
+ save_path = os.path.join(save_path, six.text_type(self.book_info.author).encode('utf-8'))
save_path = os.path.join(save_path, self.book_info.url.slug)
if ext:
save_path += '.%s' % ext
diff --git a/librarian/partners.py b/librarian/partners.py
index 33198f7..671cf4d 100644
--- a/librarian/partners.py
+++ b/librarian/partners.py
@@ -11,9 +11,10 @@ along with custom cover images etc.
New partners shouldn't be added here, but in the partners repository.
"""
+from __future__ import print_function, unicode_literals
from librarian import packagers, cover
-from util import makedirs
+from .util import makedirs
class GandalfEpub(packagers.EpubPackager):
@@ -79,7 +80,7 @@ class Virtualo(packagers.Packager):
try:
for main_input in input_filenames:
if verbose:
- print main_input
+ print(main_input)
path, fname = os.path.realpath(main_input).rsplit('/', 1)
provider = DirDocProvider(path)
slug, ext = os.path.splitext(fname)
@@ -110,13 +111,13 @@ class Virtualo(packagers.Packager):
doc.save_output_file(
doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
output_path=outfile_sample)
- except ParseError, e:
- print '%(file)s:%(name)s:%(message)s' % {
+ except ParseError as e:
+ print('%(file)s:%(name)s:%(message)s' % {
'file': main_input,
'name': e.__class__.__name__,
'message': e.message
- }
+ })
xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
- xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
+ xml_file.write(etree.tostring(xml, pretty_print=True, encoding='unicode').encode('utf-8'))
xml_file.close()
diff --git a/librarian/pdf.py b/librarian/pdf.py
index d67bddf..e6d897d 100644
--- a/librarian/pdf.py
+++ b/librarian/pdf.py
@@ -9,11 +9,11 @@ Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it by XeLaTeX.
"""
-from __future__ import with_statement
+from __future__ import print_function, unicode_literals
+
import os
import os.path
import shutil
-from StringIO import StringIO
from tempfile import mkdtemp, NamedTemporaryFile
import re
from copy import deepcopy
@@ -23,6 +23,7 @@ from itertools import chain
from Texml.processor import process
from lxml import etree
from lxml.etree import XMLSyntaxError, XSLTApplyError
+import six
from librarian.dcparser import Person
from librarian.parser import WLDocument
@@ -57,7 +58,7 @@ def insert_tags(doc, split_re, tagname, exclude=None):
>>> t = etree.fromstring('A-B-CX-Y-Z')
>>> insert_tags(t, re.compile('-'), 'd')
- >>> print etree.tostring(t)
+ >>> print(etree.tostring(t, encoding='unicode'))
ABCXYZ
"""
@@ -196,11 +197,11 @@ def package_available(package, args='', verbose=False):
tempdir = mkdtemp('-wl2pdf-test')
fpath = os.path.join(tempdir, 'test.tex')
f = open(fpath, 'w')
- f.write(r"""
- \documentclass{wl}
- \usepackage[%s]{%s}
- \begin{document}
- \end{document}
+ f.write("""
+ \\documentclass{wl}
+ \\usepackage[%s]{%s}
+ \\begin{document}
+ \\end{document}
""" % (args, package))
f.close()
if verbose:
@@ -306,8 +307,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
del document # no longer needed large object :)
tex_path = os.path.join(temp, 'doc.tex')
- fout = open(tex_path, 'w')
- process(StringIO(texml), fout, 'utf-8')
+ fout = open(tex_path, 'wb')
+ process(six.BytesIO(texml), fout, 'utf-8')
fout.close()
del texml
@@ -329,7 +330,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
# some things work better when compiled twice
# (table of contents, [line numbers - disabled])
- for run in xrange(2):
+ for run in range(2):
if verbose:
p = call(['xelatex', tex_path])
else:
@@ -346,7 +347,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
shutil.rmtree(temp)
return OutputFile.from_filename(output_file.name)
- except (XMLSyntaxError, XSLTApplyError), e:
+ except (XMLSyntaxError, XSLTApplyError) as e:
raise ParseError(e)
@@ -361,14 +362,14 @@ def load_including_children(wldoc=None, provider=None, uri=None):
text = f.read().decode('utf-8')
f.close()
elif wldoc is not None:
- text = etree.tostring(wldoc.edoc, encoding=unicode)
+ text = etree.tostring(wldoc.edoc, encoding='unicode')
provider = wldoc.provider
else:
raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
- text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text)
+ text = re.sub(r"([\u0400-\u04ff]+)", r"\1", text)
- document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
+ document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider)
document.swap_endlines()
for child_uri in document.book_info.parts:
diff --git a/librarian/picture.py b/librarian/picture.py
index 1aa1d07..d255f55 100644
--- a/librarian/picture.py
+++ b/librarian/picture.py
@@ -1,14 +1,16 @@
# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
from operator import and_
-from dcparser import Field, WorkInfo, DCNS
+from .dcparser import Field, WorkInfo, DCNS
from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
from xml.parsers.expat import ExpatError
from os import path
-from StringIO import StringIO
from lxml import etree
from lxml.etree import (XMLSyntaxError, XSLTApplyError, Element)
import re
+import six
class WLPictureURI(WLURI):
@@ -99,14 +101,14 @@ class WLPicture(object):
self.frame = None
@classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
+ def from_bytes(cls, xml, *args, **kwargs):
+ return cls.from_file(six.BytesIO(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
# first, prepare for parsing
- if isinstance(xmlfile, basestring):
+ if isinstance(xmlfile, six.text_type):
file = open(xmlfile, 'rb')
try:
data = file.read()
@@ -115,7 +117,7 @@ class WLPicture(object):
else:
data = xmlfile.read()
- if not isinstance(data, unicode):
+ if not isinstance(data, six.text_type):
data = data.decode('utf-8')
data = data.replace(u'\ufeff', '')
@@ -126,12 +128,12 @@ class WLPicture(object):
try:
parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+ tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
me.load_frame_info()
return me
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+ except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
raise ParseError(e)
@property
@@ -184,7 +186,7 @@ class WLPicture(object):
pd['coords'] = coords
def want_unicode(x):
- if not isinstance(x, unicode):
+ if not isinstance(x, six.text_type):
return x.decode('utf-8')
else:
return x
diff --git a/librarian/sponsor.py b/librarian/sponsor.py
index c9bc35b..1374cda 100644
--- a/librarian/sponsor.py
+++ b/librarian/sponsor.py
@@ -3,6 +3,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import get_resource
diff --git a/librarian/text.py b/librarian/text.py
index 4064849..7ba6d29 100644
--- a/librarian/text.py
+++ b/librarian/text.py
@@ -3,10 +3,13 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
import copy
from librarian import functions, OutputFile
from lxml import etree
import os
+import six
functions.reg_substitute_entities()
@@ -103,7 +106,7 @@ def transform(wldoc, flags=None, **options):
'description': description,
'url': url,
'license_description': license_description,
- 'text': unicode(result),
+ 'text': six.text_type(result),
'source': source,
'contributors': contributors,
'funders': funders,
@@ -111,5 +114,5 @@ def transform(wldoc, flags=None, **options):
'isbn': isbn,
}).encode('utf-8')
else:
- result = unicode(result).encode('utf-8')
- return OutputFile.from_string("\r\n".join(result.splitlines()) + "\r\n")
+ result = six.text_type(result).encode('utf-8')
+ return OutputFile.from_bytes(b"\r\n".join(result.splitlines()) + b"\r\n")
diff --git a/librarian/util.py b/librarian/util.py
index 0886fd5..c302084 100644
--- a/librarian/util.py
+++ b/librarian/util.py
@@ -2,6 +2,8 @@
# by Paul Winkler
# http://code.activestate.com/recipes/81611-roman-numerals/
# PSFL (GPL compatible)
+from __future__ import print_function, unicode_literals
+
import os
@@ -18,11 +20,11 @@ def int_to_roman(input):
Traceback (most recent call last):
ValueError: Argument must be between 1 and 3999
- >>> int_to_roman(1.5)
+ >>> int_to_roman(1.5) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
TypeError: expected integer, got
- >>> for i in range(1, 21): print int_to_roman(i)
+ >>> for i in range(1, 21): print(int_to_roman(i))
...
I
II
@@ -44,15 +46,15 @@ def int_to_roman(input):
XVIII
XIX
XX
- >>> print int_to_roman(2000)
+ >>> print(int_to_roman(2000))
MM
- >>> print int_to_roman(1999)
+ >>> print(int_to_roman(1999))
MCMXCIX
"""
if type(input) != type(1):
- raise TypeError, "expected integer, got %s" % type(input)
+ raise TypeError("expected integer, got %s" % type(input))
if not 0 < input < 4000:
- raise ValueError, "Argument must be between 1 and 3999"
+ raise ValueError("Argument must be between 1 and 3999")
ints = (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1)
nums = ('M', 'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
result = ""
@@ -66,17 +68,17 @@ def roman_to_int(input):
"""
Convert a roman numeral to an integer.
- >>> r = range(1, 4000)
+ >>> r = list(range(1, 4000))
>>> nums = [int_to_roman(i) for i in r]
>>> ints = [roman_to_int(n) for n in nums]
- >>> print r == ints
+ >>> print(r == ints)
1
>>> roman_to_int('VVVIV')
Traceback (most recent call last):
...
ValueError: input is not a valid roman numeral: VVVIV
- >>> roman_to_int(1)
+ >>> roman_to_int(1) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError: expected string, got
@@ -90,14 +92,14 @@ def roman_to_int(input):
ValueError: input is not a valid roman numeral: IL
"""
if type(input) != type(""):
- raise TypeError, "expected string, got %s" % type(input)
+ raise TypeError("expected string, got %s" % type(input))
input = input.upper()
nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I']
ints = [1000, 500, 100, 50, 10, 5, 1]
places = []
for c in input:
if not c in nums:
- raise ValueError, "input is not a valid roman numeral: %s" % input
+ raise ValueError("input is not a valid roman numeral: %s" % input)
for i in range(len(input)):
c = input[i]
value = ints[nums.index(c)]
@@ -116,9 +118,9 @@ def roman_to_int(input):
if int_to_roman(sum) == input:
return sum
else:
- raise ValueError, 'input is not a valid roman numeral: %s' % input
+ raise ValueError('input is not a valid roman numeral: %s' % input)
def makedirs(path):
if not os.path.isdir(path):
- os.makedirs(path)
\ No newline at end of file
+ os.makedirs(path)
diff --git a/scripts/book2cover b/scripts/book2cover
index 444563c..a81fc63 100755
--- a/scripts/book2cover
+++ b/scripts/book2cover
@@ -4,8 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from StringIO import StringIO
-from librarian import OutputFile
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything, Option
diff --git a/scripts/book2epub b/scripts/book2epub
index 7a7a41d..5b906b9 100755
--- a/scripts/book2epub
+++ b/scripts/book2epub
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything, Option
diff --git a/scripts/book2fb2 b/scripts/book2fb2
index 584ae99..de4615b 100755
--- a/scripts/book2fb2
+++ b/scripts/book2fb2
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything
diff --git a/scripts/book2html b/scripts/book2html
index 2c1d04e..f6d459d 100755
--- a/scripts/book2html
+++ b/scripts/book2html
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything, Option
diff --git a/scripts/book2mobi b/scripts/book2mobi
index b283309..b0d0686 100755
--- a/scripts/book2mobi
+++ b/scripts/book2mobi
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything, Option
diff --git a/scripts/book2partner b/scripts/book2partner
index f1892bb..8982354 100755
--- a/scripts/book2partner
+++ b/scripts/book2partner
@@ -4,20 +4,15 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import print_function, unicode_literals
+
+from collections import OrderedDict
import inspect
import optparse
import os
import sys
from librarian import packagers
-try:
- from collections import OrderedDict
-except ImportError:
- try:
- from django.utils.datastructures import SortedDict
- OrderedDict = SortedDict
- except ImportError:
- OrderedDict = dict
if __name__ == '__main__':
@@ -64,12 +59,12 @@ if __name__ == '__main__':
if inspect.isclass(package) and issubclass(package, packagers.Packager):
packages[package_name] = package
if not packages:
- print 'No packages found!'
+ print('No packages found!')
if options.list_packages:
- print 'Available packages:'
+ print('Available packages:')
for package_name, package in packages.items():
- print ' ', package_name
+ print(' ', package_name)
exit(0)
if len(input_filenames) < 1 or not options.packages:
@@ -79,6 +74,6 @@ if __name__ == '__main__':
used_packages = [packages[p] for p in options.packages.split(',')]
for package in used_packages:
if options.verbose:
- print 'Package:', package.__name__
+ print('Package:', package.__name__)
package.prepare(input_filenames,
options.output_dir, options.verbose, options.overwrite)
diff --git a/scripts/book2pdf b/scripts/book2pdf
index ccb5fac..3c363f1 100755
--- a/scripts/book2pdf
+++ b/scripts/book2pdf
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything, Option
diff --git a/scripts/book2txt b/scripts/book2txt
index c706a07..0e84ac9 100755
--- a/scripts/book2txt
+++ b/scripts/book2txt
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.book2anything import Book2Anything, Option
from librarian.parser import WLDocument
diff --git a/scripts/bookfragments b/scripts/bookfragments
index 0d94497..b283297 100755
--- a/scripts/bookfragments
+++ b/scripts/bookfragments
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import print_function, unicode_literals
+
import os
import optparse
@@ -29,14 +31,14 @@ if __name__ == '__main__':
# Do some real work
for input_filename in input_filenames:
if options.verbose:
- print input_filename
+ print(input_filename)
output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
closed_fragments, open_fragments = html.extract_fragments(input_filename)
for fragment_id in open_fragments:
- print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
+ print('%s:warning:unclosed fragment #%s' % (input_filename, fragment_id))
output_file = open(output_filename, 'w')
output_file.write("""
diff --git a/scripts/fn_qualifiers_list_from_redmine.py b/scripts/fn_qualifiers_list_from_redmine.py
old mode 100644
new mode 100755
index 020b119..66b00cc
--- a/scripts/fn_qualifiers_list_from_redmine.py
+++ b/scripts/fn_qualifiers_list_from_redmine.py
@@ -5,16 +5,17 @@
This scripts reads the table of footnote qualifiers from Redmine
and produces contents of fn_qualifiers.py — a list of valid qualifiers.
"""
+from __future__ import print_function, unicode_literals
from lxml import etree
-from urllib2 import urlopen
+from six.moves.urllib.request import urlopen
url = 'http://redmine.nowoczesnapolska.org.pl/projects/wl-publikacje/wiki/Lista_skr%C3%B3t%C3%B3w'
parser = etree.HTMLParser()
tree = etree.parse(urlopen(url), parser)
-print """\
+print("""\
# -*- coding: utf-8
\"""
List of standard footnote qualifiers.
@@ -24,12 +25,12 @@ do not edit it.
from __future__ import unicode_literals
-FN_QUALIFIERS = {""".encode('utf-8')
+FN_QUALIFIERS = {""")
for td in tree.findall('//td'):
- print (" '%s': '%s'," % (
+ print((" '%s': '%s'," % (
td[0].text.replace('\\', '\\\\').replace("'", "\\'"),
td[0].tail.strip(' -').replace('\\', '\\\\').replace("'", "\\'")
- )).encode('utf-8')
+ )))
-print """ }""".encode('utf-8')
+print(""" }""")
diff --git a/scripts/genslugs b/scripts/genslugs
index a234096..9745b68 100755
--- a/scripts/genslugs
+++ b/scripts/genslugs
@@ -4,6 +4,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import print_function, unicode_literals
+
import os
import optparse
@@ -36,13 +38,13 @@ if __name__ == '__main__':
# Do some real work
for input_filename in input_filenames:
if options.verbose:
- print input_filename
+ print(input_filename)
doc = etree.parse(input_filename)
try:
title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text
except AttributeError:
- print '%s:error:Book title not found. Skipping.' % input_filename
+ print('%s:error:Book title not found. Skipping.' % input_filename)
continue
parent = ''
@@ -52,14 +54,14 @@ if __name__ == '__main__':
except AttributeError:
pass
except IndexError:
- print '%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url)
+ print('%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url))
book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url')
if book_url is None:
book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description')
book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url')
if not options.force and book_url.text.startswith('http://'):
- print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text)
+ print('%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text))
continue
book_url.text = BOOK_URL + slughifi(parent + title)[:60]
diff --git a/setup.py b/setup.py
index 10abe6e..b391f0c 100755
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
#
import os
import os.path
-from distutils.core import setup
+from setuptools import setup
def whole_tree(prefix, path):
files = []
@@ -21,7 +21,7 @@ def whole_tree(prefix, path):
setup(
name='librarian',
- version='1.6',
+ version='1.7',
description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
author="Marek StÄpniowski",
author_email='marek@stepniowski.com',
@@ -29,13 +29,15 @@ setup(
maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
url='http://github.com/fnp/librarian',
packages=['librarian', 'librarian.embeds'],
- package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
+ package_data={'librarian': ['xslt/*.xslt', 'xslt/*.xml', 'epub/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res') +
whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
include_package_data=True,
install_requires=[
- 'lxml>=2.2',
+ 'lxml>=2.2,<=4.3',
'Pillow',
+ 'six',
+ 'texml',
],
scripts=['scripts/book2html',
'scripts/book2txt',
@@ -47,5 +49,4 @@ setup(
'scripts/book2cover',
'scripts/bookfragments',
'scripts/genslugs'],
- tests_require=['nose>=0.11', 'coverage>=3.0.1'],
)
diff --git a/tests/files/dcparser/andersen_brzydkie_kaczatko.out b/tests/files/dcparser/andersen_brzydkie_kaczatko.out
index c0fb00b..9f07b39 100644
--- a/tests/files/dcparser/andersen_brzydkie_kaczatko.out
+++ b/tests/files/dcparser/andersen_brzydkie_kaczatko.out
@@ -1,5 +1,5 @@
{
- 'publisher': u'Fundacja Nowoczesna Polska',
+ 'publisher': [u'Fundacja Nowoczesna Polska'],
'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko',
'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925',
'author': u'Andersen, Hans Christian',
diff --git a/tests/files/dcparser/biedrzycki_akslop.out b/tests/files/dcparser/biedrzycki_akslop.out
index a7eeffe..588a4b7 100644
--- a/tests/files/dcparser/biedrzycki_akslop.out
+++ b/tests/files/dcparser/biedrzycki_akslop.out
@@ -1,6 +1,6 @@
{
'editors': [u'Sekuła, Aleksandra'],
- 'publisher': u'Fundacja Nowoczesna Polska',
+ 'publisher': [u'Fundacja Nowoczesna Polska'],
'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
'author': u'Biedrzycki, Miłosz',
diff --git a/tests/files/dcparser/kochanowski_piesn7.out b/tests/files/dcparser/kochanowski_piesn7.out
index b3eba1e..96198a3 100644
--- a/tests/files/dcparser/kochanowski_piesn7.out
+++ b/tests/files/dcparser/kochanowski_piesn7.out
@@ -1,5 +1,5 @@
{
- 'publisher': u'Fundacja Nowoczesna Polska',
+ 'publisher': [u'Fundacja Nowoczesna Polska'],
'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
'author': u'Kochanowski, Jan',
diff --git a/tests/files/dcparser/mickiewicz_rybka.out b/tests/files/dcparser/mickiewicz_rybka.out
index a35f935..f3c76c0 100644
--- a/tests/files/dcparser/mickiewicz_rybka.out
+++ b/tests/files/dcparser/mickiewicz_rybka.out
@@ -1,6 +1,6 @@
{
'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'],
- 'publisher': u'Fundacja Nowoczesna Polska',
+ 'publisher': [u'Fundacja Nowoczesna Polska'],
'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
'author': u'Mickiewicz, Adam',
diff --git a/tests/files/dcparser/sofokles_antygona.out b/tests/files/dcparser/sofokles_antygona.out
index d934602..477988f 100644
--- a/tests/files/dcparser/sofokles_antygona.out
+++ b/tests/files/dcparser/sofokles_antygona.out
@@ -1,6 +1,6 @@
{
'editors': [u'Sekuła, Aleksandra'],
- 'publisher': u'Fundacja Nowoczesna Polska',
+ 'publisher': [u'Fundacja Nowoczesna Polska'],
'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
'author': u'Sofokles',
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.fb2 b/tests/files/text/asnyk_miedzy_nami_expected.fb2
new file mode 100644
index 0000000..b9e4e13
--- /dev/null
+++ b/tests/files/text/asnyk_miedzy_nami_expected.fb2
@@ -0,0 +1,46 @@
+
+
+
+
+ Adam Asnyk
+ Między nami nic nie było
+
+
+
+ Utwór opracowany został w ramach projektu
+ Wolne Lektury
+ przez fundację
+ Nowoczesna Polska.
+
+
+
+
+
+ Między nami nic nie było!
+ Żadnych zwierzeń, wyznań żadnych!
+ Nic nas z sobą nie łączyło —
+ Prócz wiosennych marzeń zdradnych;
+
+
+ Prócz tych woni, barw i blasków,
+ Unoszących się w przestrzeni;
+ Prócz szumiących śpiewem lasków
+ I tej świeżej łąk zieleni;
+
+
+ Prócz tych kaskad i potoków,
+ Zraszających każdy parów,
+ Prócz girlandy tęcz, obłoków,
+ Prócz natury słodkich czarów;
+
+
+ Prócz tych wspólnych, jasnych zdrojów,
+ Z których serce zachwyt piło;
+ Prócz pierwiosnków i powojów,—
+ Między nami nic nie było!
+
+
+
+
+
+
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt
index 3942928..92cc1bd 100644
--- a/tests/files/text/asnyk_miedzy_nami_expected.txt
+++ b/tests/files/text/asnyk_miedzy_nami_expected.txt
@@ -37,6 +37,8 @@ Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domen
Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898
+Wydawca: Fundacja Nowoczesna Polska
+
Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.
Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.
diff --git a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
new file mode 100644
index 0000000..cac61d8
--- /dev/null
+++ b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
@@ -0,0 +1,22 @@
+
+
+Między nami nic nie było!
+Żadnych zwierzeń, wyznań żadnych!
+Nic nas z sobą nie łączyło —
+Prócz wiosennych marzeń zdradnych;
+
+Prócz tych woni, barw i blasków,
+Unoszących się w przestrzeni;
+Prócz szumiących śpiewem lasków
+I tej świeżej łąk zieleni;
+
+Prócz tych kaskad i potoków,
+Zraszających każdy parów,
+Prócz girlandy tęcz, obłoków,
+Prócz natury słodkich czarów;
+
+Prócz tych wspólnych, jasnych zdrojów,
+Z których serce zachwyt piło;
+Prócz pierwiosnków i powojów,—
+Między nami nic nie było!
+
diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py
index cab5b1c..4dab764 100644
--- a/tests/test_dcparser.py
+++ b/tests/test_dcparser.py
@@ -3,6 +3,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import dcparser
from lxml import etree
from nose.tools import *
@@ -13,9 +15,9 @@ from datetime import date
def check_dcparser(xml_file, result_file):
- xml = file(xml_file).read()
+ xml = open(xml_file, 'rb').read()
result = codecs.open(result_file, encoding='utf-8').read()
- info = dcparser.BookInfo.from_string(xml).to_dict()
+ info = dcparser.BookInfo.from_bytes(xml).to_dict()
should_be = eval(result)
for key in should_be:
assert_equals(info[key], should_be[key])
@@ -28,13 +30,13 @@ def test_dcparser():
def check_serialize(xml_file):
- xml = file(xml_file).read()
- info = dcparser.BookInfo.from_string(xml)
+ xml = open(xml_file, 'rb').read()
+ info = dcparser.BookInfo.from_bytes(xml)
# serialize
- serialized = etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8')
+ serialized = etree.tostring(info.to_etree(), encoding='unicode').encode('utf-8')
# then parse again
- info_bis = dcparser.BookInfo.from_string(serialized)
+ info_bis = dcparser.BookInfo.from_bytes(serialized)
# check if they are the same
for key in vars(info):
@@ -49,7 +51,7 @@ def test_serialize():
def test_asdate():
- assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 03))
+ assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 3))
assert_equals(dcparser.as_date(u"2011"), date(2011, 1, 1))
assert_equals(dcparser.as_date(u"2 poł. XIX w."), date(1950, 1, 1))
assert_equals(dcparser.as_date(u"XVII w., l. 20"), date(1720, 1, 1))
diff --git a/tests/test_epub.py b/tests/test_epub.py
index 720fec6..4ac874a 100644
--- a/tests/test_epub.py
+++ b/tests/test_epub.py
@@ -3,6 +3,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from zipfile import ZipFile
from lxml import html
from nose.tools import *
@@ -30,3 +32,13 @@ def test_transform():
u'Opracowanie redakcyjne i przypisy: '
u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
assert_true(editors_attribution)
+
+
+def test_transform_hyphenate():
+ epub = WLDocument.from_file(
+ get_fixture('text', 'asnyk_zbior.xml'),
+ provider=DirDocProvider(get_fixture('text', ''))
+ ).as_epub(
+ flags=['without_fonts'],
+ hyphenate=True
+ ).get_file()
diff --git a/tests/test_fb2.py b/tests/test_fb2.py
new file mode 100644
index 0000000..2b8de67
--- /dev/null
+++ b/tests/test_fb2.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals
+
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
+from nose.tools import *
+from .utils import get_fixture
+
+
+def test_transform():
+ expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.fb2')
+
+ text = WLDocument.from_file(
+ get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+ ).as_fb2().get_bytes()
+
+ assert_equal(text, open(expected_output_file_path, 'rb').read())
+
diff --git a/tests/test_html.py b/tests/test_html.py
index a0de630..d77d8fe 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -3,10 +3,12 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import NoDublinCore
from librarian.parser import WLDocument
from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
def test_transform():
@@ -14,9 +16,9 @@ def test_transform():
html = WLDocument.from_file(
get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
- ).as_html().get_string()
+ ).as_html().get_bytes()
- assert_equal(html, file(expected_output_file_path).read())
+ assert_equal(html, open(expected_output_file_path, 'rb').read())
@raises(NoDublinCore)
@@ -35,7 +37,7 @@ def test_passing_parse_dublincore_to_transform():
def test_empty():
- assert not WLDocument.from_string(
- '',
+ assert not WLDocument.from_bytes(
+ b'',
parse_dublincore=False,
).as_html()
diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py
index 234f297..410577c 100644
--- a/tests/test_html_annotations.py
+++ b/tests/test_html_annotations.py
@@ -21,73 +21,73 @@ def test_annotations():
('', (
'pe',
- [],
- '',
- ''
+ [],
+ '[przypis edytorski]',
+ ' [przypis edytorski]
'
),
'Empty footnote'),
('Definiendum --- definiens.', (
'pr',
- [],
- 'Definiendum \u2014 definiens.',
- 'Definiendum \u2014 definiens.
'
+ [],
+ 'Definiendum \u2014 definiens. [przypis redakcyjny]',
+ 'Definiendum \u2014 definiens. [przypis redakcyjny]
'
),
'Plain footnote.'),
('Definiendum --- definiens.', (
'pt',
- [],
- 'Definiendum \u2014 definiens.',
- 'Definiendum \u2014 definiens.
'
+ [],
+ 'Definiendum \u2014 definiens. [przypis tłumacza]',
+ 'Definiendum \u2014 definiens. [przypis tłumacza]
'
),
'Standard footnote.'),
('Definiendum (łac.) --- definiens.', (
'pr',
- ['łac.'],
- 'Definiendum (łac.) \u2014 definiens.',
- 'Definiendum (łac.) \u2014 definiens.
'
+ ['łac.'],
+ 'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]',
+ 'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]
'
),
'Plain footnote with qualifier'),
('Definiendum (łac.) --- definiens.', (
'pe',
- ['łac.'],
- 'Definiendum (łac.) \u2014 definiens.',
- 'Definiendum (łac.) \u2014 definiens.
'
+ ['łac.'],
+ 'Definiendum (łac.) \u2014 definiens. [przypis edytorski]',
+ 'Definiendum (łac.) \u2014 definiens. [przypis edytorski]
'
),
'Standard footnote with qualifier.'),
(' Definiendum (daw.) --- definiens.', (
'pt',
- ['daw.'],
- 'Definiendum (daw.) \u2014 definiens.',
- ' Definiendum (daw.) \u2014 definiens.
'
+ ['daw.'],
+ 'Definiendum (daw.) \u2014 definiens. [przypis tłumacza]',
+ ' Definiendum (daw.) \u2014 definiens. [przypis tłumacza]
'
),
'Standard footnote with leading whitespace and qualifier.'),
('Definiendum (łac.) --- definiens.', (
'pr',
- ['łac.'],
- 'Definiendum (łac.) \u2014 definiens.',
- 'Definiendum (łac.) \u2014 definiens.
'
+ ['łac.'],
+ 'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]',
+ 'Definiendum (łac.) \u2014 definiens. [przypis redakcyjny]
'
),
'Plain footnote with qualifier and some emphasis.'),
('Definiendum (łac.) --- definiens.', (
'pe',
['łac.'],
- 'Definiendum (łac.) \u2014 definiens.',
- 'Definiendum (łac.) \u2014 definiens.
'
+ 'Definiendum (łac.) \u2014 definiens. [przypis edytorski]',
+ 'Definiendum (łac.) \u2014 definiens. [przypis edytorski]
'
),
'Standard footnote with qualifier and some emphasis.'),
('Definiendum (łac.) --- definiens (some) --- more text.', (
'pe',
['łac.'],
- 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
- 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.
',
+ 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]',
+ 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]
',
),
'Footnote with a second parentheses and mdash.'),
@@ -96,9 +96,9 @@ def test_annotations():
'pe',
['daw.', 'niem.'],
'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
- 'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
+ 'szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]',
'gemajna (daw., z niem. gemein: zwykły) '
- '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.
'
+ '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]
'
),
'Footnote with multiple and qualifiers and emphasis.'),
@@ -106,7 +106,9 @@ def test_annotations():
xml_src = ''' %s ''' % "".join(
t[0] for t in annotations)
- html = WLDocument.from_string(xml_src, parse_dublincore=False).as_html().get_file()
+ html = WLDocument.from_bytes(
+ xml_src.encode('utf-8'),
+ parse_dublincore=False).as_html().get_file()
res_annotations = list(extract_annotations(html))
for i, (src, expected, name) in enumerate(annotations):
diff --git a/tests/test_html_fragments.py b/tests/test_html_fragments.py
index 3e87a9e..16057bc 100644
--- a/tests/test_html_fragments.py
+++ b/tests/test_html_fragments.py
@@ -3,9 +3,11 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian.html import extract_fragments
from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
def test_fragments():
@@ -14,5 +16,5 @@ def test_fragments():
closed_fragments, open_fragments = extract_fragments(
get_fixture('text', 'asnyk_miedzy_nami_expected.html'))
assert not open_fragments
- fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in closed_fragments.values())
- assert_equal(fragments_text, file(expected_output_file_path).read().decode('utf-8'))
+ fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in sorted(closed_fragments.values(), key=lambda f: f.id))
+ assert_equal(fragments_text, open(expected_output_file_path, 'rb').read().decode('utf-8'))
diff --git a/tests/test_mobi.py b/tests/test_mobi.py
new file mode 100644
index 0000000..3b29e72
--- /dev/null
+++ b/tests/test_mobi.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals
+
+from zipfile import ZipFile
+from lxml import html
+from nose.tools import *
+from librarian import DirDocProvider
+from librarian.parser import WLDocument
+from tests.utils import get_fixture
+
+
+def test_transform():
+ mobi = WLDocument.from_file(
+ get_fixture('text', 'asnyk_zbior.xml'),
+ provider=DirDocProvider(get_fixture('text', ''))
+ ).as_mobi(converter_path='true').get_file()
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index 5b2dba1..98d1fa6 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -3,11 +3,14 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
+import re
from tempfile import NamedTemporaryFile
from nose.tools import *
from librarian import DirDocProvider
from librarian.parser import WLDocument
-from utils import get_fixture
+from .utils import get_fixture
def test_transform():
@@ -17,9 +20,8 @@ def test_transform():
get_fixture('text', 'asnyk_zbior.xml'),
provider=DirDocProvider(get_fixture('text', ''))
).as_pdf(save_tex=temp.name)
- tex = open(temp.name).read().decode('utf-8')
- print tex
+ tex = open(temp.name, 'rb').read().decode('utf-8')
# Check contributor list.
- editors = re.search(ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
+ editors = re.search(r'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
diff --git a/tests/test_picture.py b/tests/test_picture.py
index 00b03ce..f97609b 100644
--- a/tests/test_picture.py
+++ b/tests/test_picture.py
@@ -3,6 +3,8 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import picture, dcparser
from tests.utils import get_all_fixtures, get_fixture
from os import path
@@ -46,7 +48,6 @@ def test_picture_parts():
motifs = set()
names = set()
- print parts
for p in parts:
for m in p['themes']:
motifs.add(m)
diff --git a/tests/test_text.py b/tests/test_text.py
index 70dfb60..14c728f 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -3,10 +3,12 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import NoDublinCore
from librarian.parser import WLDocument
from nose.tools import *
-from utils import get_fixture
+from .utils import get_fixture
def test_transform():
@@ -14,9 +16,19 @@ def test_transform():
text = WLDocument.from_file(
get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
- ).as_text().get_string()
+ ).as_text().get_bytes()
+
+ assert_equal(text, open(expected_output_file_path, 'rb').read())
+
+
+def test_transform_raw():
+ expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
+
+ text = WLDocument.from_file(
+ get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+ ).as_text(flags=['raw-text']).get_bytes()
- assert_equal(text, file(expected_output_file_path).read())
+ assert_equal(text, open(expected_output_file_path, 'rb').read())
@raises(NoDublinCore)
diff --git a/tests/utils.py b/tests/utils.py
index fc87532..7da206c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -3,7 +3,6 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from __future__ import with_statement
from os.path import realpath, join, dirname
import glob
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..5b28a3b
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,28 @@
+[tox]
+envlist =
+ clean,
+ py{27,34,35,36,37},
+ stats
+
+[testenv]
+deps =
+ nose
+ coverage
+passenv = HOME ; Needed to find locally installed fonts when testing PDF production.
+commands =
+ nosetests --with-coverage --cover-package=librarian -d --with-doctest --with-xunit --exe
+install_command = pip install --extra-index-url https://py.mdrn.pl/simple {packages}
+
+[testenv:clean]
+basepython = python2
+commands =
+ coverage erase
+deps = coverage
+
+[testenv:stats]
+basepython = python2
+commands =
+ coverage report
+ coverage html
+deps = coverage
+
--
2.20.1