Housekeeping.
author Radek Czajka <rczajka@rczajka.pl>
Wed, 10 Jun 2020 14:59:42 +0000 (16:59 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Wed, 10 Jun 2020 14:59:42 +0000 (16:59 +0200)
src/librarian/__init__.py
src/librarian/embeds/mathml.py
src/librarian/epub.py
src/librarian/parser.py
src/librarian/picture.py
src/librarian/res/text/template.txt [new file with mode: 0644]
src/librarian/text.py
src/librarian/util.py

index 119b6b1..95ea3fe 100644 (file)
@@ -27,20 +27,25 @@ class UnicodeException(Exception):
             message = six.text_type(args, encoding='utf-8', errors='ignore')
         return message
 
             message = six.text_type(args, encoding='utf-8', errors='ignore')
         return message
 
+
 class ParseError(UnicodeException):
     pass
 
 class ParseError(UnicodeException):
     pass
 
+
 class ValidationError(UnicodeException):
     pass
 
 class ValidationError(UnicodeException):
     pass
 
+
 class NoDublinCore(ValidationError):
     """There's no DublinCore section, and it's required."""
     pass
 
 class NoDublinCore(ValidationError):
     """There's no DublinCore section, and it's required."""
     pass
 
+
 class NoProvider(UnicodeException):
     """There's no DocProvider specified, and it's needed."""
     pass
 
 class NoProvider(UnicodeException):
     """There's no DocProvider specified, and it's needed."""
     pass
 
+
 class XMLNamespace(object):
     '''A handy structure to repsent names in an XML namespace.'''
 
 class XMLNamespace(object):
     '''A handy structure to repsent names in an XML namespace.'''
 
@@ -59,6 +64,7 @@ class XMLNamespace(object):
     def __str__(self):
         return '%s' % self.uri
 
     def __str__(self):
         return '%s' % self.uri
 
+
 class EmptyNamespace(XMLNamespace):
     def __init__(self):
         super(EmptyNamespace, self).__init__('')
 class EmptyNamespace(XMLNamespace):
     def __init__(self):
         super(EmptyNamespace, self).__init__('')
@@ -66,6 +72,7 @@ class EmptyNamespace(XMLNamespace):
     def __call__(self, tag):
         return tag
 
     def __call__(self, tag):
         return tag
 
+
 # some common namespaces we use
 XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace')
 RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
 # some common namespaces we use
 XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace')
 RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
@@ -85,8 +92,10 @@ class WLURI(object):
     slug = None
 
     example = 'http://wolnelektury.pl/katalog/lektura/template/'
     slug = None
 
     example = 'http://wolnelektury.pl/katalog/lektura/template/'
-    _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektur[ay]/'
-            '(?P<slug>[-a-z0-9]+)/?$')
+    _re_wl_uri = re.compile(
+        r'http://(www\.)?wolnelektury.pl/katalog/lektur[ay]/'
+        '(?P<slug>[-a-z0-9]+)/?$'
+    )
 
     def __init__(self, uri):
         uri = six.text_type(uri)
 
     def __init__(self, uri):
         uri = six.text_type(uri)
@@ -149,37 +158,47 @@ class DirDocProvider(DocProvider):
 
 from . import dcparser
 
 
 from . import dcparser
 
+
 DEFAULT_BOOKINFO = dcparser.BookInfo(
 DEFAULT_BOOKINFO = dcparser.BookInfo(
-        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
-        { DCNS('creator'): [u'Some, Author'],
-          DCNS('title'): [u'Some Title'],
-          DCNS('subject.period'): [u'Unknown'],
-          DCNS('subject.type'): [u'Unknown'],
-          DCNS('subject.genre'): [u'Unknown'],
-          DCNS('date'): ['1970-01-01'],
-          DCNS('language'): [u'pol'],
-          # DCNS('date'): [creation_date],
-          DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
-          DCNS('description'):
-          [u"""Publikacja zrealizowana w ramach projektu
-             Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
-             wykonana przez Bibliotekę Narodową z egzemplarza
-             pochodzącego ze zbiorów BN."""],
-          DCNS('identifier.url'): [WLURI.example],
-          DCNS('rights'):
-            [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
+    {
+        RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'
+    },
+    {
+        DCNS('creator'): [u'Some, Author'],
+        DCNS('title'): [u'Some Title'],
+        DCNS('subject.period'): [u'Unknown'],
+        DCNS('subject.type'): [u'Unknown'],
+        DCNS('subject.genre'): [u'Unknown'],
+        DCNS('date'): ['1970-01-01'],
+        DCNS('language'): [u'pol'],
+        # DCNS('date'): [creation_date],
+        DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
+        DCNS('description'):
+        [u"""Publikacja zrealizowana w ramach projektu
+        Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
+        wykonana przez Bibliotekę Narodową z egzemplarza
+        pochodzącego ze zbiorów BN."""],
+        DCNS('identifier.url'): [WLURI.example],
+        DCNS('rights'):
+        [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"]
+    }
+)
+
 
 def xinclude_forURI(uri):
     e = etree.Element(XINS("include"))
     e.set("href", uri)
     return etree.tostring(e, encoding='unicode')
 
 
 def xinclude_forURI(uri):
     e = etree.Element(XINS("include"))
     e.set("href", uri)
     return etree.tostring(e, encoding='unicode')
 
+
 def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
     """Wrap the text within the minimal XML structure with a DC template."""
     bookinfo.created_at = creation_date
 
 def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
     """Wrap the text within the minimal XML structure with a DC template."""
     bookinfo.created_at = creation_date
 
-    dcstring = etree.tostring(bookinfo.to_etree(), \
-        method='xml', encoding='unicode', pretty_print=True)
+    dcstring = etree.tostring(
+        bookinfo.to_etree(),  method='xml', encoding='unicode',
+        pretty_print=True
+    )
 
     return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
         u'\n</plain-text>\n</utwor>'
 
     return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
         u'\n</plain-text>\n</utwor>'
@@ -190,18 +209,21 @@ def serialize_raw(element):
 
     for child in element.iterchildren():
         e = etree.tostring(child, method='xml', encoding='unicode',
 
     for child in element.iterchildren():
         e = etree.tostring(child, method='xml', encoding='unicode',
-                pretty_print=True)
+                           pretty_print=True)
         b += e
 
     return b
 
         b += e
 
     return b
 
+
 SERIALIZERS = {
     'raw': serialize_raw,
 }
 
 SERIALIZERS = {
     'raw': serialize_raw,
 }
 
+
 def serialize_children(element, format='raw'):
     return SERIALIZERS[format](element)
 
 def serialize_children(element, format='raw'):
     return SERIALIZERS[format](element)
 
+
 def get_resource(path):
     return os.path.join(os.path.dirname(__file__), path)
 
 def get_resource(path):
     return os.path.join(os.path.dirname(__file__), path)
 
@@ -276,4 +298,6 @@ class OutputFile(object):
 
 class URLOpener(FancyURLopener):
     version = 'FNP Librarian (http://github.com/fnp/librarian)'
 
 class URLOpener(FancyURLopener):
     version = 'FNP Librarian (http://github.com/fnp/librarian)'
+
+
 urllib._urlopener = URLOpener()
 urllib._urlopener = URLOpener()
index 801c213..16fa75b 100644 (file)
@@ -12,13 +12,13 @@ class MathML(TreeEmbed):
     def to_latex(self):
         """
         >>> print(MathML(etree.fromstring(
     def to_latex(self):
         """
         >>> print(MathML(etree.fromstring(
-                '<mat>a &lt; b</mat>'
-            )).to_latex().data.strip())
+        ...     '<mat>a &lt; b</mat>'
+        ... )).to_latex().data.strip())
         a < b
 
         >>> print(MathML(etree.fromstring(
         a < b
 
         >>> print(MathML(etree.fromstring(
-                '<mat>&lt; &amp; &amp;lt; &#65;</mat>'
-            )).to_latex().data.strip())
+        ...     '<mat>&lt; &amp; &amp;lt; &#65;</mat>'
+        ... )).to_latex().data.strip())
         < & &lt; A
 
         """
         < & &lt; A
 
         """
index be9488a..137796e 100644 (file)
@@ -82,7 +82,9 @@ def inner_xml(node):
     """
 
     nt = node.text if node.text is not None else ''
     """
 
     nt = node.text if node.text is not None else ''
-    return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
+    return ''.join(
+        [nt] + [etree.tostring(child, encoding='unicode') for child in node]
+    )
 
 
 def set_inner_xml(node, text):
 
 
 def set_inner_xml(node, text):
@@ -122,7 +124,10 @@ def xslt(xml, sheet, **kwargs):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
         transform = etree.XSLT(etree.parse(xsltf))
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
         transform = etree.XSLT(etree.parse(xsltf))
-        params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
+        params = dict(
+            (key, transform.strparam(value))
+            for key, value in kwargs.items()
+        )
         return transform(xml, **params)
 
 
         return transform(xml, **params)
 
 
@@ -170,11 +175,17 @@ class Stanza(object):
     Slashes may only occur directly in the stanza. Any slashes in subelements
     will be ignored, and the subelements will be put inside verse elements.
 
     Slashes may only occur directly in the stanza. Any slashes in subelements
     will be ignored, and the subelements will be put inside verse elements.
 
-    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
+    >>> s = etree.fromstring(
+    ...         "<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>"
+    ...     )
     >>> Stanza(s).versify()
     >>> Stanza(s).versify()
-    >>> print(etree.tostring(s, encoding='unicode'))
-    <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
-    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
+    >>> print(etree.tostring(s, encoding='unicode', pretty_print=True).strip())
+    <strofa>
+      <wers_normalny>a <b>c</b><b>c</b></wers_normalny>
+      <wers_normalny>b<x>x/
+    y</x>c</wers_normalny>
+      <wers_normalny>d</wers_normalny>
+    </strofa>
 
     """
     def __init__(self, stanza_elem):
 
     """
     def __init__(self, stanza_elem):
@@ -190,7 +201,10 @@ class Stanza(object):
         tail = self.stanza.tail
         self.stanza.clear()
         self.stanza.tail = tail
         tail = self.stanza.tail
         self.stanza.clear()
         self.stanza.tail = tail
-        self.stanza.extend(verse for verse in self.verses if verse.text or len(verse) > 0)
+        self.stanza.extend(
+            verse for verse in self.verses
+            if verse.text or len(verse) > 0
+        )
 
     def open_normal_verse(self):
         self.open_verse = self.stanza.makeelement("wers_normalny")
 
     def open_normal_verse(self):
         self.open_verse = self.stanza.makeelement("wers_normalny")
@@ -249,7 +263,10 @@ def add_to_manifest(manifest, partno):
 def add_to_spine(spine, partno):
     """ Adds a node to the spine section in content.opf file """
 
 def add_to_spine(spine, partno):
     """ Adds a node to the spine section in content.opf file """
 
-    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
+    e = spine.makeelement(
+        OPFNS('itemref'),
+        attrib={'idref': 'part%d' % partno}
+    )
     spine.append(e)
 
 
     spine.append(e)
 
 
@@ -348,7 +365,8 @@ def chop(main_text):
 
     last_node_part = False
 
 
     last_node_part = False
 
-    # the below loop are workaround for a problem with epubs in drama ebooks without acts
+    # The below loop are workaround for a problem with epubs
+    # in drama ebooks without acts.
     is_scene = False
     is_act = False
     for one_part in main_text:
     is_scene = False
     is_act = False
     for one_part in main_text:
@@ -376,7 +394,10 @@ def chop(main_text):
                 yield part_xml
                 last_node_part = True
                 main_xml_part[:] = [deepcopy(one_part)]
                 yield part_xml
                 last_node_part = True
                 main_xml_part[:] = [deepcopy(one_part)]
-            elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+            elif (not last_node_part
+                  and name in (
+                      "naglowek_rozdzial", "naglowek_akt", "srodtytul"
+                  )):
                 yield part_xml
                 main_xml_part[:] = [deepcopy(one_part)]
             else:
                 yield part_xml
                 main_xml_part[:] = [deepcopy(one_part)]
             else:
@@ -385,8 +406,12 @@ def chop(main_text):
     yield part_xml
 
 
     yield part_xml
 
 
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
-    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
+                    _empty_html_static=[]):
+    """
+    Transforms one chunk, returns a HTML string, a TOC object
+    and a set of used characters.
+    """
 
     toc = TOC()
     for element in chunk_xml[0]:
 
     toc = TOC()
     for element in chunk_xml[0]:
@@ -395,11 +420,13 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
             toc.add(node_name(element), "part%d.html" % chunk_no)
         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
             toc.add(node_name(element), "part%d.html" % chunk_no)
         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
-            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
+            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no,
+                                level=1, is_part=False)
             element.set('sub', str(subnumber))
     if empty:
         if not _empty_html_static:
             element.set('sub', str(subnumber))
     if empty:
         if not _empty_html_static:
-            _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
+            with open(get_resource('epub/emptyChunk.html')) as f:
+                _empty_html_static.append(f.read())
         chars = set()
         output_html = _empty_html_static[0]
     else:
         chars = set()
         output_html = _empty_html_static[0]
     else:
@@ -417,7 +444,8 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
 
 
 def transform(wldoc, verbose=False, style=None, html_toc=False,
 
 
 def transform(wldoc, verbose=False, style=None, html_toc=False,
-              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
+              sample=None, cover=None, flags=None, hyphenate=False,
+              ilustr_path='', output_type='epub'):
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
@@ -430,7 +458,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
         replace_characters(wldoc.edoc.getroot())
 
 
         replace_characters(wldoc.edoc.getroot())
 
-        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
+        hyphenator = set_hyph_language(
+            wldoc.edoc.getroot()
+        ) if hyphenate else None
         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 
         # every input file will have a TOC entry,
         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 
         # every input file will have a TOC entry,
@@ -439,7 +469,8 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         chars = set()
         if first:
             # write book title page
         chars = set()
         if first:
             # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'),
+                             outputtype=output_type)
             chars = used_chars(html_tree.getroot())
             html_string = etree.tostring(
                 html_tree, pretty_print=True, xml_declaration=True,
             chars = used_chars(html_tree.getroot())
             html_string = etree.tostring(
                 html_tree, pretty_print=True, xml_declaration=True,
@@ -456,15 +487,17 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 chars = set()
                 html_string = open(get_resource('epub/emptyChunk.html')).read()
             else:
                 chars = set()
                 html_string = open(get_resource('epub/emptyChunk.html')).read()
             else:
-                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
+                html_tree = xslt(wldoc.edoc,
+                                 get_resource('epub/xsltChunkTitle.xsl'))
                 chars = used_chars(html_tree.getroot())
                 html_string = etree.tostring(
                     html_tree, pretty_print=True, xml_declaration=True,
                     encoding="utf-8",
                 chars = used_chars(html_tree.getroot())
                 html_string = etree.tostring(
                     html_tree, pretty_print=True, xml_declaration=True,
                     encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
+                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'
                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                 )
                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                 )
-            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
+            zip.writestr('OPS/part%d.html' % chunk_counter,
+                         squeeze_whitespace(html_string))
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
@@ -485,12 +518,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                     if sample <= 0:
                         empty = True
                     else:
                     if sample <= 0:
                         empty = True
                     else:
-                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
-                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
+                        sample -= len(chunk_xml.xpath(
+                            '//strofa|//akap|//akap_cd|//akap_dialog'
+                        ))
+                chunk_html, chunk_toc, chunk_chars = transform_chunk(
+                    chunk_xml, chunk_counter, annotations, empty)
 
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
 
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
+                zip.writestr('OPS/part%d.html' % chunk_counter,
+                             squeeze_whitespace(chunk_html))
                 add_to_manifest(manifest, chunk_counter)
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
                 add_to_manifest(manifest, chunk_counter)
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
@@ -524,18 +561,21 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
     if document.book_info.thanks:
         document.edoc.getroot().set('thanks', document.book_info.thanks)
 
     if document.book_info.thanks:
         document.edoc.getroot().set('thanks', document.book_info.thanks)
 
-    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
+    opf = xslt(document.book_info.to_etree(),
+               get_resource('epub/xsltContent.xsl'))
     manifest = opf.find('.//' + OPFNS('manifest'))
     guide = opf.find('.//' + OPFNS('guide'))
     spine = opf.find('.//' + OPFNS('spine'))
 
     manifest = opf.find('.//' + OPFNS('manifest'))
     guide = opf.find('.//' + OPFNS('guide'))
     spine = opf.find('.//' + OPFNS('spine'))
 
-    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
+                                     delete=False)
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
     functions.reg_mathml_epub(zip)
 
     if os.path.isdir(ilustr_path):
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
     functions.reg_mathml_epub(zip)
 
     if os.path.isdir(ilustr_path):
-        ilustr_elements = set(ilustr.get('src') for ilustr in document.edoc.findall('//ilustr'))
+        ilustr_elements = set(ilustr.get('src')
+                              for ilustr in document.edoc.findall('//ilustr'))
         for i, filename in enumerate(os.listdir(ilustr_path)):
             if filename not in ilustr_elements:
                 continue
         for i, filename in enumerate(os.listdir(ilustr_path)):
             if filename not in ilustr_elements:
                 continue
@@ -543,7 +583,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
             zip.write(file_path, os.path.join('OPS', filename))
             image_id = 'image%s' % i
             manifest.append(etree.fromstring(
             zip.write(file_path, os.path.join('OPS', filename))
             image_id = 'image%s' % i
             manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+                '<item id="%s" href="%s" media-type="%s" />' % (
+                    image_id, filename, guess_type(file_path)[0])
+            ))
 
     # write static elements
     mime = zipfile.ZipInfo()
 
     # write static elements
     mime = zipfile.ZipInfo()
@@ -590,17 +632,28 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
         if bound_cover.uses_dc_cover:
             if document.book_info.cover_by:
 
         if bound_cover.uses_dc_cover:
             if document.book_info.cover_by:
-                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+                document.edoc.getroot().set('data-cover-by',
+                                            document.book_info.cover_by)
             if document.book_info.cover_source:
             if document.book_info.cover_source:
-                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
+                document.edoc.getroot().set('data-cover-source',
+                                            document.book_info.cover_source)
 
         manifest.append(etree.fromstring(
 
         manifest.append(etree.fromstring(
-            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
+            '<item id="cover" href="cover.html" '
+            'media-type="application/xhtml+xml" />'
+        ))
         manifest.append(etree.fromstring(
         manifest.append(etree.fromstring(
-            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
+            '<item id="cover-image" href="%s" media-type="%s" />' % (
+                cover_name, bound_cover.mime_type()
+            )
+        ))
         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
-        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
-        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
+        opf.getroot()[0].append(etree.fromstring(
+            '<meta name="cover" content="cover-image"/>'
+        ))
+        guide.append(etree.fromstring(
+            '<reference href="cover.html" type="cover" title="Okładka"/>'
+        ))
 
     annotations = etree.Element('annotations')
 
 
     annotations = etree.Element('annotations')
 
@@ -616,10 +669,14 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     if html_toc:
         manifest.append(etree.fromstring(
 
     if html_toc:
         manifest.append(etree.fromstring(
-            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
+            '<item id="html_toc" href="toc.html" '
+            'media-type="application/xhtml+xml" />'
+        ))
         spine.append(etree.fromstring(
             '<itemref idref="html_toc" />'))
         spine.append(etree.fromstring(
             '<itemref idref="html_toc" />'))
-        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
+        guide.append(etree.fromstring(
+            '<reference href="toc.html" type="toc" title="Spis treści"/>'
+        ))
 
     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 
 
     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 
@@ -630,7 +687,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
     if len(annotations) > 0:
         toc.add("Przypisy", "annotations.html")
         manifest.append(etree.fromstring(
     if len(annotations) > 0:
         toc.add("Przypisy", "annotations.html")
         manifest.append(etree.fromstring(
-            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
+            '<item id="annotations" href="annotations.html" '
+            'media-type="application/xhtml+xml" />'
+        ))
         spine.append(etree.fromstring(
             '<itemref idref="annotations" />'))
         replace_by_verse(annotations)
         spine.append(etree.fromstring(
             '<itemref idref="annotations" />'))
         replace_by_verse(annotations)
@@ -645,7 +704,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     toc.add("Wesprzyj Wolne Lektury", "support.html")
     manifest.append(etree.fromstring(
 
     toc.add("Wesprzyj Wolne Lektury", "support.html")
     manifest.append(etree.fromstring(
-        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
+        '<item id="support" href="support.html" '
+        'media-type="application/xhtml+xml" />'
+    ))
     spine.append(etree.fromstring(
         '<itemref idref="support" />'))
     html_string = open(get_resource('epub/support.html'), 'rb').read()
     spine.append(etree.fromstring(
         '<itemref idref="support" />'))
     html_string = open(get_resource('epub/support.html'), 'rb').read()
@@ -654,10 +715,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
 
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
-        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
+        '<item id="last" href="last.html" '
+        'media-type="application/xhtml+xml" />'
+    ))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
+                     outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
     zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
     chars.update(used_chars(html_tree.getroot()))
     zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
@@ -674,8 +738,10 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         except OSError:
             cwd = None
 
         except OSError:
             cwd = None
 
-        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
-        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
+        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                              'font-optimizer'))
+        for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
+                      'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
             optimizer_call = ['perl', 'subset.pl', '--chars',
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
             optimizer_call = ['perl', 'subset.pl', '--chars',
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
@@ -686,17 +752,22 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 subprocess.check_call(optimizer_call, env=env)
             else:
                 dev_null = open(os.devnull, 'w')
                 subprocess.check_call(optimizer_call, env=env)
             else:
                 dev_null = open(os.devnull, 'w')
-                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
+                subprocess.check_call(optimizer_call, stdout=dev_null,
+                                      stderr=dev_null, env=env)
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
+                '<item id="%s" href="%s" '
+                'media-type="application/x-font-truetype" />'
+                % (fname, fname)
+            ))
         rmtree(tmpdir)
         if cwd is not None:
             os.chdir(cwd)
     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                  xml_declaration=True, encoding="utf-8"))
     title = document.book_info.title
         rmtree(tmpdir)
         if cwd is not None:
             os.chdir(cwd)
     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                  xml_declaration=True, encoding="utf-8"))
     title = document.book_info.title
-    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
+    attributes = ("dtb:uid", "dtb:depth", "dtb:totalPageCount",
+                  "dtb:maxPageNumber")
     for st in attributes:
         meta = toc_file.makeelement(NCXNS('meta'))
         meta.set('name', st)
     for st in attributes:
         meta = toc_file.makeelement(NCXNS('meta'))
         meta.set('name', st)
index 6cce0f7..2bb9509 100644 (file)
@@ -33,16 +33,21 @@ class WLDocument(object):
         dc_path = './/' + RDFNS('RDF')
 
         if root_elem.tag != 'utwor':
         dc_path = './/' + RDFNS('RDF')
 
         if root_elem.tag != 'utwor':
-            raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
+            raise ValidationError(
+                "Invalid root element. Found '%s', should be 'utwor'"
+                % root_elem.tag
+            )
 
         if parse_dublincore:
             self.rdf_elem = root_elem.find(dc_path)
 
             if self.rdf_elem is None:
 
         if parse_dublincore:
             self.rdf_elem = root_elem.find(dc_path)
 
             if self.rdf_elem is None:
-                raise NoDublinCore("Document must have a '%s' element." % RDFNS('RDF'))
+                raise NoDublinCore(
+                    "Document must have a '%s' element." % RDFNS('RDF')
+                )
 
             self.book_info = dcparser.BookInfo.from_element(
 
             self.book_info = dcparser.BookInfo.from_element(
-                    self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
+                self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
         else:
             self.book_info = None
 
         else:
             self.book_info = None
 
@@ -103,7 +108,9 @@ class WLDocument(object):
         if self.book_info is None:
             raise NoDublinCore('No Dublin Core in document.')
         for part_uri in self.book_info.parts:
         if self.book_info is None:
             raise NoDublinCore('No Dublin Core in document.')
         for part_uri in self.book_info.parts:
-            yield self.from_file(self.provider.by_uri(part_uri), provider=self.provider)
+            yield self.from_file(
+                self.provider.by_uri(part_uri), provider=self.provider
+            )
 
     def chunk(self, path):
         # convert the path to XPath
 
     def chunk(self, path):
         # convert the path to XPath
@@ -150,7 +157,9 @@ class WLDocument(object):
             try:
                 xpath = self.path_to_xpath(key)
                 node = self.edoc.xpath(xpath)[0]
             try:
                 xpath = self.path_to_xpath(key)
                 node = self.edoc.xpath(xpath)[0]
-                repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
+                repl = etree.fromstring(
+                    "<%s>%s</%s>" % (node.tag, data, node.tag)
+                )
                 node.getparent().replace(node, repl)
             except Exception as e:
                 unmerged.append(repr((key, xpath, e)))
                 node.getparent().replace(node, repl)
             except Exception as e:
                 unmerged.append(repr((key, xpath, e)))
@@ -160,8 +169,9 @@ class WLDocument(object):
     def clean_ed_note(self, note_tag='nota_red'):
         """ deletes forbidden tags from nota_red """
 
     def clean_ed_note(self, note_tag='nota_red'):
         """ deletes forbidden tags from nota_red """
 
-        for node in self.edoc.xpath('|'.join('//%s//%s' % (note_tag, tag) for tag in
-                                    ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
+        for node in self.edoc.xpath('|'.join(
+                '//%s//%s' % (note_tag, tag) for tag in
+                ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
             tail = node.tail
             node.clear()
             node.tag = 'span'
             tail = node.tail
             node.clear()
             node.tag = 'span'
@@ -174,7 +184,8 @@ class WLDocument(object):
         """
         if self.book_info is None:
             raise NoDublinCore('No Dublin Core in document.')
         """
         if self.book_info is None:
             raise NoDublinCore('No Dublin Core in document.')
-        persons = set(self.book_info.editors + self.book_info.technical_editors)
+        persons = set(self.book_info.editors
+                      + self.book_info.technical_editors)
         for child in self.parts():
             persons.update(child.editors())
         if None in persons:
         for child in self.parts():
             persons.update(child.editors())
         if None in persons:
@@ -218,11 +229,16 @@ class WLDocument(object):
         from librarian import pdf
         return pdf.transform(self, *args, **kwargs)
 
         from librarian import pdf
         return pdf.transform(self, *args, **kwargs)
 
-    def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
+    def save_output_file(self, output_file, output_path=None,
+                         output_dir_path=None, make_author_dir=False,
+                         ext=None):
         if output_dir_path:
             save_path = output_dir_path
             if make_author_dir:
         if output_dir_path:
             save_path = output_dir_path
             if make_author_dir:
-                save_path = os.path.join(save_path, six.text_type(self.book_info.author).encode('utf-8'))
+                save_path = os.path.join(
+                    save_path,
+                    six.text_type(self.book_info.author).encode('utf-8')
+                )
             save_path = os.path.join(save_path, self.book_info.url.slug)
             if ext:
                 save_path += '.%s' % ext
             save_path = os.path.join(save_path, self.book_info.url.slug)
             if ext:
                 save_path += '.%s' % ext
index eeb8e8e..93d8cb9 100644 (file)
@@ -181,7 +181,9 @@ class WLPicture(object):
             return [[0, 0], [-1, -1]]
 
         def has_all_props(node, props):
             return [[0, 0], [-1, -1]]
 
         def has_all_props(node, props):
-            return reduce(and_, map(lambda prop: prop in node.attrib, props))
+            return six.moves.reduce(
+                and_, map(lambda prop: prop in node.attrib, props)
+            )
 
         if not has_all_props(area, ['x1', 'x2', 'y1', 'y2']):
             return None
 
         if not has_all_props(area, ['x1', 'x2', 'y1', 'y2']):
             return None
diff --git a/src/librarian/res/text/template.txt b/src/librarian/res/text/template.txt
new file mode 100644 (file)
index 0000000..fa1429c
--- /dev/null
@@ -0,0 +1,12 @@
+%(text)s
+
+
+-----
+Ta lektura, podobnie jak tysiące innych, dostępna jest na stronie wolnelektury.pl.
+Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %(url)s.
+
+Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska.
+
+%(license_description)s.%(source)s%(publisher)s
+
+%(description)s%(contributors)s%(funders)s%(isbn)s
index d0531a4..8e3960d 100644 (file)
@@ -6,7 +6,7 @@
 from __future__ import unicode_literals
 
 import copy
 from __future__ import unicode_literals
 
 import copy
-from librarian import functions, OutputFile
+from librarian import functions, OutputFile, get_resource
 from lxml import etree
 import os
 import six
 from lxml import etree
 import os
 import six
@@ -17,20 +17,9 @@ functions.reg_wrap_words()
 functions.reg_strip()
 functions.reg_person_name()
 
 functions.reg_strip()
 functions.reg_person_name()
 
-TEMPLATE = u"""\
-%(text)s
 
 
-
------
-Ta lektura, podobnie jak tysiące innych, dostępna jest na stronie wolnelektury.pl.
-Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %(url)s.
-
-Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska.
-
-%(license_description)s.%(source)s%(publisher)s
-
-%(description)s%(contributors)s%(funders)s%(isbn)s
-"""
+with open(get_resource("res/text/template.txt")) as f:
+    TEMPLATE = f.read()
 
 
 def transform(wldoc, flags=None, **options):
 
 
 def transform(wldoc, flags=None, **options):
index c302084..5c9fbc2 100644 (file)
@@ -1,10 +1,11 @@
-# Functions to convert between integers and Roman numerals. Doctest examples included.
-# by Paul Winkler 
+# Functions to convert between integers and Roman numerals.
+# by Paul Winkler
 # http://code.activestate.com/recipes/81611-roman-numerals/
 # PSFL (GPL compatible)
 from __future__ import print_function, unicode_literals
 
 import os
 # http://code.activestate.com/recipes/81611-roman-numerals/
 # PSFL (GPL compatible)
 from __future__ import print_function, unicode_literals
 
 import os
+import six
 
 
 def int_to_roman(input):
 
 
 def int_to_roman(input):
@@ -51,12 +52,13 @@ def int_to_roman(input):
     >>> print(int_to_roman(1999))
     MCMXCIX
     """
     >>> print(int_to_roman(1999))
     MCMXCIX
     """
-    if type(input) != type(1):
+    if not isinstance(input, int):
         raise TypeError("expected integer, got %s" % type(input))
     if not 0 < input < 4000:
         raise ValueError("Argument must be between 1 and 3999")
     ints = (1000, 900,  500, 400, 100,  90, 50,  40, 10,  9,    5,  4,    1)
         raise TypeError("expected integer, got %s" % type(input))
     if not 0 < input < 4000:
         raise ValueError("Argument must be between 1 and 3999")
     ints = (1000, 900,  500, 400, 100,  90, 50,  40, 10,  9,    5,  4,    1)
-    nums = ('M',  'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
+    nums = ('M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV',
+            'I')
     result = ""
     for i in range(len(ints)):
         count = int(input / ints[i])
     result = ""
     for i in range(len(ints)):
         count = int(input / ints[i])
@@ -64,10 +66,11 @@ def int_to_roman(input):
         input -= ints[i] * count
     return result
 
         input -= ints[i] * count
     return result
 
+
 def roman_to_int(input):
     """
     Convert a roman numeral to an integer.
 def roman_to_int(input):
     """
     Convert a roman numeral to an integer.
-    
+
     >>> r = list(range(1, 4000))
     >>> nums = [int_to_roman(i) for i in r]
     >>> ints = [roman_to_int(n) for n in nums]
     >>> r = list(range(1, 4000))
     >>> nums = [int_to_roman(i) for i in r]
     >>> ints = [roman_to_int(n) for n in nums]
@@ -91,21 +94,21 @@ def roman_to_int(input):
      ...
     ValueError: input is not a valid roman numeral: IL
     """
      ...
     ValueError: input is not a valid roman numeral: IL
     """
-    if type(input) != type(""):
+    if not isinstance(input, six.text_type):
         raise TypeError("expected string, got %s" % type(input))
     input = input.upper()
     nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I']
     ints = [1000, 500, 100, 50,  10,  5,    1]
     places = []
     for c in input:
         raise TypeError("expected string, got %s" % type(input))
     input = input.upper()
     nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I']
     ints = [1000, 500, 100, 50,  10,  5,    1]
     places = []
     for c in input:
-        if not c in nums:
+        if c not in nums:
             raise ValueError("input is not a valid roman numeral: %s" % input)
     for i in range(len(input)):
         c = input[i]
         value = ints[nums.index(c)]
         # If the next place holds a larger number, this value is negative.
         try:
             raise ValueError("input is not a valid roman numeral: %s" % input)
     for i in range(len(input)):
         c = input[i]
         value = ints[nums.index(c)]
         # If the next place holds a larger number, this value is negative.
         try:
-            nextvalue = ints[nums.index(input[i +1])]
+            nextvalue = ints[nums.index(input[i + 1])]
             if nextvalue > value:
                 value *= -1
         except IndexError:
             if nextvalue > value:
                 value *= -1
         except IndexError:
@@ -113,7 +116,8 @@ def roman_to_int(input):
             pass
         places.append(value)
     sum = 0
             pass
         places.append(value)
     sum = 0
-    for n in places: sum += n
+    for n in places:
+        sum += n
     # Easiest test for validity...
     if int_to_roman(sum) == input:
         return sum
     # Easiest test for validity...
     if int_to_roman(sum) == input:
         return sum