From 12b5230d8fdb3ad995e867fb5d58a69e8a627e68 Mon Sep 17 00:00:00 2001 From: Jan Szejko Date: Tue, 12 Jan 2016 10:15:59 +0100 Subject: [PATCH] pep8 and other code-style changes --- librarian/__init__.py | 5 +- librarian/book2anything.py | 61 ++++++------- librarian/cover.py | 34 +++----- librarian/dcparser.py | 151 +++++++++++++++++---------------- librarian/embeds/latex.py | 6 +- librarian/embeds/mathml.py | 4 +- librarian/epub.py | 2 +- librarian/fb2.py | 9 +- librarian/functions.py | 40 +++++---- librarian/html.py | 35 +++++--- librarian/packagers.py | 15 ++-- librarian/parser.py | 28 +++--- librarian/partners.py | 22 ++--- librarian/pdf.py | 41 ++++----- librarian/picture.py | 42 ++++----- librarian/sponsor.py | 1 + librarian/text.py | 16 ++-- tests/test_dcparser.py | 3 +- tests/test_epub.py | 3 +- tests/test_html.py | 1 + tests/test_html_annotations.py | 20 ++--- tests/test_html_fragments.py | 4 +- tests/test_pdf.py | 7 +- tests/test_picture.py | 13 ++- tests/utils.py | 2 +- 25 files changed, 285 insertions(+), 280 deletions(-) diff --git a/librarian/__init__.py b/librarian/__init__.py index b257b79..d307def 100644 --- a/librarian/__init__.py +++ b/librarian/__init__.py @@ -10,6 +10,8 @@ import re import shutil import urllib +from wolnelektury.utils import makedirs + class UnicodeException(Exception): def __str__(self): @@ -273,8 +275,7 @@ class OutputFile(object): """Save file to a path. Create directories, if necessary.""" dirname = os.path.dirname(os.path.abspath(path)) - if not os.path.isdir(dirname): - os.makedirs(dirname) + makedirs(dirname) shutil.copy(self.get_filename(), path) diff --git a/librarian/book2anything.py b/librarian/book2anything.py index 55ab599..0dcfd33 100755 --- a/librarian/book2anything.py +++ b/librarian/book2anything.py @@ -4,7 +4,6 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from collections import namedtuple import os.path import optparse @@ -34,16 +33,15 @@ class Book2Anything(object): Subclass it for any format you want to convert to. """ - format_name = None # Set format name, like "PDF". - ext = None # Set file extension, like "pdf". - uses_cover = False # Can it add a cover? - cover_optional = True # Only relevant if uses_cover - uses_provider = False # Does it need a DocProvider? - transform = None # Transform method. Uses WLDocument.as_{ext} by default. - parser_options = [] # List of Option objects for additional parser args. - transform_options = [] # List of Option objects for additional transform args. - transform_flags = [] # List of Option objects for supported transform flags. - + format_name = None # Set format name, like "PDF". + ext = None # Set file extension, like "pdf". + uses_cover = False # Can it add a cover? + cover_optional = True # Only relevant if uses_cover + uses_provider = False # Does it need a DocProvider? + transform = None # Transform method. Uses WLDocument.as_{ext} by default. + parser_options = [] # List of Option objects for additional parser args. + transform_options = [] # List of Option objects for additional transform args. + transform_flags = [] # List of Option objects for supported transform flags. @classmethod def run(cls): @@ -53,27 +51,21 @@ class Book2Anything(object): parser = optparse.OptionParser(usage=usage) - parser.add_option('-v', '--verbose', - action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-d', '--make-dir', - action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the output file in it') - parser.add_option('-o', '--output-file', - dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', - dest='output_dir', metavar='DIR', - help='specifies the directory for output') + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, + help='create a directory for author and put the output file in it') + parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', + help='specifies the directory for output') if cls.uses_cover: if cls.cover_optional: - parser.add_option('-c', '--with-cover', - action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-C', '--image-cache', - dest='image_cache', metavar='URL', - help='prefix for image download cache' + - (' (implies --with-cover)' if cls.cover_optional else '')) + parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, + help='create default cover') + parser.add_option('-C', '--image-cache', dest='image_cache', metavar='URL', + help='prefix for image download cache' + + (' (implies --with-cover)' if cls.cover_optional else '')) for option in cls.parser_options + cls.transform_options + cls.transform_flags: option.add(parser) @@ -81,7 +73,7 @@ class Book2Anything(object): if len(input_filenames) < 1: parser.print_help() - return(1) + return 1 # Prepare additional args for parser. parser_args = {} @@ -92,8 +84,7 @@ class Book2Anything(object): for option in cls.transform_options: transform_args[option.name()] = option.value(options) # Add flags to transform_args, if any. - transform_flags = [flag.name() for flag in cls.transform_flags - if flag.value(options)] + transform_flags = [flag.name() for flag in cls.transform_flags if flag.value(options)] if transform_flags: transform_args['flags'] = transform_flags if options.verbose: @@ -107,7 +98,6 @@ class Book2Anything(object): elif not cls.cover_optional or options.with_cover: transform_args['cover'] = DefaultEbookCover - # Do some real work try: for main_input in input_filenames: @@ -134,8 +124,7 @@ class Book2Anything(object): transform = getattr(WLDocument, 'as_%s' % cls.ext) output = transform(doc, **transform_args) - doc.save_output_file(output, - output_file, options.output_dir, options.make_dir, cls.ext) + doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { diff --git a/librarian/cover.py b/librarian/cover.py index 0266b4e..0b7a176 100644 --- a/librarian/cover.py +++ b/librarian/cover.py @@ -4,7 +4,7 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import re -from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance +from PIL import Image, ImageFont, ImageDraw, ImageFilter from StringIO import StringIO from librarian import get_resource, OutputFile, URLOpener @@ -181,8 +181,7 @@ class Cover(object): author_font = ImageFont.truetype( self.author_font_ttf, metr.author_font_size) - tbox.text(self.pretty_author(), self.author_color, author_font, - metr.author_lineskip, self.author_shadow) + tbox.text(self.pretty_author(), self.author_color, author_font, metr.author_lineskip, self.author_shadow) text_img = tbox.image() img.paste(text_img, (metr.author_margin_left, top), text_img) @@ -193,8 +192,7 @@ class Cover(object): ) title_font = ImageFont.truetype( self.title_font_ttf, metr.title_font_size) - tbox.text(self.pretty_title(), self.title_color, title_font, - metr.title_lineskip, self.title_shadow) + tbox.text(self.pretty_title(), self.title_color, title_font, metr.title_lineskip, self.title_shadow) text_img = tbox.image() img.paste(text_img, (metr.title_margin_left, top), text_img) @@ -319,12 +317,11 @@ class WLCover(Cover): font=author_font, line_height=metr.author_lineskip, color=self.author_color, - shadow_color=self.author_shadow, - ) + shadow_color=self.author_shadow) box.skip(metr.box_above_line) box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height), - fill=self.author_color, width=metr.box_line_width) + fill=self.author_color, width=metr.box_line_width) box.skip(metr.box_below_line) # Write title. @@ -334,8 +331,7 @@ class WLCover(Cover): line_height=metr.title_lineskip, font=title_font, color=self.title_color, - shadow_color=self.title_shadow, - ) + shadow_color=self.title_shadow) box_img = box.image() @@ -347,13 +343,11 @@ class WLCover(Cover): else: # Middle. box_top = (metr.height - box_img.size[1]) / 2 - box_left = metr.bar_width + (metr.width - metr.bar_width - - box_img.size[0]) / 2 + box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2 # Draw the white box. - ImageDraw.Draw(img).rectangle((box_left, box_top, - box_left + box_img.size[0], box_top + box_img.size[1]), - fill='#fff') + ImageDraw.Draw(img).rectangle( + (box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff') # Paste the contents into the white box. img.paste(box_img, (box_left, box_top), box_img) return img @@ -412,12 +406,13 @@ class LogoWLCover(WLCover): gradient_mask = Image.new('L', (metr.width - metr.bar_width, metr.gradient_height)) draw = ImageDraw.Draw(gradient_mask) for line in range(0, metr.gradient_height): - draw.line((0, line, metr.width - metr.bar_width, line), fill=int(255 * self.gradient_opacity * line / metr.gradient_height)) - img.paste(gradient, - (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask) + draw.line( + (0, line, metr.width - metr.bar_width, line), + fill=int(255 * self.gradient_opacity * line / metr.gradient_height)) + img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask) cursor = metr.width - metr.gradient_logo_margin_right - logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 + logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 for logo_path in self.gradient_logos[::-1]: logo = Image.open(get_resource(logo_path)) logo = logo.resize( @@ -514,4 +509,3 @@ class GandalfCover(Cover): DefaultEbookCover = LogoWLCover - diff --git a/librarian/dcparser.py b/librarian/dcparser.py index f413fac..fd3eec5 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -12,13 +12,14 @@ from librarian.util import roman_to_int from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS, XMLNS, WLURI, WLNS, PLMETNS) -import lxml.etree as etree # ElementTree API using libxml2 +import lxml.etree as etree # ElementTree API using libxml2 from lxml.etree import XMLSyntaxError class TextPlus(unicode): pass + class DatePlus(date): pass @@ -34,7 +35,7 @@ class Person(object): @classmethod def from_text(cls, text): - parts = [ token.strip() for token in text.split(',') ] + parts = [token.strip() for token in text.split(',')] if len(parts) == 1: surname = parts[0] names = [] @@ -45,7 +46,7 @@ class Person(object): if len(parts[1]) == 0: # there is no non-whitespace data after the comma raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts)) - names = [ name for name in parts[1].split() if len(name) ] # all non-whitespace tokens + names = [name for name in parts[1].split() if len(name)] # all non-whitespace tokens return cls(surname, *names) def readable(self): @@ -69,6 +70,7 @@ class Person(object): def __repr__(self): return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) + def as_date(text): """Dates for digitization of pictures. It seems we need the following: ranges: '1350-1450', @@ -81,7 +83,8 @@ for now we will translate this to some single date losing information of course. """ try: # check out the "N. poł X w." syntax - if isinstance(text, str): text = text.decode("utf-8") + if isinstance(text, str): + text = text.decode("utf-8") century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?" vague_format = u"(?:po *|ok. *)?([0-9]{4})(-[0-9]{2}-[0-9]{2})?" @@ -114,18 +117,22 @@ for now we will translate this to some single date losing information of course. except ValueError, e: raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.") + def as_person(text): return Person.from_text(text) + def as_unicode(text): if isinstance(text, unicode): return text else: return TextPlus(text.decode('utf-8')) + def as_wluri_strict(text): return WLURI.strict(text) + class Field(object): def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs): self.uri = uri @@ -135,7 +142,7 @@ class Field(object): self.multiple = multiple self.salias = salias - self.required = kwargs.get('required', True) and not kwargs.has_key('default') + self.required = kwargs.get('required', True) and 'default' not in kwargs self.default = kwargs.get('default', [] if multiple else [None]) def validate_value(self, val, strict=False): @@ -173,7 +180,7 @@ class Field(object): def validate(self, fdict, fallbacks=None, strict=False): if fallbacks is None: fallbacks = {} - if not fdict.has_key(self.uri): + if self.uri not in fdict: if not self.required: # Accept single value for single fields and saliases. if self.name in fallbacks: @@ -199,7 +206,7 @@ class Field(object): class DCInfo(type): - def __new__(meta, classname, bases, class_dict): + def __new__(mcs, classname, bases, class_dict): fields = list(class_dict['FIELDS']) for base in bases[::-1]: @@ -211,41 +218,40 @@ class DCInfo(type): fields.insert(0, field) class_dict['FIELDS'] = tuple(fields) - return super(DCInfo, meta).__new__(meta, classname, bases, class_dict) + return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict) class WorkInfo(object): __metaclass__ = DCInfo FIELDS = ( - Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True), - Field( DCNS('title'), 'title'), - Field( DCNS('type'), 'type', required=False, multiple=True), - - Field( DCNS('contributor.editor'), 'editors', \ - as_person, salias='editor', multiple=True, default=[]), - Field( DCNS('contributor.technical_editor'), 'technical_editors', - as_person, salias='technical_editor', multiple=True, default=[]), - Field( DCNS('contributor.funding'), 'funders', - salias='funder', multiple=True, default=[]), - Field( DCNS('contributor.thanks'), 'thanks', required=False), - - Field( DCNS('date'), 'created_at'), - Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), - Field( DCNS('publisher'), 'publisher'), - - Field( DCNS('language'), 'language'), - Field( DCNS('description'), 'description', required=False), - - Field( DCNS('source'), 'source_name', required=False), - Field( DCNS('source.URL'), 'source_url', required=False), - Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict), - Field( DCNS('rights.license'), 'license', required=False), - Field( DCNS('rights'), 'license_description'), - - Field( PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, default=[]), - Field( WLNS('digitisationSponsorNote'), 'sponsor_note', required=False), - Field( WLNS('developmentStage'), 'stage', required=False), + Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True), + Field(DCNS('title'), 'title'), + Field(DCNS('type'), 'type', required=False, multiple=True), + + Field(DCNS('contributor.editor'), 'editors', + as_person, salias='editor', multiple=True, default=[]), + Field(DCNS('contributor.technical_editor'), 'technical_editors', + as_person, salias='technical_editor', multiple=True, default=[]), + Field(DCNS('contributor.funding'), 'funders', salias='funder', multiple=True, default=[]), + Field(DCNS('contributor.thanks'), 'thanks', required=False), + + Field(DCNS('date'), 'created_at'), + Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), + Field(DCNS('publisher'), 'publisher'), + + Field(DCNS('language'), 'language'), + Field(DCNS('description'), 'description', required=False), + + Field(DCNS('source'), 'source_name', required=False), + Field(DCNS('source.URL'), 'source_url', required=False), + Field(DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict), + Field(DCNS('rights.license'), 'license', required=False), + Field(DCNS('rights'), 'license_description'), + + Field(PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, default=[]), + Field(WLNS('digitisationSponsorNote'), 'sponsor_note', required=False), + Field(WLNS('developmentStage'), 'stage', required=False), ) @classmethod @@ -320,11 +326,11 @@ class WorkInfo(object): self.fmap = {} for field in self.FIELDS: - value = field.validate(dc_fields, fallbacks=fallbacks, - strict=strict) + value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field - if field.salias: self.fmap[field.salias] = field + if field.salias: + self.fmap[field.salias] = field def __getattribute__(self, name): try: @@ -332,7 +338,7 @@ class WorkInfo(object): value = object.__getattribute__(self, 'prop_'+field.name) if field.name == name: return value - else: # singular alias + else: # singular alias if not field.multiple: raise "OUCH!! for field %s" % name @@ -345,7 +351,7 @@ class WorkInfo(object): field = object.__getattribute__(self, 'fmap')[name] if field.name == name: object.__setattr__(self, 'prop_'+field.name, newvalue) - else: # singular alias + else: # singular alias if not field.multiple: raise "OUCH! while setting field %s" % name @@ -357,13 +363,13 @@ class WorkInfo(object): """Update using field_dict. Verify correctness, but don't check if all required fields are present.""" for field in self.FIELDS: - if field_dict.has_key(field.name): + if field.name in field_dict: setattr(self, field.name, field_dict[field.name]) - def to_etree(self, parent = None): + def to_etree(self, parent=None): """XML representation of this object.""" - #etree._namespace_map[str(self.RDF)] = 'rdf' - #etree._namespace_map[str(self.DC)] = 'dc' + # etree._namespace_map[str(self.RDF)] = 'rdf' + # etree._namespace_map[str(self.DC)] = 'dc' if parent is None: root = etree.Element(RDFNS('RDF')) @@ -379,7 +385,8 @@ class WorkInfo(object): v = getattr(self, field.name, None) if v is not None: if field.multiple: - if len(v) == 0: continue + if len(v) == 0: + continue for x in v: e = etree.Element(field.uri) if x is not None: @@ -393,16 +400,16 @@ class WorkInfo(object): return root def serialize(self): - rdf = {} - rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about } + rdf = {'about': {'uri': RDFNS('about'), 'value': self.about}} dc = {} for field in self.FIELDS: v = getattr(self, field.name, None) if v is not None: if field.multiple: - if len(v) == 0: continue - v = [ unicode(x) for x in v if x is not None ] + if len(v) == 0: + continue + v = [unicode(x) for x in v if x is not None] else: v = unicode(v) @@ -417,44 +424,40 @@ class WorkInfo(object): if v is not None: if field.multiple: - if len(v) == 0: continue - v = [ unicode(x) for x in v if x is not None ] + if len(v) == 0: + continue + v = [unicode(x) for x in v if x is not None] else: v = unicode(v) result[field.name] = v if field.salias: v = getattr(self, field.salias) - if v is not None: result[field.salias] = unicode(v) + if v is not None: + result[field.salias] = unicode(v) return result class BookInfo(WorkInfo): FIELDS = ( - Field( DCNS('audience'), 'audiences', salias='audience', multiple=True, - required=False), - - Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, - required=False), - Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True, - required=False), - Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True, - required=False), + Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, required=False), + + Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, required=False), + Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, required=False), + Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False), - Field( DCNS('contributor.translator'), 'translators', \ - as_person, salias='translator', multiple=True, default=[]), - Field( DCNS('relation.hasPart'), 'parts', - WLURI, strict=as_wluri_strict, multiple=True, required=False), - Field( DCNS('relation.isVariantOf'), 'variant_of', - WLURI, strict=as_wluri_strict, required=False), - - Field( DCNS('relation.coverImage.url'), 'cover_url', required=False), - Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False), - Field( DCNS('relation.coverImage.source'), 'cover_source', required=False), + Field(DCNS('contributor.translator'), 'translators', + as_person, salias='translator', multiple=True, default=[]), + Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict, multiple=True, required=False), + Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, strict=as_wluri_strict, required=False), + + Field(DCNS('relation.coverImage.url'), 'cover_url', required=False), + Field(DCNS('relation.coverImage.attribution'), 'cover_by', required=False), + Field(DCNS('relation.coverImage.source'), 'cover_source', required=False), # WLCover-specific. - Field( WLNS('coverBarColor'), 'cover_bar_color', required=False), - Field( WLNS('coverBoxPosition'), 'cover_box_position', required=False), + Field(WLNS('coverBarColor'), 'cover_bar_color', required=False), + Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False), ) diff --git a/librarian/embeds/latex.py b/librarian/embeds/latex.py index e10d165..0201d08 100644 --- a/librarian/embeds/latex.py +++ b/librarian/embeds/latex.py @@ -1,9 +1,11 @@ +# -*- coding: utf-8 -*- import os import shutil from subprocess import call, PIPE from tempfile import mkdtemp from librarian import get_resource -from . import DataEmbed, create_embed, downgrades_to, converts_to +from . import DataEmbed, create_embed, downgrades_to + class LaTeX(DataEmbed): @downgrades_to('image/png') @@ -15,7 +17,7 @@ class LaTeX(DataEmbed): f.write((tmpl % {'code': self.data}).encode('utf-8')) call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE) call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim', - os.path.join(tempdir, 'doc.png')]) + os.path.join(tempdir, 'doc.png')]) pngdata = open(os.path.join(tempdir, 'doc.png')).read() shutil.rmtree(tempdir) return create_embed('image/png', data=pngdata) diff --git a/librarian/embeds/mathml.py b/librarian/embeds/mathml.py index f99f979..dd78f05 100644 --- a/librarian/embeds/mathml.py +++ b/librarian/embeds/mathml.py @@ -1,6 +1,8 @@ +# -*- coding: utf-8 -*- from lxml import etree from librarian import get_resource -from . import TreeEmbed, create_embed, downgrades_to, converts_to +from . import TreeEmbed, create_embed, downgrades_to + class MathML(TreeEmbed): @downgrades_to('application/x-latex') diff --git a/librarian/epub.py b/librarian/epub.py index 6a1b2ce..bf2d4d9 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -646,7 +646,7 @@ def transform(wldoc, verbose=False, '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">' )) - if not flags or not 'without-fonts' in flags: + if not flags or 'without-fonts' not in flags: # strip fonts tmpdir = mkdtemp('-librarian-epub') try: diff --git a/librarian/fb2.py b/librarian/fb2.py index a1cece4..25a4c1f 100644 --- a/librarian/fb2.py +++ b/librarian/fb2.py @@ -17,10 +17,11 @@ functions.reg_person_name() def sectionify(tree): """Finds section headers and adds a tree of _section tags.""" - sections = ['naglowek_czesc', - 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', - 'naglowek_podrozdzial'] - section_level = dict((v,k) for (k,v) in enumerate(sections)) + sections = [ + 'naglowek_czesc', + 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', + 'naglowek_podrozdzial'] + section_level = dict((v, k) for (k, v) in enumerate(sections)) # We can assume there are just subelements an no text at section level. for level, section_name in reversed(list(enumerate(sections))): diff --git a/librarian/functions.py b/librarian/functions.py index 659bb94..75e2911 100644 --- a/librarian/functions.py +++ b/librarian/functions.py @@ -9,6 +9,7 @@ import re from librarian.dcparser import Person from librarian import get_resource + def _register_function(f): """ Register extension function with lxml """ ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') @@ -16,7 +17,7 @@ def _register_function(f): def reg_substitute_entities(): - ENTITY_SUBSTITUTIONS = [ + entity_substitutions = [ (u'---', u'—'), (u'--', u'–'), (u'...', u'…'), @@ -28,7 +29,7 @@ def reg_substitute_entities(): """XPath extension function converting all entites in passed text.""" if isinstance(text, list): text = ''.join(text) - for entity, substitutution in ENTITY_SUBSTITUTIONS: + for entity, substitutution in entity_substitutions: text = text.replace(entity, substitutution) return text @@ -103,22 +104,23 @@ def reg_texcommand(): text = ''.join(text) return re.sub(r'[^a-zA-Z]', '', text).strip() _register_function(texcommand) - + + def reg_lang_code_3to2(): - def lang_code_3to2(context, text): - """Convert 3-letter language code to 2-letter code""" - result = '' - text = ''.join(text) - with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f: - for line in f: - list = line.strip().split('|') - if list[0] == text: - result=list[2] - if result == '': - return text - else: - return result - _register_function(lang_code_3to2) + def lang_code_3to2(context, text): + """Convert 3-letter language code to 2-letter code""" + result = '' + text = ''.join(text) + with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f: + for line in f: + list = line.strip().split('|') + if list[0] == text: + result = list[2] + if result == '': + return text + else: + return result + _register_function(lang_code_3to2) def mathml_latex(context, trees): @@ -128,11 +130,14 @@ def mathml_latex(context, trees): text = text.replace(u'\u2062', '') return text + def reg_mathml_latex(): _register_function(mathml_latex) + def reg_mathml_epub(zipf): from librarian.embeds.mathml import MathML + def mathml(context, trees): data = MathML(trees[0]).to_latex().to_png().data name = "math%d.png" % mathml.count @@ -141,4 +146,3 @@ def reg_mathml_epub(zipf): return name mathml.count = 0 _register_function(mathml) - diff --git a/librarian/html.py b/librarian/html.py index 24d506f..6a6e3db 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -23,12 +23,15 @@ STYLESHEETS = { 'partial': 'xslt/wl2html_partial.xslt' } + def get_stylesheet(name): return os.path.join(os.path.dirname(__file__), STYLESHEETS[name]) + def html_has_content(text): return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text) + def transform(wldoc, stylesheet='legacy', options=None, flags=None): """Transforms the WL document to XHTML. @@ -55,15 +58,15 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None): if not options: options = {} result = document.transform(style, **options) - del document # no longer needed large object :) + del document # no longer needed large object :) if html_has_content(result): add_anchors(result.getroot()) add_table_of_themes(result.getroot()) add_table_of_contents(result.getroot()) - return OutputFile.from_string(etree.tostring(result, method='html', - xml_declaration=False, pretty_print=True, encoding='utf-8')) + return OutputFile.from_string(etree.tostring( + result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8')) else: return None except KeyError: @@ -71,6 +74,7 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None): except (XMLSyntaxError, XSLTApplyError), e: raise ParseError(e) + class Fragment(object): def __init__(self, id, themes): super(Fragment, self).__init__() @@ -99,7 +103,8 @@ class Fragment(object): result = [] for event, element in self.closed_events(): if event == 'start': - result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items()))) + result.append(u'<%s %s>' % ( + element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items()))) if element.text: result.append(element.text) elif event == 'end': @@ -129,7 +134,8 @@ def extract_fragments(input_filename): for event, element in etree.iterparse(buf, events=('start', 'end')): # Process begin and end elements if element.get('class', '') in ('theme-begin', 'theme-end'): - if not event == 'end': continue # Process elements only once, on end event + if not event == 'end': + continue # Process elements only once, on end event # Open new fragment if element.get('class', '') == 'theme-begin': @@ -165,7 +171,6 @@ def extract_fragments(input_filename): for fragment_id in open_fragments: open_fragments[fragment_id].append('text', element.tail) - # Process all elements except begin and end else: # Omit annotation tags @@ -210,9 +215,10 @@ def any_ancestor(element, test): def add_anchors(root): counter = 1 for element in root.iterdescendants(): - if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication') - or e.get('id') == 'nota_red' - or e.tag == 'blockquote'): + def f(e): + return e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication') or \ + e.get('id') == 'nota_red' or e.tag == 'blockquote' + if any_ancestor(element, f): continue if element.tag == 'p' and 'verse' in element.get('class', ''): @@ -237,7 +243,8 @@ def add_table_of_contents(root): counter = 1 for element in root.iterdescendants(): if element.tag in ('h2', 'h3'): - if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)): + if any_ancestor(element, + lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)): continue element_text = raw_printable_text(element) @@ -260,9 +267,9 @@ def add_table_of_contents(root): if len(subsections): subsection_list = etree.SubElement(section_element, 'ol') - for n, subsection, text, _ in subsections: + for n1, subsection, subtext, _ in subsections: subsection_element = etree.SubElement(subsection_list, 'li') - add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text) + add_anchor(subsection_element, "s%d" % n1, with_target=False, link_text=subtext) root.insert(0, toc) @@ -271,7 +278,8 @@ def add_table_of_themes(root): try: from sortify import sortify except ImportError: - sortify = lambda x: x + def sortify(x): + return x book_themes = {} for fragment in root.findall('.//a[@class="theme-begin"]'): @@ -334,4 +342,3 @@ def extract_annotations(html_path): qualifiers = [] yield anchor, fn_type, qualifiers, text_str, html_str - diff --git a/librarian/packagers.py b/librarian/packagers.py index a32a2a0..b3ae955 100644 --- a/librarian/packagers.py +++ b/librarian/packagers.py @@ -4,9 +4,11 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import os -from librarian import pdf, epub, mobi, DirDocProvider, ParseError, cover +from librarian import pdf, epub, mobi, DirDocProvider, ParseError from librarian.parser import WLDocument +from wolnelektury.utils import makedirs + class Packager(object): cover = None @@ -23,20 +25,15 @@ class Packager(object): slug, ext = os.path.splitext(fname) if output_dir != '': - try: - os.makedirs(output_dir) - except: - pass + makedirs(output_dir) outfile = os.path.join(output_dir, slug + '.' + cls.ext) if os.path.exists(outfile) and not overwrite: return doc = WLDocument.from_file(main_input, provider=provider) - output_file = cls.transform(doc, - cover=cls.cover, flags=cls.flags) + output_file = cls.transform(doc, cover=cls.cover, flags=cls.flags) doc.save_output_file(output_file, output_path=outfile) - @classmethod def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False): try: @@ -56,10 +53,12 @@ class EpubPackager(Packager): converter = epub ext = 'epub' + class MobiPackager(Packager): converter = mobi ext = 'mobi' + class PdfPackager(Packager): converter = pdf ext = 'pdf' diff --git a/librarian/parser.py b/librarian/parser.py index 19ec32d..502192f 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -16,12 +16,13 @@ import os import re from StringIO import StringIO + class WLDocument(object): LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) provider = None def __init__(self, edoc, parse_dublincore=True, provider=None, - strict=False, meta_fallbacks=None): + strict=False, meta_fallbacks=None): self.edoc = edoc self.provider = provider @@ -100,8 +101,7 @@ class WLDocument(object): if self.book_info is None: raise NoDublinCore('No Dublin Core in document.') for part_uri in self.book_info.parts: - yield self.from_file(self.provider.by_uri(part_uri), - provider=self.provider) + yield self.from_file(self.provider.by_uri(part_uri), provider=self.provider) def chunk(self, path): # convert the path to XPath @@ -122,7 +122,7 @@ class WLDocument(object): parts.append(part) else: tag, n = match.groups() - parts.append("*[%d][name() = '%s']" % (int(n)+1, tag) ) + parts.append("*[%d][name() = '%s']" % (int(n)+1, tag)) if parts[0] == '.': parts[0] = '' @@ -135,7 +135,7 @@ class WLDocument(object): def update_dc(self): if self.book_info: parent = self.rdf_elem.getparent() - parent.replace( self.rdf_elem, self.book_info.to_etree(parent) ) + parent.replace(self.rdf_elem, self.book_info.to_etree(parent)) def serialize(self): self.update_dc() @@ -148,10 +148,10 @@ class WLDocument(object): try: xpath = self.path_to_xpath(key) node = self.edoc.xpath(xpath)[0] - repl = etree.fromstring(u"<%s>%s" %(node.tag, data, node.tag) ) + repl = etree.fromstring(u"<%s>%s" % (node.tag, data, node.tag)) node.getparent().replace(node, repl) except Exception, e: - unmerged.append( repr( (key, xpath, e) ) ) + unmerged.append(repr((key, xpath, e))) return unmerged @@ -159,7 +159,7 @@ class WLDocument(object): """ deletes forbidden tags from nota_red """ for node in self.edoc.xpath('|'.join('//%s//%s' % (note_tag, tag) for tag in - ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))): + ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))): tail = node.tail node.clear() node.tag = 'span' @@ -172,8 +172,7 @@ class WLDocument(object): """ if self.book_info is None: raise NoDublinCore('No Dublin Core in document.') - persons = set(self.book_info.editors + - self.book_info.technical_editors) + persons = set(self.book_info.editors + self.book_info.technical_editors) for child in self.parts(): persons.update(child.editors()) if None in persons: @@ -211,15 +210,12 @@ class WLDocument(object): cover_class = DefaultEbookCover return cover_class(self.book_info, *args, **kwargs).output_file() - def save_output_file(self, output_file, output_path=None, - output_dir_path=None, make_author_dir=False, ext=None): + def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None): if output_dir_path: save_path = output_dir_path if make_author_dir: - save_path = os.path.join(save_path, - unicode(self.book_info.author).encode('utf-8')) - save_path = os.path.join(save_path, - self.book_info.uri.slug) + save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8')) + save_path = os.path.join(save_path, self.book_info.uri.slug) if ext: save_path += '.%s' % ext else: diff --git a/librarian/partners.py b/librarian/partners.py index 58bc8c5..dccb5b6 100644 --- a/librarian/partners.py +++ b/librarian/partners.py @@ -13,20 +13,26 @@ New partners shouldn't be added here, but in the partners repository. """ from librarian import packagers, cover +from wolnelektury.utils import makedirs + class GandalfEpub(packagers.EpubPackager): cover = cover.GandalfCover + class GandalfPdf(packagers.PdfPackager): cover = cover.GandalfCover + class BookotekaEpub(packagers.EpubPackager): cover = cover.BookotekaCover + class PrestigioEpub(packagers.EpubPackager): cover = cover.PrestigioCover flags = ('less-advertising',) + class PrestigioPdf(packagers.PdfPackager): cover = cover.PrestigioCover flags = ('less-advertising',) @@ -51,7 +57,6 @@ class Virtualo(packagers.Packager): from librarian import DirDocProvider, ParseError from librarian.parser import WLDocument from copy import deepcopy - import os import os.path xml = etree.fromstring(""" @@ -80,7 +85,7 @@ class Virtualo(packagers.Packager): slug, ext = os.path.splitext(fname) outfile_dir = os.path.join(output_dir, slug) - os.makedirs(os.path.join(output_dir, slug)) + makedirs(os.path.join(output_dir, slug)) doc = WLDocument.from_file(main_input, provider=provider) info = doc.book_info @@ -97,17 +102,14 @@ class Virtualo(packagers.Packager): cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) outfile = os.path.join(outfile_dir, '1.epub') outfile_sample = os.path.join(outfile_dir, '1.sample.epub') - doc.save_output_file(doc.as_epub(), - output_path=outfile) - doc.save_output_file(doc.as_epub(doc, sample=25), - output_path=outfile_sample) + doc.save_output_file(doc.as_epub(), output_path=outfile) + doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample) outfile = os.path.join(outfile_dir, '1.mobi') outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') - doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), - output_path=outfile) + doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile) doc.save_output_file( - doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), - output_path=outfile_sample) + doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), + output_path=outfile_sample) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, diff --git a/librarian/pdf.py b/librarian/pdf.py index 95883e1..c899afa 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -41,20 +41,21 @@ STYLESHEETS = { 'wl2tex': 'pdf/wl2tex.xslt', } -#CUSTOMIZATIONS = [ -# 'nofootnotes', -# 'nothemes', -# 'defaultleading', -# 'onehalfleading', -# 'doubleleading', -# 'nowlfont', -# ] +# CUSTOMIZATIONS = [ +# 'nofootnotes', +# 'nothemes', +# 'defaultleading', +# 'onehalfleading', +# 'doubleleading', +# 'nowlfont', +# ] + def insert_tags(doc, split_re, tagname, exclude=None): """ inserts for every occurence of `split_re' in text nodes in the `doc' tree - >>> t = etree.fromstring('A-B-CX-Y-Z'); - >>> insert_tags(t, re.compile('-'), 'd'); + >>> t = etree.fromstring('A-B-CX-Y-Z') + >>> insert_tags(t, re.compile('-'), 'd') >>> print etree.tostring(t) ABCXYZ """ @@ -95,6 +96,7 @@ def fix_hanging(doc): exclude=[DCNS("identifier.url"), DCNS("rights.license")] ) + def fix_tables(doc): for kol in doc.iter(tag='kol'): if kol.tail is not None: @@ -109,10 +111,12 @@ def fix_tables(doc): def move_motifs_inside(doc): """ moves motifs to be into block elements """ - for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'): + for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|' + '//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'): for motif in master.xpath('motyw'): for sib in motif.itersiblings(): - if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'): + if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', + 'begin', 'end', 'motyw', 'extra', 'uwaga'): # motif shouldn't have a tail - it would be untagged text motif.tail = None motif.getparent().remove(motif) @@ -158,9 +162,8 @@ def parse_creator(doc): Finds all dc:creator and dc.contributor.translator tags and adds *_parsed versions with forenames first. """ - for person in doc.xpath("|".join('//dc:'+(tag) for tag in ( - 'creator', 'contributor.translator')), - namespaces = {'dc': str(DCNS)})[::-1]: + for person in doc.xpath("|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')), + namespaces={'dc': str(DCNS)})[::-1]: if not person.text: continue p = Person.from_text(person.text) @@ -224,8 +227,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, if book_info.cover_by: root.set('data-cover-by', book_info.cover_by) if book_info.cover_source: - root.set('data-cover-source', - book_info.cover_source) + root.set('data-cover-source', book_info.cover_source) if flags: for flag in flags: root.set('flag-' + flag, 'yes') @@ -284,7 +286,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, with open(os.path.join(temp, 'cover.png'), 'w') as f: bound_cover.save(f, quality=80) - del document # no longer needed large object :) + del document # no longer needed large object :) tex_path = os.path.join(temp, 'doc.tex') fout = open(tex_path, 'w') @@ -343,8 +345,7 @@ def load_including_children(wldoc=None, provider=None, uri=None): text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) - document = WLDocument.from_string(text, - parse_dublincore=True, provider=provider) + document = WLDocument.from_string(text, parse_dublincore=True, provider=provider) document.swap_endlines() for child_uri in document.book_info.parts: diff --git a/librarian/picture.py b/librarian/picture.py index 6fc40b1..dbbb4de 100644 --- a/librarian/picture.py +++ b/librarian/picture.py @@ -1,5 +1,5 @@ - -from dcparser import (as_person, as_date, Field, WorkInfo, DCNS) +# -*- coding: utf-8 -*- +from dcparser import Field, WorkInfo, DCNS from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI) from xml.parsers.expat import ExpatError from os import path @@ -10,15 +10,16 @@ import re from functools import * from operator import * + class WLPictureURI(WLURI): - _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/' - '(?P[-a-z0-9]+)/?$') + _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P[-a-z0-9]+)/?$') @classmethod def from_slug(cls, slug): uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug return cls(uri) + def as_wlpictureuri_strict(text): return WLPictureURI.strict(text) @@ -39,15 +40,14 @@ class PictureInfo(WorkInfo): Field(DCNS('description.medium'), 'medium', required=False), Field(DCNS('description.dimensions'), 'original_dimensions', required=False), Field(DCNS('format'), 'mime_type', required=False), - Field(DCNS('identifier.url'), 'url', WLPictureURI, - strict=as_wlpictureuri_strict), - ) + Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict) + ) class ImageStore(object): EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp' - 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc', - 'aiff', 'wbmp', 'xbm'] + 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc', + 'aiff', 'wbmp', 'xbm'] MIME = ['image/gif', 'image/jpeg', 'image/png', 'application/x-shockwave-flash', 'image/psd', 'image/bmp', 'image/tiff', 'image/tiff', 'application/octet-stream', @@ -55,8 +55,8 @@ class ImageStore(object): 'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm'] def __init__(self, dir_): + super(ImageStore, self).__init__() self.dir = dir_ - return super(ImageStore, self).__init__() def path(self, slug, mime_type): """ @@ -127,7 +127,7 @@ class WLPicture(object): parser = etree.XMLParser(remove_blank_text=False) tree = etree.parse(StringIO(data.encode('utf-8')), parser) - me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store) + me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store) me.load_frame_info() return me except (ExpatError, XMLSyntaxError, XSLTApplyError), e: @@ -162,29 +162,31 @@ class WLPicture(object): def has_all_props(node, props): return reduce(and_, map(lambda prop: prop in node.attrib, props)) - if has_all_props(area, ['x1', 'x2', 'y1', 'y2']) == False: + if not has_all_props(area, ['x1', 'x2', 'y1', 'y2']): return None - + def n(prop): return int(area.get(prop)) return [[n('x1'), n('y1')], [n('x2'), n('y2')]] - def partiter(self): """ Iterates the parts of this picture and returns them and their metadata """ # omg no support for //sem[(@type='theme') or (@type='object')] ? - for part in list(self.edoc.iterfind("//sem[@type='theme']")) + list(self.edoc.iterfind("//sem[@type='object']")): - pd = {} - pd['type'] = part.get('type') + for part in list(self.edoc.iterfind("//sem[@type='theme']")) +\ + list(self.edoc.iterfind("//sem[@type='object']")): + pd = {'type': part.get('type')} coords = self.get_sem_coords(part) - if coords is None: continue + if coords is None: + continue pd['coords'] = coords def want_unicode(x): - if not isinstance(x, unicode): return x.decode('utf-8') - else: return x + if not isinstance(x, unicode): + return x.decode('utf-8') + else: + return x pd['object'] = part.attrib['type'] == 'object' and want_unicode(part.attrib.get('object', u'')) or None pd['themes'] = part.attrib['type'] == 'theme' and [part.attrib.get('theme', u'')] or [] yield pd diff --git a/librarian/sponsor.py b/librarian/sponsor.py index 15d9107..c9bc35b 100644 --- a/librarian/sponsor.py +++ b/librarian/sponsor.py @@ -5,6 +5,7 @@ # from librarian import get_resource + def sponsor_logo(name): return { 'Narodowe Centrum Kultury': get_resource('res/sponsors/nck.png') diff --git a/librarian/text.py b/librarian/text.py index 0f3c0bb..0eb7b59 100644 --- a/librarian/text.py +++ b/librarian/text.py @@ -29,6 +29,7 @@ Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowocz %(description)s%(contributors)s%(funders)s """ + def transform(wldoc, flags=None, **options): """ Transforms input_file in XML to output_file in TXT. @@ -59,10 +60,17 @@ def transform(wldoc, flags=None, **options): license_description = parsed_dc.license_description license = parsed_dc.license if license: - license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license) + license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % ( + license_description, license) else: - license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)" - + license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się " \ + u"w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, " \ + u"publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi " \ + u"materiałami (przypisy, motywy literackie etc.), które podlegają prawu " \ + u"autorskiemu, to te dodatkowe materiały udostępnione są na licencji " \ + u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL " \ + u"(http://creativecommons.org/licenses/by-sa/3.0/)" + source = parsed_dc.source_name if source: source = "\n\nTekst opracowany na podstawie: " + source @@ -79,7 +87,6 @@ def transform(wldoc, flags=None, **options): else: description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).' url = '*' * 10 - license = "" license_description = "" source = "" contributors = "" @@ -96,4 +103,3 @@ def transform(wldoc, flags=None, **options): else: result = unicode(result).encode('utf-8') return OutputFile.from_string("\r\n".join(result.splitlines()) + "\r\n") - diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py index b9a28f0..cab5b1c 100644 --- a/tests/test_dcparser.py +++ b/tests/test_dcparser.py @@ -11,6 +11,7 @@ from tests.utils import get_all_fixtures import codecs from datetime import date + def check_dcparser(xml_file, result_file): xml = file(xml_file).read() result = codecs.open(result_file, encoding='utf-8').read() @@ -46,6 +47,7 @@ def test_serialize(): for fixture in get_all_fixtures('dcparser', '*.xml'): yield check_serialize, fixture + def test_asdate(): assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 03)) assert_equals(dcparser.as_date(u"2011"), date(2011, 1, 1)) @@ -55,4 +57,3 @@ def test_asdate(): assert_equals(dcparser.as_date(u"ok. 1813-1814"), date(1813, 1, 1)) assert_equals(dcparser.as_date(u"ok.1876-ok.1886"), date(1876, 1, 1)) assert_equals(dcparser.as_date(u"1893/1894"), date(1893, 1, 1)) - diff --git a/tests/test_epub.py b/tests/test_epub.py index faa76e7..720fec6 100644 --- a/tests/test_epub.py +++ b/tests/test_epub.py @@ -25,7 +25,8 @@ def test_transform(): for par in tree.findall("//p"): if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'): editors_attribution = True - assert_equal(par.text.rstrip(), + assert_equal( + par.text.rstrip(), u'Opracowanie redakcyjne i przypisy: ' u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.') assert_true(editors_attribution) diff --git a/tests/test_html.py b/tests/test_html.py index 51d6acd..a0de630 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -33,6 +33,7 @@ def test_passing_parse_dublincore_to_transform(): parse_dublincore=False, ).as_html() + def test_empty(): assert not WLDocument.from_string( '', diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py index 4956b7d..234f297 100644 --- a/tests/test_html_annotations.py +++ b/tests/test_html_annotations.py @@ -1,11 +1,8 @@ # -*- coding: utf-8 from __future__ import unicode_literals -from StringIO import StringIO -import tempfile from librarian.parser import WLDocument from librarian.html import extract_annotations -from lxml import etree from nose.tools import eq_ @@ -13,10 +10,11 @@ def _test_annotation(expected, got, name): assert got[0].startswith('anchor-'), "%s: Unexpected anchor: '%s', should begin with 'anchor-'" % (name, got[0]) eq_(expected[0], got[1], "%s: Unexpected type, expected '%s', got '%s'" % (name, expected[0], got[1])) eq_(expected[1], got[2], "%s: Unexpected qualifier, expected '%s', got '%s'" % (name, expected[1], got[2])) - eq_(expected[2], got[3], "%s: Unexpected text representation, expected '%s', got '%s'" % (name, expected[2], got[3])) + eq_(expected[2], got[3], "%s: Unexpected text representation, expected '%s', got '%s'" % + (name, expected[2], got[3])) exp_html = '
%s
' % (expected[0], expected[3]) eq_(exp_html, got[4], "%s: Unexpected html representation, expected '%s', got '%s'" % (name, exp_html, got[4])) - + def test_annotations(): annotations = ( @@ -29,8 +27,7 @@ def test_annotations(): ), 'Empty footnote'), - ( - 'Definiendum --- definiens.', ( + ('Definiendum --- definiens.', ( 'pr', [], 'Definiendum \u2014 definiens.', @@ -94,11 +91,14 @@ def test_annotations(): ), 'Footnote with a second parentheses and mdash.'), - ('gemajna (daw., z niem. gemein: zwykły) --- częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.', ( + ('gemajna (daw., z niem. gemein: zwykły) --- ' + 'częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.', ( 'pe', ['daw.', 'niem.'], - 'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.', - '

gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.

' + 'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, ' + 'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.', + '

gemajna (daw., z niem. gemein: zwykły) ' + '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.

' ), 'Footnote with multiple and qualifiers and emphasis.'), diff --git a/tests/test_html_fragments.py b/tests/test_html_fragments.py index 99bb62d..3e87a9e 100644 --- a/tests/test_html_fragments.py +++ b/tests/test_html_fragments.py @@ -14,7 +14,5 @@ def test_fragments(): closed_fragments, open_fragments = extract_fragments( get_fixture('text', 'asnyk_miedzy_nami_expected.html')) assert not open_fragments - fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) - for f in closed_fragments.values()) + fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in closed_fragments.values()) assert_equal(fragments_text, file(expected_output_file_path).read().decode('utf-8')) - diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 75b73bc..5b2dba1 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -3,7 +3,6 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import re from tempfile import NamedTemporaryFile from nose.tools import * from librarian import DirDocProvider @@ -22,7 +21,5 @@ def test_transform(): print tex # Check contributor list. - editors = re.search(ur'\\def\\editors\{' - ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex) - assert_equal(editors.group(1), - u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska") + editors = re.search(ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex) + assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska") diff --git a/tests/test_picture.py b/tests/test_picture.py index 1169f44..00b03ce 100644 --- a/tests/test_picture.py +++ b/tests/test_picture.py @@ -4,16 +4,14 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # from librarian import picture, dcparser -from lxml import etree -from nose.tools import * -from os.path import splitext from tests.utils import get_all_fixtures, get_fixture -import codecs from os import path + def test_wlpictureuri(): uri = picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus') + def check_load(xml_file): pi = dcparser.parse(xml_file, picture.PictureInfo) assert pi is not None @@ -39,6 +37,7 @@ def test_wlpicture(): f = wlp.image_file('r') f.close() + def test_picture_parts(): wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml'))) parts = list(wlp.partiter()) @@ -55,7 +54,5 @@ def test_picture_parts(): if p['object']: names.add(p['object']) - assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs - assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names - - + assert motifs == {u'anioł historii', u'spojrzenie'}, "missing motifs, got: %s" % motifs + assert names == {u'obraz cały', u'skrzydło'}, 'missing objects, got: %s' % names diff --git a/tests/utils.py b/tests/utils.py index 3b1f4f5..fc87532 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,7 +6,7 @@ from __future__ import with_statement from os.path import realpath, join, dirname import glob -import os + def get_fixture_dir(dir_name): """Returns path to fixtures directory dir_name.""" -- 2.20.1