From 716a9ab552bffbb7df2cb31ae41ee196902c7653 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Wed, 10 Jun 2020 00:34:15 +0200 Subject: [PATCH] Style changes. --- src/librarian/book2anything.py | 68 ++++++++----- src/librarian/cover.py | 157 +++++++++++++++++++++++-------- src/librarian/dcparser.py | 142 +++++++++++++++++++--------- src/librarian/embeds/__init__.py | 7 +- src/librarian/embeds/latex.py | 10 +- src/librarian/embeds/mathml.py | 11 ++- src/librarian/functions.py | 5 +- src/librarian/html.py | 89 +++++++++++++----- src/librarian/mobi.py | 3 +- src/librarian/packagers.py | 6 +- src/librarian/parser.py | 2 +- src/librarian/partners.py | 35 +++++-- src/librarian/pdf.py | 104 +++++++++++++------- src/librarian/picture.py | 59 ++++++++---- src/librarian/text.py | 50 ++++++---- 15 files changed, 531 insertions(+), 217 deletions(-) diff --git a/src/librarian/book2anything.py b/src/librarian/book2anything.py index 948d9fd..d954ce6 100755 --- a/src/librarian/book2anything.py +++ b/src/librarian/book2anything.py @@ -32,7 +32,7 @@ class Option(object): class Book2Anything(object): """A class for creating book2... scripts. - + Subclass it for any format you want to convert to. """ format_name = None # Set format name, like "PDF". @@ -42,8 +42,10 @@ class Book2Anything(object): uses_provider = False # Does it need a DocProvider? transform = None # Transform method. Uses WLDocument.as_{ext} by default. parser_options = [] # List of Option objects for additional parser args. - transform_options = [] # List of Option objects for additional transform args. - transform_flags = [] # List of Option objects for supported transform flags. + # List of Option objects for additional transform args. + transform_options = [] + # List of Option objects for supported transform flags. + transform_flags = [] @classmethod def run(cls): @@ -53,22 +55,37 @@ class Book2Anything(object): parser = optparse.OptionParser(usage=usage) - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the output file in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') + parser.add_option( + '-v', '--verbose', action='store_true', dest='verbose', + default=False, help='print status messages to stdout') + parser.add_option( + '-d', '--make-dir', action='store_true', dest='make_dir', + default=False, + help='create a directory for author and put the output file in it' + ) + parser.add_option( + '-o', '--output-file', dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option( + '-O', '--output-dir', dest='output_dir', metavar='DIR', + help='specifies the directory for output' + ) if cls.uses_cover: if cls.cover_optional: - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-C', '--image-cache', dest='image_cache', metavar='URL', - help='prefix for image download cache' + - (' (implies --with-cover)' if cls.cover_optional else '')) - for option in cls.parser_options + cls.transform_options + cls.transform_flags: + parser.add_option( + '-c', '--with-cover', action='store_true', + dest='with_cover', default=False, + help='create default cover' + ) + parser.add_option( + '-C', '--image-cache', dest='image_cache', metavar='URL', + help='prefix for image download cache' + + (' (implies --with-cover)' if cls.cover_optional else '') + ) + for option in ( + cls.parser_options + + cls.transform_options + + cls.transform_flags): option.add(parser) options, input_filenames = parser.parse_args() @@ -86,7 +103,11 @@ class Book2Anything(object): for option in cls.transform_options: transform_args[option.name()] = option.value(options) # Add flags to transform_args, if any. - transform_flags = [flag.name() for flag in cls.transform_flags if flag.value(options)] + transform_flags = [ + flag.name() + for flag in cls.transform_flags + if flag.value(options) + ] if transform_flags: transform_args['flags'] = transform_flags if options.verbose: @@ -95,7 +116,10 @@ class Book2Anything(object): if cls.uses_cover: if options.image_cache: def cover_class(book_info, *args, **kwargs): - return make_cover(book_info, image_cache=options.image_cache, *args, **kwargs) + return make_cover( + book_info, image_cache=options.image_cache, + *args, **kwargs + ) transform_args['cover'] = cover_class elif not cls.cover_optional or options.with_cover: transform_args['cover'] = make_cover @@ -123,13 +147,15 @@ class Book2Anything(object): output_file = options.output_file # Do the transformation. - doc = WLDocument.from_file(main_input, provider=provider, **parser_args) + doc = WLDocument.from_file(main_input, provider=provider, + **parser_args) transform = cls.transform if transform is None: transform = getattr(WLDocument, 'as_%s' % cls.ext) output = transform(doc, **transform_args) - doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext) + doc.save_output_file(output, output_file, options.output_dir, + options.make_dir, cls.ext) except ParseError as e: print('%(file)s:%(name)s:%(message)s' % { diff --git a/src/librarian/cover.py b/src/librarian/cover.py index 0f38ec3..b5996ae 100644 --- a/src/librarian/cover.py +++ b/src/librarian/cover.py @@ -147,7 +147,8 @@ class Cover(object): self.format = format if width and height: self.height = int(round(height * self.width / width)) - scale = max(float(width or 0) / self.width, float(height or 0) / self.height) + scale = max(float(width or 0) / self.width, + float(height or 0) / self.height) if scale >= 1: self.scale = scale elif scale: @@ -163,7 +164,8 @@ class Cover(object): def image(self): metr = Metric(self, self.scale) - img = Image.new('RGB', (metr.width, metr.height), self.background_color) + img = Image.new('RGB', (metr.width, metr.height), + self.background_color) if self.background_img: background = Image.open(self.background_img) @@ -173,19 +175,26 @@ class Cover(object): # WL logo if metr.logo_width: logo = Image.open(get_resource('res/wl-logo.png')) - logo = logo.resize((metr.logo_width, int(round(logo.size[1] * metr.logo_width / logo.size[0])))) - img.paste(logo, ((metr.width - metr.logo_width) // 2, img.size[1] - logo.size[1] - metr.logo_bottom)) + logo = logo.resize(( + metr.logo_width, + int(round(logo.size[1] * metr.logo_width / logo.size[0])) + )) + img.paste(logo, ( + (metr.width - metr.logo_width) // 2, + img.size[1] - logo.size[1] - metr.logo_bottom + )) top = metr.author_top tbox = TextBox( metr.width - metr.author_margin_left - metr.author_margin_right, metr.height - top, ) - + author_font = ImageFont.truetype( self.author_font_ttf, metr.author_font_size) for pa in self.pretty_authors(): - tbox.text(pa, self.author_color, author_font, metr.author_lineskip, self.author_shadow) + tbox.text(pa, self.author_color, author_font, metr.author_lineskip, + self.author_shadow) text_img = tbox.image() img.paste(text_img, (metr.author_margin_left, top), text_img) @@ -196,7 +205,8 @@ class Cover(object): ) title_font = ImageFont.truetype( self.title_font_ttf, metr.title_font_size) - tbox.text(self.pretty_title(), self.title_color, title_font, metr.title_lineskip, self.title_shadow) + tbox.text(self.pretty_title(), self.title_color, title_font, + metr.title_lineskip, self.title_shadow) text_img = tbox.image() img.paste(text_img, (metr.title_margin_left, top), text_img) @@ -282,8 +292,10 @@ class WLCover(Cover): u'Epika': 'bottom', } - def __init__(self, book_info, format=None, width=None, height=None, bleed=0): - super(WLCover, self).__init__(book_info, format=format, width=width, height=height) + def __init__(self, book_info, format=None, width=None, height=None, + bleed=0): + super(WLCover, self).__init__(book_info, format=format, width=width, + height=height) # Set box position. self.box_position = book_info.cover_box_position or \ self.kind_box_position.get(book_info.kind, self.box_position) @@ -294,7 +306,8 @@ class WLCover(Cover): self.bar_color = book_info.cover_bar_color or \ self.epoch_colors.get(book_info.epoch, self.bar_color) # Set title color. - self.title_color = self.epoch_colors.get(book_info.epoch, self.title_color) + self.title_color = self.epoch_colors.get(book_info.epoch, + self.title_color) self.bleed = bleed self.box_top_margin += bleed @@ -319,19 +332,19 @@ class WLCover(Cover): metr = Metric(self, self.scale) # Write author name. - box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y) + box = TextBox(metr.title_box_width, metr.height, + padding_y=metr.box_padding_y) author_font = ImageFont.truetype( self.author_font_ttf, metr.author_font_size) for pa in self.pretty_authors(): - box.text(pa, - font=author_font, - line_height=metr.author_lineskip, - color=self.author_color, - shadow_color=self.author_shadow) + box.text(pa, font=author_font, line_height=metr.author_lineskip, + color=self.author_color, shadow_color=self.author_shadow) box.skip(metr.box_above_line) - box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height), - fill=self.author_color, width=metr.box_line_width) + box.draw.line( + (metr.box_line_left, box.height, metr.box_line_right, box.height), + fill=self.author_color, width=metr.box_line_width + ) box.skip(metr.box_below_line) # Write title. @@ -353,11 +366,20 @@ class WLCover(Cover): else: # Middle. box_top = (metr.height - box_img.size[1]) // 2 - box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) // 2 + box_left = metr.bar_width + ( + metr.width - metr.bar_width - box_img.size[0] + ) // 2 # Draw the white box. ImageDraw.Draw(img).rectangle( - (box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff') + ( + box_left, + box_top, + box_left + box_img.size[0], + box_top + box_img.size[1] + ), + fill='#fff' + ) # Paste the contents into the white box. img.paste(box_img, (box_left, box_top), box_img) return img @@ -368,24 +390,43 @@ class WLCover(Cover): return img metr = Metric(self, self.scale) draw = ImageDraw.Draw(img) - for corner_x, corner_y in ((0, 0), (metr.width, 0), (0, metr.height), (metr.width, metr.height)): + for corner_x, corner_y in ( + (0, 0), (metr.width, 0), + (0, metr.height), (metr.width, metr.height) + ): dir_x = 1 if corner_x == 0 else -1 dir_y = 1 if corner_y == 0 else -1 for offset in (-1, 0, 1): - draw.line((corner_x, corner_y + dir_y * metr.bleed + offset, - corner_x + dir_x * metr.bleed * line_ratio, corner_y + dir_y * metr.bleed + offset), - fill='black' if offset == 0 else 'white', width=1) - draw.line((corner_x + dir_x * metr.bleed + offset, corner_y, - corner_x + dir_x * metr.bleed + offset, corner_y + dir_y * metr.bleed * line_ratio), - fill='black' if offset == 0 else 'white', width=1) + draw.line( + ( + corner_x, + corner_y + dir_y * metr.bleed + offset, + corner_x + dir_x * metr.bleed * line_ratio, + corner_y + dir_y * metr.bleed + offset + ), + fill='black' if offset == 0 else 'white', + width=1 + ) + draw.line( + ( + corner_x + dir_x * metr.bleed + offset, + corner_y, + corner_x + dir_x * metr.bleed + offset, + corner_y + dir_y * metr.bleed * line_ratio + ), + fill='black' if offset == 0 else 'white', + width=1 + ) return img def image(self): metr = Metric(self, self.scale) - img = Image.new('RGB', (metr.width, metr.height), self.background_color) + img = Image.new('RGB', (metr.width, metr.height), + self.background_color) draw = ImageDraw.Draw(img) - draw.rectangle((0, 0, metr.bar_width, metr.height), fill=self.bar_color) + draw.rectangle((0, 0, metr.bar_width, metr.height), + fill=self.bar_color) if self.background_img: src = Image.open(self.background_img) @@ -447,27 +488,61 @@ class LogoWLCover(WLCover): def image(self): img = super(LogoWLCover, self).image() metr = Metric(self, self.scale) - gradient = Image.new('RGBA', (metr.width - metr.bar_width, metr.gradient_height), self.gradient_color) - gradient_mask = Image.new('L', (metr.width - metr.bar_width, metr.gradient_height)) + gradient = Image.new( + 'RGBA', + (metr.width - metr.bar_width, metr.gradient_height), + self.gradient_color + ) + gradient_mask = Image.new( + 'L', + (metr.width - metr.bar_width, metr.gradient_height) + ) draw = ImageDraw.Draw(gradient_mask) for line in range(0, metr.gradient_height): draw.line( (0, line, metr.width - metr.bar_width, line), - fill=int(255 * self.gradient_opacity * line / metr.gradient_height)) - img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask) + fill=int( + 255 * self.gradient_opacity * line / metr.gradient_height + ) + ) + img.paste(gradient, + (metr.bar_width, metr.height - metr.gradient_height), + mask=gradient_mask) cursor = metr.width - metr.gradient_logo_margin_right - logo_top = int(metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2) - - logos = [get_resource(logo_path) for logo_path in self.gradient_logos[::-1]] + logo_top = int( + metr.height + - metr.gradient_height / 2 + - metr.gradient_logo_height / 2 - metr.bleed / 2 + ) + + logos = [ + get_resource(logo_path) + for logo_path in self.gradient_logos[::-1] + ] logos = logos + self.additional_cover_logos - logos = [Image.open(logo_bytes).convert('RGBA') for logo_bytes in logos] + logos = [ + Image.open(logo_bytes).convert('RGBA') + for logo_bytes in logos + ] # See if logos fit into the gradient. If not, scale down accordingly. - space_for_logos = metr.width - metr.bar_width - 2 * metr.gradient_logo_margin_right - widths = [logo.size[0] * metr.gradient_logo_height / logo.size[1] for logo in logos] - taken_space = sum(widths) + (len(logos) - 1) * (metr.gradient_logo_spacing) - logo_scale = space_for_logos / taken_space if taken_space > space_for_logos else 1 + space_for_logos = ( + metr.width + - metr.bar_width + - 2 * metr.gradient_logo_margin_right + ) + widths = [ + logo.size[0] * metr.gradient_logo_height / logo.size[1] + for logo in logos] + taken_space = ( + sum(widths) + + (len(logos) - 1) * (metr.gradient_logo_spacing) + ) + logo_scale = ( + space_for_logos / taken_space + if taken_space > space_for_logos else 1 + ) logo_top += int(metr.gradient_logo_height * (1 - logo_scale) / 2) for i, logo in enumerate(logos): diff --git a/src/librarian/dcparser.py b/src/librarian/dcparser.py index 92afc01..432b580 100644 --- a/src/librarian/dcparser.py +++ b/src/librarian/dcparser.py @@ -46,12 +46,19 @@ class Person(object): surname = parts[0] names = [] elif len(parts) != 2: - raise ValueError("Invalid person name. There should be at most one comma: \"%s\"." % text.encode('utf-8')) + raise ValueError( + "Invalid person name. " + "There should be at most one comma: \"%s\"." + % text.encode('utf-8') + ) else: surname = parts[0] if len(parts[1]) == 0: # there is no non-whitespace data after the comma - raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts)) + raise ValueError( + "Found a comma, but no names given: \"%s\" -> %r." + % (text, parts) + ) names = parts[1].split() return cls(surname, *names) @@ -59,10 +66,12 @@ class Person(object): return u" ".join(self.first_names + (self.last_name,)) def __eq__(self, right): - return self.last_name == right.last_name and self.first_names == right.first_names + return (self.last_name == right.last_name + and self.first_names == right.first_names) def __lt__(self, other): - return (self.last_name, self.first_names) < (other.last_name, other.first_names) + return ((self.last_name, self.first_names) + < (other.last_name, other.first_names)) def __hash__(self): return hash((self.last_name, self.first_names)) @@ -74,25 +83,32 @@ class Person(object): return self.last_name def __repr__(self): - return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) + return 'Person(last_name=%r, first_names=*%r)' % ( + self.last_name, self.first_names + ) def as_date(text): - """Dates for digitization of pictures. It seems we need the following: -ranges: '1350-1450', -centuries: "XVIII w.' -half centuries/decades: '2 poł. XVIII w.', 'XVII w., l. 20' -later-then: 'po 1450' -circa 'ok. 1813-1814', 'ok.1876-ok.1886 -turn: 1893/1894 -for now we will translate this to some single date losing information of course. + """ + Dates for digitization of pictures. It seems we need the following: + ranges: '1350-1450', + centuries: "XVIII w.' + half centuries/decades: '2 poł. XVIII w.', 'XVII w., l. 20' + later-then: 'po 1450' + circa 'ok. 1813-1814', 'ok.1876-ok.1886 + turn: 1893/1894 + + For now we will translate this to some single date + losing information of course. """ try: # check out the "N. poł X w." syntax if isinstance(text, six.binary_type): text = text.decode("utf-8") - century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?" + century_format = ( + u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?" + ) vague_format = u"(?:po *|ok. *)?([0-9]{4})(-[0-9]{2}-[0-9]{2})?" m = re.match(century_format, text) @@ -103,7 +119,10 @@ for now we will translate this to some single date losing information of course. century = roman_to_int(m.group(2)) if half is not None: if decade is not None: - raise ValueError("Bad date format. Cannot specify both half and decade of century") + raise ValueError( + "Bad date format. " + "Cannot specify both half and decade of century." + ) half = int(half) t = ((century*100 + (half-1)*50), 1, 1) else: @@ -120,7 +139,7 @@ for now we will translate this to some single date losing information of course. raise ValueError return DatePlus(t[0], t[1], t[2]) - except ValueError as e: + except ValueError: raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.") @@ -140,7 +159,8 @@ def as_wluri_strict(text): class Field(object): - def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs): + def __init__(self, uri, attr_name, validator=as_unicode, strict=None, + multiple=False, salias=None, **kwargs): self.uri = uri self.name = attr_name self.validator = validator @@ -148,7 +168,8 @@ class Field(object): self.multiple = multiple self.salias = salias - self.required = kwargs.get('required', True) and 'default' not in kwargs + self.required = (kwargs.get('required', True) + and 'default' not in kwargs) self.default = kwargs.get('default', [] if multiple else [None]) def validate_value(self, val, strict=False): @@ -170,9 +191,14 @@ class Field(object): new_values.append(nv) return new_values elif len(val) > 1: - raise ValidationError("Multiple values not allowed for field '%s'" % self.uri) + raise ValidationError( + "Multiple values not allowed for field '%s'" % self.uri + ) elif len(val) == 0: - raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri) + raise ValidationError( + "Field %s has no value to assign. Check your defaults." + % self.uri + ) else: if validator is None or val[0] is None: return val[0] @@ -181,7 +207,10 @@ class Field(object): setattr(nv, 'lang', val[0].lang) return nv except ValueError as e: - raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message)) + raise ValidationError( + "Field '%s' - invald value: %s" + % (self.uri, e.message) + ) def validate(self, fdict, fallbacks=None, strict=False): if fallbacks is None: @@ -229,31 +258,37 @@ class DCInfo(type): class WorkInfo(six.with_metaclass(DCInfo, object)): FIELDS = ( - Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True), + Field(DCNS('creator'), 'authors', as_person, salias='author', + multiple=True), Field(DCNS('title'), 'title'), Field(DCNS('type'), 'type', required=False, multiple=True), Field(DCNS('contributor.editor'), 'editors', as_person, salias='editor', multiple=True, required=False), Field(DCNS('contributor.technical_editor'), 'technical_editors', - as_person, salias='technical_editor', multiple=True, required=False), - Field(DCNS('contributor.funding'), 'funders', salias='funder', multiple=True, required=False), + as_person, salias='technical_editor', multiple=True, + required=False), + Field(DCNS('contributor.funding'), 'funders', salias='funder', + multiple=True, required=False), Field(DCNS('contributor.thanks'), 'thanks', required=False), Field(DCNS('date'), 'created_at'), - Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), + Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, + required=False), Field(DCNS('publisher'), 'publisher', multiple=True), Field(DCNS('language'), 'language'), Field(DCNS('description'), 'description', required=False), Field(DCNS('source'), 'source_name', required=False), - Field(DCNS('source.URL'), 'source_urls', salias='source_url', multiple=True, required=False), + Field(DCNS('source.URL'), 'source_urls', salias='source_url', + multiple=True, required=False), Field(DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict), Field(DCNS('rights.license'), 'license', required=False), Field(DCNS('rights'), 'license_description'), - Field(PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, required=False), + Field(PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, + required=False), Field(WLNS('digitisationSponsorNote'), 'sponsor_note', required=False), Field(WLNS('developmentStage'), 'stage', required=False), ) @@ -292,12 +327,16 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): @classmethod def from_element(cls, rdf_tag, *args, **kwargs): - # the tree is already parsed, so we don't need to worry about Expat errors + # The tree is already parsed, + # so we don't need to worry about Expat errors. field_dict = {} desc = rdf_tag.find(".//" + RDFNS('Description')) if desc is None: - raise NoDublinCore("There must be a '%s' element inside the RDF." % RDFNS('Description')) + raise NoDublinCore( + "There must be a '%s' element inside the RDF." + % RDFNS('Description') + ) lang = None p = desc @@ -325,15 +364,19 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): return cls(desc.attrib, field_dict, *args, **kwargs) def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False): - """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description. - dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the - given field. """ + """ + rdf_attrs should be a dictionary-like object with any attributes + of the RDF:Description. + dc_fields - dictionary mapping DC fields (with namespace) to + list of text values for the given field. + """ self.about = rdf_attrs.get(RDFNS('about')) self.fmap = {} for field in self.FIELDS: - value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict) + value = field.validate(dc_fields, fallbacks=fallbacks, + strict=strict) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field if field.salias: @@ -367,8 +410,10 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): return object.__setattr__(self, name, newvalue) def update(self, field_dict): - """Update using field_dict. Verify correctness, but don't check if all - required fields are present.""" + """ + Update using field_dict. Verify correctness, but don't check + if all required fields are present. + """ for field in self.FIELDS: if field.name in field_dict: setattr(self, field.name, field_dict[field.name]) @@ -448,27 +493,36 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): class BookInfo(WorkInfo): FIELDS = ( - Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, required=False), + Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, + required=False), - Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, required=False), - Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, required=False), - Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False), + Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, + required=False), + Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, + required=False), + Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, + required=False), Field(DCNS('subject.location'), 'location', required=False), Field(DCNS('contributor.translator'), 'translators', as_person, salias='translator', multiple=True, required=False), - Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict, multiple=True, required=False), - Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, strict=as_wluri_strict, required=False), + Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict, + multiple=True, required=False), + Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, + strict=as_wluri_strict, required=False), Field(DCNS('relation.coverImage.url'), 'cover_url', required=False), - Field(DCNS('relation.coverImage.attribution'), 'cover_by', required=False), - Field(DCNS('relation.coverImage.source'), 'cover_source', required=False), + Field(DCNS('relation.coverImage.attribution'), 'cover_by', + required=False), + Field(DCNS('relation.coverImage.source'), 'cover_source', + required=False), # WLCover-specific. Field(WLNS('coverBarColor'), 'cover_bar_color', required=False), Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False), Field(WLNS('coverClass'), 'cover_class', default=['default']), - Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True, required=False), + Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True, + required=False), Field('pdf-id', 'isbn_pdf', required=False), Field('epub-id', 'isbn_epub', required=False), diff --git a/src/librarian/embeds/__init__.py b/src/librarian/embeds/__init__.py index fa74530..821ae37 100644 --- a/src/librarian/embeds/__init__.py +++ b/src/librarian/embeds/__init__.py @@ -8,6 +8,7 @@ known_types = { 'application/x-latex': 'librarian.embeds.latex.LaTeX', } + class Embed(): @classmethod def transforms_to(cls, mime_types, downgrade=False): @@ -20,7 +21,7 @@ class Embed(): return matches def transform_to(self, mime_type, downgrade=False): - for name, method in type(cls).__dict__.iteritems(): + for name, method in type(self).__dict__.iteritems(): if hasattr(method, "embed_converts_to"): conv_type, conv_downgrade = method.embed_converts_to if downgrade == conv_downgrade and conv_type == mime_type: @@ -31,21 +32,25 @@ class DataEmbed(Embed): def __init__(self, data=None): self.data = data + class TreeEmbed(Embed): def __init__(self, tree=None): if isinstance(tree, etree._Element): tree = etree.ElementTree(tree) self.tree = tree + def converts_to(mime_type, downgrade=False): def decorator(method): method.embed_converts_to = mime_type, downgrade return method return decorator + def downgrades_to(mime_type): return converts_to(mime_type, True) + def create_embed(mime_type, tree=None, data=None): embed = known_types.get(mime_type) if embed is None: diff --git a/src/librarian/embeds/latex.py b/src/librarian/embeds/latex.py index 8425d03..4c664a9 100644 --- a/src/librarian/embeds/latex.py +++ b/src/librarian/embeds/latex.py @@ -12,14 +12,16 @@ from . import DataEmbed, create_embed, downgrades_to class LaTeX(DataEmbed): @downgrades_to('image/png') def to_png(self): - tmpl = open(get_resource('res/embeds/latex/template.tex'), 'rb').read().decode('utf-8') + with open(get_resource('res/embeds/latex/template.tex'), 'rb') as f: + tmpl = f.read().decode('utf-8') tempdir = mkdtemp('-librarian-embed-latex') fpath = os.path.join(tempdir, 'doc.tex') with open(fpath, 'wb') as f: f.write((tmpl % {'code': self.data}).encode('utf-8')) - call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE) - call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim', - os.path.join(tempdir, 'doc.png')]) + call(['xelatex', '-interaction=batchmode', '-output-directory', + tempdir, fpath], stdout=PIPE, stderr=PIPE) + call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), + '-trim', os.path.join(tempdir, 'doc.png')]) pngdata = open(os.path.join(tempdir, 'doc.png'), 'rb').read() shutil.rmtree(tempdir) return create_embed('image/png', data=pngdata) diff --git a/src/librarian/embeds/mathml.py b/src/librarian/embeds/mathml.py index 564a9f4..801c213 100644 --- a/src/librarian/embeds/mathml.py +++ b/src/librarian/embeds/mathml.py @@ -11,16 +11,21 @@ class MathML(TreeEmbed): @downgrades_to('application/x-latex') def to_latex(self): """ - >>> print(MathML(etree.fromstring('a < b')).to_latex().data.strip()) + >>> print(MathML(etree.fromstring( + 'a < b' + )).to_latex().data.strip()) a < b - >>> print(MathML(etree.fromstring('< & &lt; A')).to_latex().data.strip()) + >>> print(MathML(etree.fromstring( + '< & &lt; A' + )).to_latex().data.strip()) < & < A """ xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt')) output = self.tree.xslt(xslt) text = six.text_type(output) - # Workaround for entities being preserved in output. But there should be a better way. + # Workaround for entities being preserved in output. + # But there should be a better way. text = text.replace('<', '<').replace('&', '&') return create_embed('application/x-latex', data=text) diff --git a/src/librarian/functions.py b/src/librarian/functions.py index e5a47d6..00f1f6e 100644 --- a/src/librarian/functions.py +++ b/src/librarian/functions.py @@ -69,7 +69,10 @@ def reg_ends_white(): def reg_wrap_words(): def wrap_words(context, text, wrapping): - """XPath extension function automatically wrapping words in passed text""" + """ + XPath extension function automatically wrapping words + in passed text. + """ if isinstance(text, list): text = ''.join(text) if not wrapping: diff --git a/src/librarian/html.py b/src/librarian/html.py index f8e4a83..78f3dad 100644 --- a/src/librarian/html.py +++ b/src/librarian/html.py @@ -32,16 +32,21 @@ def get_stylesheet(name): def html_has_content(text): - return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text) + return etree.ETXPath( + '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)} + )(text) def transform_abstrakt(abstrakt_element): style_filename = get_stylesheet('legacy') style = etree.parse(style_filename) xml = etree.tostring(abstrakt_element, encoding='unicode') - document = etree.parse(six.StringIO(xml.replace('abstrakt', 'dlugi_cytat'))) # HACK + document = etree.parse(six.StringIO( + xml.replace('abstrakt', 'dlugi_cytat') + )) # HACK result = document.xslt(style) - html = re.sub('', '', etree.tostring(result, encoding='unicode')) + html = re.sub('', '', + etree.tostring(result, encoding='unicode')) return re.sub(']*>', '', html) @@ -72,7 +77,10 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None): options = {} options.setdefault('gallery', "''") - css = css or 'https://static.wolnelektury.pl/css/compressed/book_text.css' + css = ( + css + or 'https://static.wolnelektury.pl/css/compressed/book_text.css' + ) css = "'%s'" % css result = document.transform(style, css=css, **options) del document # no longer needed large object :) @@ -83,7 +91,9 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None): add_table_of_contents(result.getroot()) return OutputFile.from_bytes(etree.tostring( - result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8')) + result, method='html', xml_declaration=False, + pretty_print=True, encoding='utf-8' + )) else: return None except KeyError: @@ -122,7 +132,12 @@ class Fragment(object): for event, element in self.closed_events(): if event == 'start': result.append(u'<%s %s>' % ( - element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items()))) + element.tag, + ' '.join( + '%s="%s"' % (k, v) + for k, v in element.attrib.items() + ) + )) if element.text: result.append(element.text) elif event == 'end': @@ -146,7 +161,10 @@ def extract_fragments(input_filename): # iterparse would die on a HTML document parser = etree.HTMLParser(encoding='utf-8') buf = six.BytesIO() - buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8')) + buf.write(etree.tostring( + etree.parse(input_filename, parser).getroot()[0][0], + encoding='utf-8' + )) buf.seek(0) for event, element in etree.iterparse(buf, events=('start', 'end')): @@ -179,12 +197,15 @@ def extract_fragments(input_filename): try: fragment = open_fragments[element.get('fid')] except KeyError: - print('%s:closed not open fragment #%s' % (input_filename, element.get('fid'))) + print('%s:closed not open fragment #%s' % ( + input_filename, element.get('fid') + )) else: closed_fragments[fragment.id] = fragment del open_fragments[fragment.id] - # Append element tail to lost_text (we don't want to lose any text) + # Append element tail to lost_text + # (we don't want to lose any text) if element.tail: for fragment_id in open_fragments: open_fragments[fragment_id].append('text', element.tail) @@ -192,19 +213,24 @@ def extract_fragments(input_filename): # Process all elements except begin and end else: # Omit annotation tags - if (len(element.get('name', '')) or + if (len(element.get('name', '')) or element.get('class', '') in ('annotation', 'anchor')): if event == 'end' and element.tail: for fragment_id in open_fragments: - open_fragments[fragment_id].append('text', element.tail) + open_fragments[fragment_id].append( + 'text', element.tail + ) else: for fragment_id in open_fragments: - open_fragments[fragment_id].append(event, copy.copy(element)) + open_fragments[fragment_id].append( + event, copy.copy(element) + ) return closed_fragments, open_fragments -def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None): +def add_anchor(element, prefix, with_link=True, with_target=True, + link_text=None): parent = element.getparent() index = parent.index(element) @@ -234,8 +260,13 @@ def add_anchors(root): counter = 1 for element in root.iterdescendants(): def f(e): - return e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication', 'frame') or \ - e.get('id') == 'nota_red' or e.tag == 'blockquote' + return ( + e.get('class') in ( + 'note', 'motto', 'motto_podpis', 'dedication', 'frame' + ) + or e.get('id') == 'nota_red' + or e.tag == 'blockquote' + ) if any_ancestor(element, f): continue @@ -261,13 +292,19 @@ def add_table_of_contents(root): counter = 1 for element in root.iterdescendants(): if element.tag in ('h2', 'h3'): - if any_ancestor(element, - lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)): + if any_ancestor( + element, + lambda e: e.get('id') in ( + 'footnotes', 'nota_red' + ) or e.get('class') in ('person-list',)): continue element_text = raw_printable_text(element) - if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2': - sections[-1][3].append((counter, element.tag, element_text, [])) + if (element.tag == 'h3' and len(sections) + and sections[-1][1] == 'h2'): + sections[-1][3].append( + (counter, element.tag, element_text, []) + ) else: sections.append((counter, element.tag, element_text, [])) add_anchor(element, "s%d" % counter, with_link=False) @@ -281,17 +318,19 @@ def add_table_of_contents(root): for n, section, text, subsections in sections: section_element = etree.SubElement(toc_list, 'li') - add_anchor(section_element, "s%d" % n, with_target=False, link_text=text) + add_anchor(section_element, "s%d" % n, with_target=False, + link_text=text) if len(subsections): subsection_list = etree.SubElement(section_element, 'ol') for n1, subsection, subtext, _ in subsections: subsection_element = etree.SubElement(subsection_list, 'li') - add_anchor(subsection_element, "s%d" % n1, with_target=False, link_text=subtext) + add_anchor(subsection_element, "s%d" % n1, with_target=False, + link_text=subtext) root.insert(0, toc) - + def add_table_of_themes(root): try: from sortify import sortify @@ -341,8 +380,10 @@ def extract_annotations(html_path): footnote.text = None if len(footnote) and footnote[-1].tail == '\n': footnote[-1].tail = None - text_str = etree.tostring(footnote, method='text', encoding='unicode').strip() - html_str = etree.tostring(footnote, method='html', encoding='unicode').strip() + text_str = etree.tostring(footnote, method='text', + encoding='unicode').strip() + html_str = etree.tostring(footnote, method='html', + encoding='unicode').strip() match = re_qualifier.match(text_str) if match: diff --git a/src/librarian/mobi.py b/src/librarian/mobi.py index 6f1f5d6..a4eef5c 100644 --- a/src/librarian/mobi.py +++ b/src/librarian/mobi.py @@ -31,7 +31,8 @@ def transform(wldoc, verbose=False, sample=None, cover=None, epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True, cover=cover or True, flags=flags, - hyphenate=hyphenate, ilustr_path=ilustr_path, output_type='mobi') + hyphenate=hyphenate, ilustr_path=ilustr_path, + output_type='mobi') if verbose: kwargs = {} else: diff --git a/src/librarian/packagers.py b/src/librarian/packagers.py index b3f5548..0f7a42e 100644 --- a/src/librarian/packagers.py +++ b/src/librarian/packagers.py @@ -21,7 +21,8 @@ class Packager(object): return cls.converter.transform(*args, **kwargs) @classmethod - def prepare_file(cls, main_input, output_dir, verbose=False, overwrite=False): + def prepare_file(cls, main_input, output_dir, verbose=False, + overwrite=False): path, fname = os.path.realpath(main_input).rsplit('/', 1) provider = DirDocProvider(path) slug, ext = os.path.splitext(fname) @@ -37,7 +38,8 @@ class Packager(object): doc.save_output_file(output_file, output_path=outfile) @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False): + def prepare(cls, input_filenames, output_dir='', verbose=False, + overwrite=False): try: for main_input in input_filenames: if verbose: diff --git a/src/librarian/parser.py b/src/librarian/parser.py index efe6e95..6cce0f7 100644 --- a/src/librarian/parser.py +++ b/src/librarian/parser.py @@ -23,7 +23,7 @@ class WLDocument(object): LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) provider = None - def __init__(self, edoc, parse_dublincore=True, provider=None, + def __init__(self, edoc, parse_dublincore=True, provider=None, strict=False, meta_fallbacks=None): self.edoc = edoc self.provider = provider diff --git a/src/librarian/partners.py b/src/librarian/partners.py index 671cf4d..2c0682d 100644 --- a/src/librarian/partners.py +++ b/src/librarian/partners.py @@ -60,8 +60,11 @@ class Virtualo(packagers.Packager): from copy import deepcopy import os.path - xml = etree.fromstring(""" - """) + xml = etree.fromstring( + """ + """ + "" + ) product = etree.fromstring(""" @@ -96,18 +99,26 @@ class Virtualo(packagers.Packager): product_elem[1].text = cls.utf_trunc(info.title, 255) product_elem[2].text = cls.utf_trunc(info.description, 255) product_elem[3].text = cls.utf_trunc(info.source_name, 3000) - product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100) - product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) + product_elem[4][0][0].text = cls.utf_trunc( + u' '.join(info.author.first_names), 100 + ) + product_elem[4][0][1].text = cls.utf_trunc( + info.author.last_name, 100 + ) xml.append(product_elem) - cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) + cover.VirtualoCover(info).save( + os.path.join(outfile_dir, slug+'.jpg') + ) outfile = os.path.join(outfile_dir, '1.epub') outfile_sample = os.path.join(outfile_dir, '1.sample.epub') doc.save_output_file(doc.as_epub(), output_path=outfile) - doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample) + doc.save_output_file(doc.as_epub(doc, sample=25), + output_path=outfile_sample) outfile = os.path.join(outfile_dir, '1.mobi') outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') - doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile) + doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), + output_path=outfile) doc.save_output_file( doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), output_path=outfile_sample) @@ -118,6 +129,12 @@ class Virtualo(packagers.Packager): 'message': e.message }) - xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w') - xml_file.write(etree.tostring(xml, pretty_print=True, encoding='unicode').encode('utf-8')) + with open(os.path.join( + output_dir, 'import_products.xml' + ), 'w') as xml_file: + xml_file.write( + etree.tostring( + xml, pretty_print=True, encoding='unicode' + ).encode('utf-8') + ) xml_file.close() diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py index e6d897d..a51dbb5 100644 --- a/src/librarian/pdf.py +++ b/src/librarian/pdf.py @@ -54,7 +54,9 @@ STYLESHEETS = { def insert_tags(doc, split_re, tagname, exclude=None): - """ inserts for every occurence of `split_re' in text nodes in the `doc' tree + """ + Inserts for every occurence of `split_re' + in text nodes in the `doc' tree. >>> t = etree.fromstring('A-B-CX-Y-Z') >>> insert_tags(t, re.compile('-'), 'd') @@ -84,19 +86,21 @@ def insert_tags(doc, split_re, tagname, exclude=None): def substitute_hyphens(doc): - insert_tags(doc, - re.compile("(?<=[^-\s])-(?=[^-\s])"), - "dywiz", - exclude=[DCNS("identifier.url"), DCNS("rights.license"), "meta"] - ) + insert_tags( + doc, + re.compile(r"(?<=[^-\s])-(?=[^-\s])"), + "dywiz", + exclude=[DCNS("identifier.url"), DCNS("rights.license"), "meta"] + ) def fix_hanging(doc): - insert_tags(doc, - re.compile("(?<=\s\w)\s+"), - "nbsp", - exclude=[DCNS("identifier.url"), DCNS("rights.license")] - ) + insert_tags( + doc, + re.compile(r"(?<=\s\w)\s+"), + "nbsp", + exclude=[DCNS("identifier.url"), DCNS("rights.license")] + ) def fix_tables(doc): @@ -112,25 +116,37 @@ def fix_tables(doc): def mark_subauthors(doc): - root_author = ', '.join(elem.text for elem in doc.findall('./' + RDFNS('RDF') + '//' + DCNS('creator_parsed'))) + root_author = ', '.join( + elem.text + for elem in doc.findall( + './' + RDFNS('RDF') + '//' + DCNS('creator_parsed') + ) + ) last_author = None # jeśli autor jest inny niż autor całości i niż poprzedni autor # to wstawiamy jakiś znacznik w rdf? for subutwor in doc.xpath('/utwor/utwor'): - author = ', '.join(elem.text for elem in subutwor.findall('.//' + DCNS('creator_parsed'))) + author = ', '.join( + elem.text + for elem in subutwor.findall('.//' + DCNS('creator_parsed')) + ) if author not in (last_author, root_author): - subutwor.find('.//' + RDFNS('RDF')).append(etree.Element('use_subauthor')) + subutwor.find('.//' + RDFNS('RDF')).append( + etree.Element('use_subauthor') + ) last_author = author def move_motifs_inside(doc): """ moves motifs to be into block elements """ for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|' - '//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'): + '//dramat_wierszowany_l|//dramat_wierszowany_lp|' + '//dramat_wspolczesny'): for motif in master.xpath('motyw'): for sib in motif.itersiblings(): - if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', - 'begin', 'end', 'motyw', 'extra', 'uwaga'): + if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', + 'separator_linia', 'begin', 'end', + 'motyw', 'extra', 'uwaga'): # motif shouldn't have a tail - it would be untagged text motif.tail = None motif.getparent().remove(motif) @@ -139,18 +155,21 @@ def move_motifs_inside(doc): def hack_motifs(doc): - """ dirty hack for the marginpar-creates-orphans LaTeX problem + """ + Dirty hack for the marginpar-creates-orphans LaTeX problem see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304 - moves motifs in stanzas from first verse to second - and from next to last to last, then inserts negative vspace before them + Moves motifs in stanzas from first verse to second and from next + to last to last, then inserts negative vspace before them. """ for motif in doc.findall('//strofa//motyw'): # find relevant verse-level tag verse, stanza = motif, motif.getparent() while stanza is not None and stanza.tag != 'strofa': verse, stanza = stanza, stanza.getparent() - breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True)) + breaks_before = sum( + 1 for i in verse.itersiblings('br', preceding=True) + ) breaks_after = sum(1 for i in verse.itersiblings('br')) if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1: move_by = 1 @@ -176,8 +195,11 @@ def parse_creator(doc): Finds all dc:creator and dc.contributor.translator tags and adds *_parsed versions with forenames first. """ - for person in doc.xpath("|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')), - namespaces={'dc': str(DCNS)})[::-1]: + for person in doc.xpath( + "|".join('//dc:' + tag for tag in ( + 'creator', 'contributor.translator' + )), + namespaces={'dc': str(DCNS)})[::-1]: if not person.text: continue p = Person.from_text(person.text) @@ -193,7 +215,10 @@ def get_stylesheet(name): def package_available(package, args='', verbose=False): - """ check if a verion of a latex package accepting given args is available """ + """ + Check if a verion of a latex package accepting given args + is available. + """ tempdir = mkdtemp('-wl2pdf-test') fpath = os.path.join(tempdir, 'test.tex') f = open(fpath, 'w') @@ -207,13 +232,18 @@ def package_available(package, args='', verbose=False): if verbose: p = call(['xelatex', '-output-directory', tempdir, fpath]) else: - p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE) + p = call( + ['xelatex', '-interaction=batchmode', '-output-directory', + tempdir, fpath], + stdout=PIPE, stderr=PIPE + ) shutil.rmtree(tempdir) return p == 0 def transform(wldoc, verbose=False, save_tex=None, morefloats=None, - cover=None, flags=None, customizations=None, ilustr_path='', latex_dir=False): + cover=None, flags=None, customizations=None, ilustr_path='', + latex_dir=False): """ produces a PDF file with XeLaTeX wldoc: a WLDocument @@ -222,7 +252,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, morefloats (old/new/none): force specific morefloats cover: a cover.Cover factory or True for default flags: less-advertising, - customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class) + customizations: user requested customizations regarding various + formatting parameters (passed to wl LaTeX class) """ # Parse XSLT @@ -294,7 +325,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, shutil.copy(logo, os.path.join(temp, fname)) ins.set('src', fname) root.insert(0, ins) - + if book_info.sponsor_note: root.set("sponsor-note", book_info.sponsor_note) @@ -334,14 +365,18 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, if verbose: p = call(['xelatex', tex_path]) else: - p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE) + p = call( + ['xelatex', '-interaction=batchmode', tex_path], + stdout=PIPE, stderr=PIPE + ) if p: raise ParseError("Error parsing .tex file") if cwd is not None: os.chdir(cwd) - output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False) + output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', + delete=False) pdf_path = os.path.join(temp, 'doc.pdf') shutil.move(pdf_path, output_file.name) shutil.rmtree(temp) @@ -353,7 +388,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, def load_including_children(wldoc=None, provider=None, uri=None): """ Makes one big xml file with children inserted at end. - + Either wldoc or provider and URI must be provided. """ @@ -365,11 +400,14 @@ def load_including_children(wldoc=None, provider=None, uri=None): text = etree.tostring(wldoc.edoc, encoding='unicode') provider = wldoc.provider else: - raise ValueError('Neither a WLDocument, nor provider and URI were provided.') + raise ValueError( + 'Neither a WLDocument, nor provider and URI were provided.' + ) text = re.sub(r"([\u0400-\u04ff]+)", r"\1", text) - document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider) + document = WLDocument.from_bytes(text.encode('utf-8'), + parse_dublincore=True, provider=provider) document.swap_endlines() for child_uri in document.book_info.parts: diff --git a/src/librarian/picture.py b/src/librarian/picture.py index 10d2ae7..eeb8e8e 100644 --- a/src/librarian/picture.py +++ b/src/librarian/picture.py @@ -14,7 +14,9 @@ import six class WLPictureURI(WLURI): - _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P[-a-z0-9]+)/?$') + _re_wl_uri = re.compile( + 'http://wolnelektury.pl/katalog/obraz/(?P[-a-z0-9]+)/?$' + ) @classmethod def from_slug(cls, slug): @@ -34,15 +36,19 @@ class PictureInfo(WorkInfo): Field(DCNS('language'), 'language', required=False), Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True), Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True), - Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False), - Field(DCNS('subject.style'), 'styles', salias='style', multiple=True, required=False), + Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, + required=False), + Field(DCNS('subject.style'), 'styles', salias='style', multiple=True, + required=False), Field(DCNS('format.dimensions'), 'dimensions', required=False), Field(DCNS('format.checksum.sha1'), 'sha1', required=True), Field(DCNS('description.medium'), 'medium', required=False), - Field(DCNS('description.dimensions'), 'original_dimensions', required=False), + Field(DCNS('description.dimensions'), 'original_dimensions', + required=False), Field(DCNS('format'), 'mime_type', required=False), - Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict) + Field(DCNS('identifier.url'), 'url', WLPictureURI, + strict=as_wlpictureuri_strict) ) @@ -53,8 +59,9 @@ class ImageStore(object): MIME = ['image/gif', 'image/jpeg', 'image/png', 'application/x-shockwave-flash', 'image/psd', 'image/bmp', 'image/tiff', 'image/tiff', 'application/octet-stream', - 'image/jp2', 'application/octet-stream', 'application/octet-stream', - 'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm'] + 'image/jp2', 'application/octet-stream', + 'application/octet-stream', 'application/x-shockwave-flash', + 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm'] def __init__(self, dir_): super(ImageStore, self).__init__() @@ -68,7 +75,10 @@ class ImageStore(object): try: i = self.MIME.index(mime_type) except ValueError: - err = ValueError("Picture %s has unknown mime type: %s" % (slug, mime_type)) + err = ValueError( + "Picture %s has unknown mime type: %s" + % (slug, mime_type) + ) err.slug = slug err.mime_type = mime_type raise err @@ -87,13 +97,18 @@ class WLPicture(object): dc_path = './/' + RDFNS('RDF') if root_elem.tag != 'picture': - raise ValidationError("Invalid root element. Found '%s', should be 'picture'" % root_elem.tag) + raise ValidationError( + "Invalid root element. Found '%s', should be 'picture'" + % root_elem.tag + ) if parse_dublincore: self.rdf_elem = root_elem.find(dc_path) if self.rdf_elem is None: - raise NoDublinCore('Document has no DublinCore - which is required.') + raise NoDublinCore( + "Document has no DublinCore - which is required." + ) self.picture_info = PictureInfo.from_element(self.rdf_elem) else: @@ -130,7 +145,8 @@ class WLPicture(object): parser = etree.XMLParser(remove_blank_text=False) tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser) - me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store) + me = cls(tree, parse_dublincore=parse_dublincore, + image_store=image_store) me.load_frame_info() return me except (ExpatError, XMLSyntaxError, XSLTApplyError) as e: @@ -139,7 +155,9 @@ class WLPicture(object): @property def mime_type(self): if self.picture_info is None: - raise ValueError('DC is not loaded, hence we don\'t know the image type') + raise ValueError( + "DC is not loaded, hence we don't know the image type." + ) return self.picture_info.mime_type @property @@ -173,7 +191,8 @@ class WLPicture(object): def partiter(self): """ - Iterates the parts of this picture and returns them and their metadata + Iterates the parts of this picture and returns them + and their metadata. """ # omg no support for //sem[(@type='theme') or (@type='object')] ? for part in list(self.edoc.iterfind("//sem[@type='theme']")) +\ @@ -190,13 +209,21 @@ class WLPicture(object): return x.decode('utf-8') else: return x - pd['object'] = part.attrib['type'] == 'object' and want_unicode(part.attrib.get('object', u'')) or None - pd['themes'] = part.attrib['type'] == 'theme' and [part.attrib.get('theme', u'')] or [] + pd['object'] = ( + part.attrib['type'] == 'object' + and want_unicode(part.attrib.get('object', u'')) + or None + ) + pd['themes'] = ( + part.attrib['type'] == 'theme' + and [part.attrib.get('theme', u'')] + or [] + ) yield pd def load_frame_info(self): k = self.edoc.find("//sem[@object='kadr']") - + if k is not None: clip = self.get_sem_coords(k) self.frame = clip diff --git a/src/librarian/text.py b/src/librarian/text.py index 7ba6d29..d0531a4 100644 --- a/src/librarian/text.py +++ b/src/librarian/text.py @@ -39,7 +39,8 @@ def transform(wldoc, flags=None, **options): possible flags: raw-text, """ # Parse XSLT - style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt') + style_filename = os.path.join(os.path.dirname(__file__), + 'xslt/book2txt.xslt') style = etree.parse(style_filename) document = copy.deepcopy(wldoc) @@ -59,31 +60,47 @@ def transform(wldoc, flags=None, **options): parsed_dc = document.book_info description = parsed_dc.description url = document.book_info.url - + license_description = parsed_dc.license_description license = parsed_dc.license if license: - license_description = u"Ten utwór jest udostępniony na licencji %s: \n%s" % ( - license_description, license) + license_description = ( + u"Ten utwór jest udostępniony na licencji %s: \n%s" % ( + license_description, license + ) + ) else: - license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się " \ - u"w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, " \ - u"publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi " \ - u"materiałami (przypisy, motywy literackie etc.), które podlegają prawu " \ - u"autorskiemu, to te dodatkowe materiały udostępnione są na licencji " \ - u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL " \ - u"(http://creativecommons.org/licenses/by-sa/3.0/)" + license_description = ( + "Ten utwór nie jest objęty majątkowym prawem autorskim " + "i znajduje się w domenie publicznej, co oznacza że " + "możesz go swobodnie wykorzystywać, publikować " + "i rozpowszechniać. Jeśli utwór opatrzony jest " + "dodatkowymi materiałami (przypisy, motywy literackie " + "etc.), które podlegają prawu autorskiemu, to te " + "dodatkowe materiały udostępnione są na licencji " + "Creative Commons Uznanie Autorstwa – Na Tych Samych " + "Warunkach 3.0 PL " + "(http://creativecommons.org/licenses/by-sa/3.0/)" + ) source = parsed_dc.source_name if source: source = "\n\nTekst opracowany na podstawie: " + source else: source = '' - - contributors = ', '.join(person.readable() for person in - sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p))) + + contributors = ', '.join( + person.readable() + for person in sorted(set( + p for p in ( + parsed_dc.technical_editors + parsed_dc.editors + ) if p)) + ) if contributors: - contributors = "\n\nOpracowanie redakcyjne i przypisy: %s." % contributors + contributors = ( + "\n\nOpracowanie redakcyjne i przypisy: %s." + % contributors + ) funders = ', '.join(parsed_dc.funders) if funders: funders = u"\n\nPublikację wsparli i wsparły: %s." % funders @@ -94,7 +111,8 @@ def transform(wldoc, flags=None, **options): else: isbn = '' else: - description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).' + description = ("Publikacja zrealizowana w ramach projektu " + "Wolne Lektury (http://wolnelektury.pl).") url = '*' * 10 license_description = "" source = "" -- 2.20.1