class Book2Anything(object):
"""A class for creating book2... scripts.
-
+
Subclass it for any format you want to convert to.
"""
format_name = None # Set format name, like "PDF".
uses_provider = False # Does it need a DocProvider?
transform = None # Transform method. Uses WLDocument.as_{ext} by default.
parser_options = [] # List of Option objects for additional parser args.
- transform_options = [] # List of Option objects for additional transform args.
- transform_flags = [] # List of Option objects for supported transform flags.
+ # List of Option objects for additional transform args.
+ transform_options = []
+ # List of Option objects for supported transform flags.
+ transform_flags = []
@classmethod
def run(cls):
parser = optparse.OptionParser(usage=usage)
- parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
- parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
- help='create a directory for author and put the output file in it')
- parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
- help='specifies the output file')
- parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
- help='specifies the directory for output')
+ parser.add_option(
+ '-v', '--verbose', action='store_true', dest='verbose',
+ default=False, help='print status messages to stdout')
+ parser.add_option(
+ '-d', '--make-dir', action='store_true', dest='make_dir',
+ default=False,
+ help='create a directory for author and put the output file in it'
+ )
+ parser.add_option(
+ '-o', '--output-file', dest='output_file', metavar='FILE',
+ help='specifies the output file')
+ parser.add_option(
+ '-O', '--output-dir', dest='output_dir', metavar='DIR',
+ help='specifies the directory for output'
+ )
if cls.uses_cover:
if cls.cover_optional:
- parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
- help='create default cover')
- parser.add_option('-C', '--image-cache', dest='image_cache', metavar='URL',
- help='prefix for image download cache' +
- (' (implies --with-cover)' if cls.cover_optional else ''))
- for option in cls.parser_options + cls.transform_options + cls.transform_flags:
+ parser.add_option(
+ '-c', '--with-cover', action='store_true',
+ dest='with_cover', default=False,
+ help='create default cover'
+ )
+ parser.add_option(
+ '-C', '--image-cache', dest='image_cache', metavar='URL',
+ help='prefix for image download cache'
+ + (' (implies --with-cover)' if cls.cover_optional else '')
+ )
+ for option in (
+ cls.parser_options
+ + cls.transform_options
+ + cls.transform_flags):
option.add(parser)
options, input_filenames = parser.parse_args()
for option in cls.transform_options:
transform_args[option.name()] = option.value(options)
# Add flags to transform_args, if any.
- transform_flags = [flag.name() for flag in cls.transform_flags if flag.value(options)]
+ transform_flags = [
+ flag.name()
+ for flag in cls.transform_flags
+ if flag.value(options)
+ ]
if transform_flags:
transform_args['flags'] = transform_flags
if options.verbose:
if cls.uses_cover:
if options.image_cache:
def cover_class(book_info, *args, **kwargs):
- return make_cover(book_info, image_cache=options.image_cache, *args, **kwargs)
+ return make_cover(
+ book_info, image_cache=options.image_cache,
+ *args, **kwargs
+ )
transform_args['cover'] = cover_class
elif not cls.cover_optional or options.with_cover:
transform_args['cover'] = make_cover
output_file = options.output_file
# Do the transformation.
- doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
+ doc = WLDocument.from_file(main_input, provider=provider,
+ **parser_args)
transform = cls.transform
if transform is None:
transform = getattr(WLDocument, 'as_%s' % cls.ext)
output = transform(doc, **transform_args)
- doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext)
+ doc.save_output_file(output, output_file, options.output_dir,
+ options.make_dir, cls.ext)
except ParseError as e:
print('%(file)s:%(name)s:%(message)s' % {
self.format = format
if width and height:
self.height = int(round(height * self.width / width))
- scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
+ scale = max(float(width or 0) / self.width,
+ float(height or 0) / self.height)
if scale >= 1:
self.scale = scale
elif scale:
def image(self):
metr = Metric(self, self.scale)
- img = Image.new('RGB', (metr.width, metr.height), self.background_color)
+ img = Image.new('RGB', (metr.width, metr.height),
+ self.background_color)
if self.background_img:
background = Image.open(self.background_img)
# WL logo
if metr.logo_width:
logo = Image.open(get_resource('res/wl-logo.png'))
- logo = logo.resize((metr.logo_width, int(round(logo.size[1] * metr.logo_width / logo.size[0]))))
- img.paste(logo, ((metr.width - metr.logo_width) // 2, img.size[1] - logo.size[1] - metr.logo_bottom))
+ logo = logo.resize((
+ metr.logo_width,
+ int(round(logo.size[1] * metr.logo_width / logo.size[0]))
+ ))
+ img.paste(logo, (
+ (metr.width - metr.logo_width) // 2,
+ img.size[1] - logo.size[1] - metr.logo_bottom
+ ))
top = metr.author_top
tbox = TextBox(
metr.width - metr.author_margin_left - metr.author_margin_right,
metr.height - top,
)
-
+
author_font = ImageFont.truetype(
self.author_font_ttf, metr.author_font_size)
for pa in self.pretty_authors():
- tbox.text(pa, self.author_color, author_font, metr.author_lineskip, self.author_shadow)
+ tbox.text(pa, self.author_color, author_font, metr.author_lineskip,
+ self.author_shadow)
text_img = tbox.image()
img.paste(text_img, (metr.author_margin_left, top), text_img)
)
title_font = ImageFont.truetype(
self.title_font_ttf, metr.title_font_size)
- tbox.text(self.pretty_title(), self.title_color, title_font, metr.title_lineskip, self.title_shadow)
+ tbox.text(self.pretty_title(), self.title_color, title_font,
+ metr.title_lineskip, self.title_shadow)
text_img = tbox.image()
img.paste(text_img, (metr.title_margin_left, top), text_img)
u'Epika': 'bottom',
}
- def __init__(self, book_info, format=None, width=None, height=None, bleed=0):
- super(WLCover, self).__init__(book_info, format=format, width=width, height=height)
+ def __init__(self, book_info, format=None, width=None, height=None,
+ bleed=0):
+ super(WLCover, self).__init__(book_info, format=format, width=width,
+ height=height)
# Set box position.
self.box_position = book_info.cover_box_position or \
self.kind_box_position.get(book_info.kind, self.box_position)
self.bar_color = book_info.cover_bar_color or \
self.epoch_colors.get(book_info.epoch, self.bar_color)
# Set title color.
- self.title_color = self.epoch_colors.get(book_info.epoch, self.title_color)
+ self.title_color = self.epoch_colors.get(book_info.epoch,
+ self.title_color)
self.bleed = bleed
self.box_top_margin += bleed
metr = Metric(self, self.scale)
# Write author name.
- box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y)
+ box = TextBox(metr.title_box_width, metr.height,
+ padding_y=metr.box_padding_y)
author_font = ImageFont.truetype(
self.author_font_ttf, metr.author_font_size)
for pa in self.pretty_authors():
- box.text(pa,
- font=author_font,
- line_height=metr.author_lineskip,
- color=self.author_color,
- shadow_color=self.author_shadow)
+ box.text(pa, font=author_font, line_height=metr.author_lineskip,
+ color=self.author_color, shadow_color=self.author_shadow)
box.skip(metr.box_above_line)
- box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
- fill=self.author_color, width=metr.box_line_width)
+ box.draw.line(
+ (metr.box_line_left, box.height, metr.box_line_right, box.height),
+ fill=self.author_color, width=metr.box_line_width
+ )
box.skip(metr.box_below_line)
# Write title.
else: # Middle.
box_top = (metr.height - box_img.size[1]) // 2
- box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) // 2
+ box_left = metr.bar_width + (
+ metr.width - metr.bar_width - box_img.size[0]
+ ) // 2
# Draw the white box.
ImageDraw.Draw(img).rectangle(
- (box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff')
+ (
+ box_left,
+ box_top,
+ box_left + box_img.size[0],
+ box_top + box_img.size[1]
+ ),
+ fill='#fff'
+ )
# Paste the contents into the white box.
img.paste(box_img, (box_left, box_top), box_img)
return img
return img
metr = Metric(self, self.scale)
draw = ImageDraw.Draw(img)
- for corner_x, corner_y in ((0, 0), (metr.width, 0), (0, metr.height), (metr.width, metr.height)):
+ for corner_x, corner_y in (
+ (0, 0), (metr.width, 0),
+ (0, metr.height), (metr.width, metr.height)
+ ):
dir_x = 1 if corner_x == 0 else -1
dir_y = 1 if corner_y == 0 else -1
for offset in (-1, 0, 1):
- draw.line((corner_x, corner_y + dir_y * metr.bleed + offset,
- corner_x + dir_x * metr.bleed * line_ratio, corner_y + dir_y * metr.bleed + offset),
- fill='black' if offset == 0 else 'white', width=1)
- draw.line((corner_x + dir_x * metr.bleed + offset, corner_y,
- corner_x + dir_x * metr.bleed + offset, corner_y + dir_y * metr.bleed * line_ratio),
- fill='black' if offset == 0 else 'white', width=1)
+ draw.line(
+ (
+ corner_x,
+ corner_y + dir_y * metr.bleed + offset,
+ corner_x + dir_x * metr.bleed * line_ratio,
+ corner_y + dir_y * metr.bleed + offset
+ ),
+ fill='black' if offset == 0 else 'white',
+ width=1
+ )
+ draw.line(
+ (
+ corner_x + dir_x * metr.bleed + offset,
+ corner_y,
+ corner_x + dir_x * metr.bleed + offset,
+ corner_y + dir_y * metr.bleed * line_ratio
+ ),
+ fill='black' if offset == 0 else 'white',
+ width=1
+ )
return img
def image(self):
metr = Metric(self, self.scale)
- img = Image.new('RGB', (metr.width, metr.height), self.background_color)
+ img = Image.new('RGB', (metr.width, metr.height),
+ self.background_color)
draw = ImageDraw.Draw(img)
- draw.rectangle((0, 0, metr.bar_width, metr.height), fill=self.bar_color)
+ draw.rectangle((0, 0, metr.bar_width, metr.height),
+ fill=self.bar_color)
if self.background_img:
src = Image.open(self.background_img)
def image(self):
img = super(LogoWLCover, self).image()
metr = Metric(self, self.scale)
- gradient = Image.new('RGBA', (metr.width - metr.bar_width, metr.gradient_height), self.gradient_color)
- gradient_mask = Image.new('L', (metr.width - metr.bar_width, metr.gradient_height))
+ gradient = Image.new(
+ 'RGBA',
+ (metr.width - metr.bar_width, metr.gradient_height),
+ self.gradient_color
+ )
+ gradient_mask = Image.new(
+ 'L',
+ (metr.width - metr.bar_width, metr.gradient_height)
+ )
draw = ImageDraw.Draw(gradient_mask)
for line in range(0, metr.gradient_height):
draw.line(
(0, line, metr.width - metr.bar_width, line),
- fill=int(255 * self.gradient_opacity * line / metr.gradient_height))
- img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
+ fill=int(
+ 255 * self.gradient_opacity * line / metr.gradient_height
+ )
+ )
+ img.paste(gradient,
+ (metr.bar_width, metr.height - metr.gradient_height),
+ mask=gradient_mask)
cursor = metr.width - metr.gradient_logo_margin_right
- logo_top = int(metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2 - metr.bleed / 2)
-
- logos = [get_resource(logo_path) for logo_path in self.gradient_logos[::-1]]
+ logo_top = int(
+ metr.height
+ - metr.gradient_height / 2
+ - metr.gradient_logo_height / 2 - metr.bleed / 2
+ )
+
+ logos = [
+ get_resource(logo_path)
+ for logo_path in self.gradient_logos[::-1]
+ ]
logos = logos + self.additional_cover_logos
- logos = [Image.open(logo_bytes).convert('RGBA') for logo_bytes in logos]
+ logos = [
+ Image.open(logo_bytes).convert('RGBA')
+ for logo_bytes in logos
+ ]
# See if logos fit into the gradient. If not, scale down accordingly.
- space_for_logos = metr.width - metr.bar_width - 2 * metr.gradient_logo_margin_right
- widths = [logo.size[0] * metr.gradient_logo_height / logo.size[1] for logo in logos]
- taken_space = sum(widths) + (len(logos) - 1) * (metr.gradient_logo_spacing)
- logo_scale = space_for_logos / taken_space if taken_space > space_for_logos else 1
+ space_for_logos = (
+ metr.width
+ - metr.bar_width
+ - 2 * metr.gradient_logo_margin_right
+ )
+ widths = [
+ logo.size[0] * metr.gradient_logo_height / logo.size[1]
+ for logo in logos]
+ taken_space = (
+ sum(widths)
+ + (len(logos) - 1) * (metr.gradient_logo_spacing)
+ )
+ logo_scale = (
+ space_for_logos / taken_space
+ if taken_space > space_for_logos else 1
+ )
logo_top += int(metr.gradient_logo_height * (1 - logo_scale) / 2)
for i, logo in enumerate(logos):
surname = parts[0]
names = []
elif len(parts) != 2:
- raise ValueError("Invalid person name. There should be at most one comma: \"%s\"." % text.encode('utf-8'))
+ raise ValueError(
+ "Invalid person name. "
+ "There should be at most one comma: \"%s\"."
+ % text.encode('utf-8')
+ )
else:
surname = parts[0]
if len(parts[1]) == 0:
# there is no non-whitespace data after the comma
- raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts))
+ raise ValueError(
+ "Found a comma, but no names given: \"%s\" -> %r."
+ % (text, parts)
+ )
names = parts[1].split()
return cls(surname, *names)
return u" ".join(self.first_names + (self.last_name,))
def __eq__(self, right):
- return self.last_name == right.last_name and self.first_names == right.first_names
+ return (self.last_name == right.last_name
+ and self.first_names == right.first_names)
def __lt__(self, other):
- return (self.last_name, self.first_names) < (other.last_name, other.first_names)
+ return ((self.last_name, self.first_names)
+ < (other.last_name, other.first_names))
def __hash__(self):
return hash((self.last_name, self.first_names))
return self.last_name
def __repr__(self):
- return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)
+ return 'Person(last_name=%r, first_names=*%r)' % (
+ self.last_name, self.first_names
+ )
def as_date(text):
- """Dates for digitization of pictures. It seems we need the following:
-ranges: '1350-1450',
-centuries: "XVIII w.'
-half centuries/decades: '2 poł. XVIII w.', 'XVII w., l. 20'
-later-then: 'po 1450'
-circa 'ok. 1813-1814', 'ok.1876-ok.1886
-turn: 1893/1894
-for now we will translate this to some single date losing information of course.
+ """
+ Dates for digitization of pictures. It seems we need the following:
+ ranges: '1350-1450',
+ centuries: 'XVIII w.'
+ half centuries/decades: '2 poł. XVIII w.', 'XVII w., l. 20'
+ later-than: 'po 1450'
+ circa: 'ok. 1813-1814', 'ok.1876-ok.1886'
+ turn: 1893/1894
+
+ For now we will translate this to some single date
+ losing information of course.
"""
try:
# check out the "N. poł X w." syntax
if isinstance(text, six.binary_type):
text = text.decode("utf-8")
- century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
+ century_format = (
+ u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
+ )
vague_format = u"(?:po *|ok. *)?([0-9]{4})(-[0-9]{2}-[0-9]{2})?"
m = re.match(century_format, text)
century = roman_to_int(m.group(2))
if half is not None:
if decade is not None:
- raise ValueError("Bad date format. Cannot specify both half and decade of century")
+ raise ValueError(
+ "Bad date format. "
+ "Cannot specify both half and decade of century."
+ )
half = int(half)
t = ((century*100 + (half-1)*50), 1, 1)
else:
raise ValueError
return DatePlus(t[0], t[1], t[2])
- except ValueError as e:
+ except ValueError:
raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
class Field(object):
- def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
+ def __init__(self, uri, attr_name, validator=as_unicode, strict=None,
+ multiple=False, salias=None, **kwargs):
self.uri = uri
self.name = attr_name
self.validator = validator
self.multiple = multiple
self.salias = salias
- self.required = kwargs.get('required', True) and 'default' not in kwargs
+ self.required = (kwargs.get('required', True)
+ and 'default' not in kwargs)
self.default = kwargs.get('default', [] if multiple else [None])
def validate_value(self, val, strict=False):
new_values.append(nv)
return new_values
elif len(val) > 1:
- raise ValidationError("Multiple values not allowed for field '%s'" % self.uri)
+ raise ValidationError(
+ "Multiple values not allowed for field '%s'" % self.uri
+ )
elif len(val) == 0:
- raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri)
+ raise ValidationError(
+ "Field %s has no value to assign. Check your defaults."
+ % self.uri
+ )
else:
if validator is None or val[0] is None:
return val[0]
setattr(nv, 'lang', val[0].lang)
return nv
except ValueError as e:
- raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
+ raise ValidationError(
+ "Field '%s' - invald value: %s"
+ % (self.uri, e.message)
+ )
def validate(self, fdict, fallbacks=None, strict=False):
if fallbacks is None:
class WorkInfo(six.with_metaclass(DCInfo, object)):
FIELDS = (
- Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
+ Field(DCNS('creator'), 'authors', as_person, salias='author',
+ multiple=True),
Field(DCNS('title'), 'title'),
Field(DCNS('type'), 'type', required=False, multiple=True),
Field(DCNS('contributor.editor'), 'editors',
as_person, salias='editor', multiple=True, required=False),
Field(DCNS('contributor.technical_editor'), 'technical_editors',
- as_person, salias='technical_editor', multiple=True, required=False),
- Field(DCNS('contributor.funding'), 'funders', salias='funder', multiple=True, required=False),
+ as_person, salias='technical_editor', multiple=True,
+ required=False),
+ Field(DCNS('contributor.funding'), 'funders', salias='funder',
+ multiple=True, required=False),
Field(DCNS('contributor.thanks'), 'thanks', required=False),
Field(DCNS('date'), 'created_at'),
- Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
+ Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date,
+ required=False),
Field(DCNS('publisher'), 'publisher', multiple=True),
Field(DCNS('language'), 'language'),
Field(DCNS('description'), 'description', required=False),
Field(DCNS('source'), 'source_name', required=False),
- Field(DCNS('source.URL'), 'source_urls', salias='source_url', multiple=True, required=False),
+ Field(DCNS('source.URL'), 'source_urls', salias='source_url',
+ multiple=True, required=False),
Field(DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
Field(DCNS('rights.license'), 'license', required=False),
Field(DCNS('rights'), 'license_description'),
- Field(PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, required=False),
+ Field(PLMETNS('digitisationSponsor'), 'sponsors', multiple=True,
+ required=False),
Field(WLNS('digitisationSponsorNote'), 'sponsor_note', required=False),
Field(WLNS('developmentStage'), 'stage', required=False),
)
@classmethod
def from_element(cls, rdf_tag, *args, **kwargs):
- # the tree is already parsed, so we don't need to worry about Expat errors
+ # The tree is already parsed,
+ # so we don't need to worry about Expat errors.
field_dict = {}
desc = rdf_tag.find(".//" + RDFNS('Description'))
if desc is None:
- raise NoDublinCore("There must be a '%s' element inside the RDF." % RDFNS('Description'))
+ raise NoDublinCore(
+ "There must be a '%s' element inside the RDF."
+ % RDFNS('Description')
+ )
lang = None
p = desc
return cls(desc.attrib, field_dict, *args, **kwargs)
def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
- """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
- dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the
- given field. """
+ """
+ rdf_attrs should be a dictionary-like object with any attributes
+ of the RDF:Description.
+ dc_fields - dictionary mapping DC fields (with namespace) to
+ list of text values for the given field.
+ """
self.about = rdf_attrs.get(RDFNS('about'))
self.fmap = {}
for field in self.FIELDS:
- value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict)
+ value = field.validate(dc_fields, fallbacks=fallbacks,
+ strict=strict)
setattr(self, 'prop_' + field.name, value)
self.fmap[field.name] = field
if field.salias:
return object.__setattr__(self, name, newvalue)
def update(self, field_dict):
- """Update using field_dict. Verify correctness, but don't check if all
- required fields are present."""
+ """
+ Update using field_dict. Verify correctness, but don't check
+ if all required fields are present.
+ """
for field in self.FIELDS:
if field.name in field_dict:
setattr(self, field.name, field_dict[field.name])
class BookInfo(WorkInfo):
FIELDS = (
- Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, required=False),
+ Field(DCNS('audience'), 'audiences', salias='audience', multiple=True,
+ required=False),
- Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, required=False),
- Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, required=False),
- Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False),
+ Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
+ required=False),
+ Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
+ required=False),
+ Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
+ required=False),
Field(DCNS('subject.location'), 'location', required=False),
Field(DCNS('contributor.translator'), 'translators',
as_person, salias='translator', multiple=True, required=False),
- Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict, multiple=True, required=False),
- Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, strict=as_wluri_strict, required=False),
+ Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict,
+ multiple=True, required=False),
+ Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI,
+ strict=as_wluri_strict, required=False),
Field(DCNS('relation.coverImage.url'), 'cover_url', required=False),
- Field(DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
- Field(DCNS('relation.coverImage.source'), 'cover_source', required=False),
+ Field(DCNS('relation.coverImage.attribution'), 'cover_by',
+ required=False),
+ Field(DCNS('relation.coverImage.source'), 'cover_source',
+ required=False),
# WLCover-specific.
Field(WLNS('coverBarColor'), 'cover_bar_color', required=False),
Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False),
Field(WLNS('coverClass'), 'cover_class', default=['default']),
- Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True, required=False),
+ Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True,
+ required=False),
Field('pdf-id', 'isbn_pdf', required=False),
Field('epub-id', 'isbn_epub', required=False),
'application/x-latex': 'librarian.embeds.latex.LaTeX',
}
+
class Embed():
@classmethod
def transforms_to(cls, mime_types, downgrade=False):
return matches
def transform_to(self, mime_type, downgrade=False):
- for name, method in type(cls).__dict__.iteritems():
+ for name, method in type(self).__dict__.iteritems():
if hasattr(method, "embed_converts_to"):
conv_type, conv_downgrade = method.embed_converts_to
if downgrade == conv_downgrade and conv_type == mime_type:
def __init__(self, data=None):
self.data = data
+
class TreeEmbed(Embed):
def __init__(self, tree=None):
if isinstance(tree, etree._Element):
tree = etree.ElementTree(tree)
self.tree = tree
+
def converts_to(mime_type, downgrade=False):
def decorator(method):
method.embed_converts_to = mime_type, downgrade
return method
return decorator
+
def downgrades_to(mime_type):
return converts_to(mime_type, True)
+
def create_embed(mime_type, tree=None, data=None):
embed = known_types.get(mime_type)
if embed is None:
class LaTeX(DataEmbed):
@downgrades_to('image/png')
def to_png(self):
- tmpl = open(get_resource('res/embeds/latex/template.tex'), 'rb').read().decode('utf-8')
+ with open(get_resource('res/embeds/latex/template.tex'), 'rb') as f:
+ tmpl = f.read().decode('utf-8')
tempdir = mkdtemp('-librarian-embed-latex')
fpath = os.path.join(tempdir, 'doc.tex')
with open(fpath, 'wb') as f:
f.write((tmpl % {'code': self.data}).encode('utf-8'))
- call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
- call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
- os.path.join(tempdir, 'doc.png')])
+ call(['xelatex', '-interaction=batchmode', '-output-directory',
+ tempdir, fpath], stdout=PIPE, stderr=PIPE)
+ call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'),
+ '-trim', os.path.join(tempdir, 'doc.png')])
pngdata = open(os.path.join(tempdir, 'doc.png'), 'rb').read()
shutil.rmtree(tempdir)
return create_embed('image/png', data=pngdata)
@downgrades_to('application/x-latex')
def to_latex(self):
"""
- >>> print(MathML(etree.fromstring('<mat>a < b</mat>')).to_latex().data.strip())
+ >>> print(MathML(etree.fromstring(
+ '<mat>a < b</mat>'
+ )).to_latex().data.strip())
a < b
- >>> print(MathML(etree.fromstring('<mat>< & &lt; A</mat>')).to_latex().data.strip())
+ >>> print(MathML(etree.fromstring(
+ '<mat>< & &lt; A</mat>'
+ )).to_latex().data.strip())
< & < A
"""
xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
output = self.tree.xslt(xslt)
text = six.text_type(output)
- # Workaround for entities being preserved in output. But there should be a better way.
+ # Workaround for entities being preserved in output.
+ # But there should be a better way.
text = text.replace('<', '<').replace('&', '&')
return create_embed('application/x-latex', data=text)
def reg_wrap_words():
def wrap_words(context, text, wrapping):
- """XPath extension function automatically wrapping words in passed text"""
+ """
+ XPath extension function automatically wrapping words
+ in passed text.
+ """
if isinstance(text, list):
text = ''.join(text)
if not wrapping:
def html_has_content(text):
- return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
+ return etree.ETXPath(
+ '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)}
+ )(text)
def transform_abstrakt(abstrakt_element):
style_filename = get_stylesheet('legacy')
style = etree.parse(style_filename)
xml = etree.tostring(abstrakt_element, encoding='unicode')
- document = etree.parse(six.StringIO(xml.replace('abstrakt', 'dlugi_cytat'))) # HACK
+ document = etree.parse(six.StringIO(
+ xml.replace('abstrakt', 'dlugi_cytat')
+ )) # HACK
result = document.xslt(style)
- html = re.sub('<a name="sec[0-9]*"/>', '', etree.tostring(result, encoding='unicode'))
+ html = re.sub('<a name="sec[0-9]*"/>', '',
+ etree.tostring(result, encoding='unicode'))
return re.sub('</?blockquote[^>]*>', '', html)
options = {}
options.setdefault('gallery', "''")
- css = css or 'https://static.wolnelektury.pl/css/compressed/book_text.css'
+ css = (
+ css
+ or 'https://static.wolnelektury.pl/css/compressed/book_text.css'
+ )
css = "'%s'" % css
result = document.transform(style, css=css, **options)
del document # no longer needed large object :)
add_table_of_contents(result.getroot())
return OutputFile.from_bytes(etree.tostring(
- result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
+ result, method='html', xml_declaration=False,
+ pretty_print=True, encoding='utf-8'
+ ))
else:
return None
except KeyError:
for event, element in self.closed_events():
if event == 'start':
result.append(u'<%s %s>' % (
- element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
+ element.tag,
+ ' '.join(
+ '%s="%s"' % (k, v)
+ for k, v in element.attrib.items()
+ )
+ ))
if element.text:
result.append(element.text)
elif event == 'end':
# iterparse would die on a HTML document
parser = etree.HTMLParser(encoding='utf-8')
buf = six.BytesIO()
- buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
+ buf.write(etree.tostring(
+ etree.parse(input_filename, parser).getroot()[0][0],
+ encoding='utf-8'
+ ))
buf.seek(0)
for event, element in etree.iterparse(buf, events=('start', 'end')):
try:
fragment = open_fragments[element.get('fid')]
except KeyError:
- print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
+ print('%s:closed not open fragment #%s' % (
+ input_filename, element.get('fid')
+ ))
else:
closed_fragments[fragment.id] = fragment
del open_fragments[fragment.id]
- # Append element tail to lost_text (we don't want to lose any text)
+ # Append element tail to lost_text
+ # (we don't want to lose any text)
if element.tail:
for fragment_id in open_fragments:
open_fragments[fragment_id].append('text', element.tail)
# Process all elements except begin and end
else:
# Omit annotation tags
- if (len(element.get('name', '')) or
+ if (len(element.get('name', '')) or
element.get('class', '') in ('annotation', 'anchor')):
if event == 'end' and element.tail:
for fragment_id in open_fragments:
- open_fragments[fragment_id].append('text', element.tail)
+ open_fragments[fragment_id].append(
+ 'text', element.tail
+ )
else:
for fragment_id in open_fragments:
- open_fragments[fragment_id].append(event, copy.copy(element))
+ open_fragments[fragment_id].append(
+ event, copy.copy(element)
+ )
return closed_fragments, open_fragments
-def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
+def add_anchor(element, prefix, with_link=True, with_target=True,
+ link_text=None):
parent = element.getparent()
index = parent.index(element)
counter = 1
for element in root.iterdescendants():
def f(e):
- return e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication', 'frame') or \
- e.get('id') == 'nota_red' or e.tag == 'blockquote'
+ return (
+ e.get('class') in (
+ 'note', 'motto', 'motto_podpis', 'dedication', 'frame'
+ )
+ or e.get('id') == 'nota_red'
+ or e.tag == 'blockquote'
+ )
if any_ancestor(element, f):
continue
counter = 1
for element in root.iterdescendants():
if element.tag in ('h2', 'h3'):
- if any_ancestor(element,
- lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
+ if any_ancestor(
+ element,
+ lambda e: e.get('id') in (
+ 'footnotes', 'nota_red'
+ ) or e.get('class') in ('person-list',)):
continue
element_text = raw_printable_text(element)
- if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
- sections[-1][3].append((counter, element.tag, element_text, []))
+ if (element.tag == 'h3' and len(sections)
+ and sections[-1][1] == 'h2'):
+ sections[-1][3].append(
+ (counter, element.tag, element_text, [])
+ )
else:
sections.append((counter, element.tag, element_text, []))
add_anchor(element, "s%d" % counter, with_link=False)
for n, section, text, subsections in sections:
section_element = etree.SubElement(toc_list, 'li')
- add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)
+ add_anchor(section_element, "s%d" % n, with_target=False,
+ link_text=text)
if len(subsections):
subsection_list = etree.SubElement(section_element, 'ol')
for n1, subsection, subtext, _ in subsections:
subsection_element = etree.SubElement(subsection_list, 'li')
- add_anchor(subsection_element, "s%d" % n1, with_target=False, link_text=subtext)
+ add_anchor(subsection_element, "s%d" % n1, with_target=False,
+ link_text=subtext)
root.insert(0, toc)
-
+
def add_table_of_themes(root):
try:
from sortify import sortify
footnote.text = None
if len(footnote) and footnote[-1].tail == '\n':
footnote[-1].tail = None
- text_str = etree.tostring(footnote, method='text', encoding='unicode').strip()
- html_str = etree.tostring(footnote, method='html', encoding='unicode').strip()
+ text_str = etree.tostring(footnote, method='text',
+ encoding='unicode').strip()
+ html_str = etree.tostring(footnote, method='html',
+ encoding='unicode').strip()
match = re_qualifier.match(text_str)
if match:
epub = document.as_epub(verbose=verbose, sample=sample,
html_toc=True, cover=cover or True, flags=flags,
- hyphenate=hyphenate, ilustr_path=ilustr_path, output_type='mobi')
+ hyphenate=hyphenate, ilustr_path=ilustr_path,
+ output_type='mobi')
if verbose:
kwargs = {}
else:
return cls.converter.transform(*args, **kwargs)
@classmethod
- def prepare_file(cls, main_input, output_dir, verbose=False, overwrite=False):
+ def prepare_file(cls, main_input, output_dir, verbose=False,
+ overwrite=False):
path, fname = os.path.realpath(main_input).rsplit('/', 1)
provider = DirDocProvider(path)
slug, ext = os.path.splitext(fname)
doc.save_output_file(output_file, output_path=outfile)
@classmethod
- def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False):
+ def prepare(cls, input_filenames, output_dir='', verbose=False,
+ overwrite=False):
try:
for main_input in input_filenames:
if verbose:
LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
provider = None
- def __init__(self, edoc, parse_dublincore=True, provider=None,
+ def __init__(self, edoc, parse_dublincore=True, provider=None,
strict=False, meta_fallbacks=None):
self.edoc = edoc
self.provider = provider
from copy import deepcopy
import os.path
- xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
- <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
+ xml = etree.fromstring(
+ """<?xml version="1.0" encoding="utf-8"?>
+ <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">"""
+ "</products>"
+ )
product = etree.fromstring("""<product>
<publisherProductId></publisherProductId>
<title></title>
product_elem[1].text = cls.utf_trunc(info.title, 255)
product_elem[2].text = cls.utf_trunc(info.description, 255)
product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
- product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
- product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
+ product_elem[4][0][0].text = cls.utf_trunc(
+ u' '.join(info.author.first_names), 100
+ )
+ product_elem[4][0][1].text = cls.utf_trunc(
+ info.author.last_name, 100
+ )
xml.append(product_elem)
- cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
+ cover.VirtualoCover(info).save(
+ os.path.join(outfile_dir, slug+'.jpg')
+ )
outfile = os.path.join(outfile_dir, '1.epub')
outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
doc.save_output_file(doc.as_epub(), output_path=outfile)
- doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample)
+ doc.save_output_file(doc.as_epub(doc, sample=25),
+ output_path=outfile_sample)
outfile = os.path.join(outfile_dir, '1.mobi')
outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
- doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile)
+ doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
+ output_path=outfile)
doc.save_output_file(
doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
output_path=outfile_sample)
'message': e.message
})
- xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
- xml_file.write(etree.tostring(xml, pretty_print=True, encoding='unicode').encode('utf-8'))
+ with open(os.path.join(
+ output_dir, 'import_products.xml'
+ ), 'w') as xml_file:
+ xml_file.write(
+ etree.tostring(
+ xml, pretty_print=True, encoding='unicode'
+ ).encode('utf-8')
+ )
xml_file.close()
def insert_tags(doc, split_re, tagname, exclude=None):
- """ inserts <tagname> for every occurence of `split_re' in text nodes in the `doc' tree
+ """
+ Inserts <tagname> for every occurrence of `split_re'
+ in text nodes in the `doc' tree.
>>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
>>> insert_tags(t, re.compile('-'), 'd')
def substitute_hyphens(doc):
- insert_tags(doc,
- re.compile("(?<=[^-\s])-(?=[^-\s])"),
- "dywiz",
- exclude=[DCNS("identifier.url"), DCNS("rights.license"), "meta"]
- )
+ insert_tags(
+ doc,
+ re.compile(r"(?<=[^-\s])-(?=[^-\s])"),
+ "dywiz",
+ exclude=[DCNS("identifier.url"), DCNS("rights.license"), "meta"]
+ )
def fix_hanging(doc):
- insert_tags(doc,
- re.compile("(?<=\s\w)\s+"),
- "nbsp",
- exclude=[DCNS("identifier.url"), DCNS("rights.license")]
- )
+ insert_tags(
+ doc,
+ re.compile(r"(?<=\s\w)\s+"),
+ "nbsp",
+ exclude=[DCNS("identifier.url"), DCNS("rights.license")]
+ )
def fix_tables(doc):
def mark_subauthors(doc):
- root_author = ', '.join(elem.text for elem in doc.findall('./' + RDFNS('RDF') + '//' + DCNS('creator_parsed')))
+ root_author = ', '.join(
+ elem.text
+ for elem in doc.findall(
+ './' + RDFNS('RDF') + '//' + DCNS('creator_parsed')
+ )
+ )
last_author = None
# jeśli autor jest inny niż autor całości i niż poprzedni autor
# to wstawiamy jakiś znacznik w rdf?
for subutwor in doc.xpath('/utwor/utwor'):
- author = ', '.join(elem.text for elem in subutwor.findall('.//' + DCNS('creator_parsed')))
+ author = ', '.join(
+ elem.text
+ for elem in subutwor.findall('.//' + DCNS('creator_parsed'))
+ )
if author not in (last_author, root_author):
- subutwor.find('.//' + RDFNS('RDF')).append(etree.Element('use_subauthor'))
+ subutwor.find('.//' + RDFNS('RDF')).append(
+ etree.Element('use_subauthor')
+ )
last_author = author
def move_motifs_inside(doc):
""" moves motifs to be into block elements """
for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|'
- '//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
+ '//dramat_wierszowany_l|//dramat_wierszowany_lp|'
+ '//dramat_wspolczesny'):
for motif in master.xpath('motyw'):
for sib in motif.itersiblings():
- if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
- 'begin', 'end', 'motyw', 'extra', 'uwaga'):
+ if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk',
+ 'separator_linia', 'begin', 'end',
+ 'motyw', 'extra', 'uwaga'):
# motif shouldn't have a tail - it would be untagged text
motif.tail = None
motif.getparent().remove(motif)
def hack_motifs(doc):
- """ dirty hack for the marginpar-creates-orphans LaTeX problem
+ """
+ Dirty hack for the marginpar-creates-orphans LaTeX problem
see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304
- moves motifs in stanzas from first verse to second
- and from next to last to last, then inserts negative vspace before them
+ Moves motifs in stanzas from first verse to second and from next
+ to last to last, then inserts negative vspace before them.
"""
for motif in doc.findall('//strofa//motyw'):
# find relevant verse-level tag
verse, stanza = motif, motif.getparent()
while stanza is not None and stanza.tag != 'strofa':
verse, stanza = stanza, stanza.getparent()
- breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True))
+ breaks_before = sum(
+ 1 for i in verse.itersiblings('br', preceding=True)
+ )
breaks_after = sum(1 for i in verse.itersiblings('br'))
if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1:
move_by = 1
Finds all dc:creator and dc.contributor.translator tags
and adds *_parsed versions with forenames first.
"""
- for person in doc.xpath("|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')),
- namespaces={'dc': str(DCNS)})[::-1]:
+ for person in doc.xpath(
+ "|".join('//dc:' + tag for tag in (
+ 'creator', 'contributor.translator'
+ )),
+ namespaces={'dc': str(DCNS)})[::-1]:
if not person.text:
continue
p = Person.from_text(person.text)
def package_available(package, args='', verbose=False):
- """ check if a verion of a latex package accepting given args is available """
+ """
+ Check if a version of a LaTeX package accepting given args
+ is available.
+ """
tempdir = mkdtemp('-wl2pdf-test')
fpath = os.path.join(tempdir, 'test.tex')
f = open(fpath, 'w')
if verbose:
p = call(['xelatex', '-output-directory', tempdir, fpath])
else:
- p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
+ p = call(
+ ['xelatex', '-interaction=batchmode', '-output-directory',
+ tempdir, fpath],
+ stdout=PIPE, stderr=PIPE
+ )
shutil.rmtree(tempdir)
return p == 0
def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
- cover=None, flags=None, customizations=None, ilustr_path='', latex_dir=False):
+ cover=None, flags=None, customizations=None, ilustr_path='',
+ latex_dir=False):
""" produces a PDF file with XeLaTeX
wldoc: a WLDocument
morefloats (old/new/none): force specific morefloats
cover: a cover.Cover factory or True for default
flags: less-advertising,
- customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class)
+ customizations: user requested customizations regarding various
+ formatting parameters (passed to wl LaTeX class)
"""
# Parse XSLT
shutil.copy(logo, os.path.join(temp, fname))
ins.set('src', fname)
root.insert(0, ins)
-
+
if book_info.sponsor_note:
root.set("sponsor-note", book_info.sponsor_note)
if verbose:
p = call(['xelatex', tex_path])
else:
- p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
+ p = call(
+ ['xelatex', '-interaction=batchmode', tex_path],
+ stdout=PIPE, stderr=PIPE
+ )
if p:
raise ParseError("Error parsing .tex file")
if cwd is not None:
os.chdir(cwd)
- output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
+ output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf',
+ delete=False)
pdf_path = os.path.join(temp, 'doc.pdf')
shutil.move(pdf_path, output_file.name)
shutil.rmtree(temp)
def load_including_children(wldoc=None, provider=None, uri=None):
""" Makes one big xml file with children inserted at end.
-
+
Either wldoc or provider and URI must be provided.
"""
text = etree.tostring(wldoc.edoc, encoding='unicode')
provider = wldoc.provider
else:
- raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
+ raise ValueError(
+ 'Neither a WLDocument, nor provider and URI were provided.'
+ )
text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
- document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider)
+ document = WLDocument.from_bytes(text.encode('utf-8'),
+ parse_dublincore=True, provider=provider)
document.swap_endlines()
for child_uri in document.book_info.parts:
class WLPictureURI(WLURI):
- _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P<slug>[-a-z0-9]+)/?$')
+ _re_wl_uri = re.compile(
+ 'http://wolnelektury.pl/katalog/obraz/(?P<slug>[-a-z0-9]+)/?$'
+ )
@classmethod
def from_slug(cls, slug):
Field(DCNS('language'), 'language', required=False),
Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
- Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False),
- Field(DCNS('subject.style'), 'styles', salias='style', multiple=True, required=False),
+ Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
+ required=False),
+ Field(DCNS('subject.style'), 'styles', salias='style', multiple=True,
+ required=False),
Field(DCNS('format.dimensions'), 'dimensions', required=False),
Field(DCNS('format.checksum.sha1'), 'sha1', required=True),
Field(DCNS('description.medium'), 'medium', required=False),
- Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
+ Field(DCNS('description.dimensions'), 'original_dimensions',
+ required=False),
Field(DCNS('format'), 'mime_type', required=False),
- Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict)
+ Field(DCNS('identifier.url'), 'url', WLPictureURI,
+ strict=as_wlpictureuri_strict)
)
MIME = ['image/gif', 'image/jpeg', 'image/png',
'application/x-shockwave-flash', 'image/psd', 'image/bmp',
'image/tiff', 'image/tiff', 'application/octet-stream',
- 'image/jp2', 'application/octet-stream', 'application/octet-stream',
- 'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm']
+ 'image/jp2', 'application/octet-stream',
+ 'application/octet-stream', 'application/x-shockwave-flash',
+ 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm']
def __init__(self, dir_):
super(ImageStore, self).__init__()
try:
i = self.MIME.index(mime_type)
except ValueError:
- err = ValueError("Picture %s has unknown mime type: %s" % (slug, mime_type))
+ err = ValueError(
+ "Picture %s has unknown mime type: %s"
+ % (slug, mime_type)
+ )
err.slug = slug
err.mime_type = mime_type
raise err
dc_path = './/' + RDFNS('RDF')
if root_elem.tag != 'picture':
- raise ValidationError("Invalid root element. Found '%s', should be 'picture'" % root_elem.tag)
+ raise ValidationError(
+ "Invalid root element. Found '%s', should be 'picture'"
+ % root_elem.tag
+ )
if parse_dublincore:
self.rdf_elem = root_elem.find(dc_path)
if self.rdf_elem is None:
- raise NoDublinCore('Document has no DublinCore - which is required.')
+ raise NoDublinCore(
+ "Document has no DublinCore - which is required."
+ )
self.picture_info = PictureInfo.from_element(self.rdf_elem)
else:
parser = etree.XMLParser(remove_blank_text=False)
tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
- me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
+ me = cls(tree, parse_dublincore=parse_dublincore,
+ image_store=image_store)
me.load_frame_info()
return me
except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
@property
def mime_type(self):
if self.picture_info is None:
- raise ValueError('DC is not loaded, hence we don\'t know the image type')
+ raise ValueError(
+ "DC is not loaded, hence we don't know the image type."
+ )
return self.picture_info.mime_type
@property
def partiter(self):
"""
- Iterates the parts of this picture and returns them and their metadata
+ Iterates the parts of this picture and returns them
+ and their metadata.
"""
# omg no support for //sem[(@type='theme') or (@type='object')] ?
for part in list(self.edoc.iterfind("//sem[@type='theme']")) +\
return x.decode('utf-8')
else:
return x
- pd['object'] = part.attrib['type'] == 'object' and want_unicode(part.attrib.get('object', u'')) or None
- pd['themes'] = part.attrib['type'] == 'theme' and [part.attrib.get('theme', u'')] or []
+ pd['object'] = (
+ part.attrib['type'] == 'object'
+ and want_unicode(part.attrib.get('object', u''))
+ or None
+ )
+ pd['themes'] = (
+ part.attrib['type'] == 'theme'
+ and [part.attrib.get('theme', u'')]
+ or []
+ )
yield pd
def load_frame_info(self):
k = self.edoc.find("//sem[@object='kadr']")
-
+
if k is not None:
clip = self.get_sem_coords(k)
self.frame = clip
possible flags: raw-text,
"""
# Parse XSLT
- style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
+ style_filename = os.path.join(os.path.dirname(__file__),
+ 'xslt/book2txt.xslt')
style = etree.parse(style_filename)
document = copy.deepcopy(wldoc)
parsed_dc = document.book_info
description = parsed_dc.description
url = document.book_info.url
-
+
license_description = parsed_dc.license_description
license = parsed_dc.license
if license:
- license_description = u"Ten utwór jest udostępniony na licencji %s: \n%s" % (
- license_description, license)
+ license_description = (
+ u"Ten utwór jest udostępniony na licencji %s: \n%s" % (
+ license_description, license
+ )
+ )
else:
- license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się " \
- u"w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, " \
- u"publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi " \
- u"materiałami (przypisy, motywy literackie etc.), które podlegają prawu " \
- u"autorskiemu, to te dodatkowe materiały udostępnione są na licencji " \
- u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL " \
- u"(http://creativecommons.org/licenses/by-sa/3.0/)"
+ license_description = (
+ "Ten utwór nie jest objęty majątkowym prawem autorskim "
+ "i znajduje się w domenie publicznej, co oznacza że "
+ "możesz go swobodnie wykorzystywać, publikować "
+ "i rozpowszechniać. Jeśli utwór opatrzony jest "
+ "dodatkowymi materiałami (przypisy, motywy literackie "
+ "etc.), które podlegają prawu autorskiemu, to te "
+ "dodatkowe materiały udostępnione są na licencji "
+ "Creative Commons Uznanie Autorstwa – Na Tych Samych "
+ "Warunkach 3.0 PL "
+ "(http://creativecommons.org/licenses/by-sa/3.0/)"
+ )
source = parsed_dc.source_name
if source:
source = "\n\nTekst opracowany na podstawie: " + source
else:
source = ''
-
- contributors = ', '.join(person.readable() for person in
- sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p)))
+
+ contributors = ', '.join(
+ person.readable()
+ for person in sorted(set(
+ p for p in (
+ parsed_dc.technical_editors + parsed_dc.editors
+ ) if p))
+ )
if contributors:
- contributors = "\n\nOpracowanie redakcyjne i przypisy: %s." % contributors
+ contributors = (
+ "\n\nOpracowanie redakcyjne i przypisy: %s."
+ % contributors
+ )
funders = ', '.join(parsed_dc.funders)
if funders:
funders = u"\n\nPublikację wsparli i wsparły: %s." % funders
else:
isbn = ''
else:
- description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
+ description = ("Publikacja zrealizowana w ramach projektu "
+ "Wolne Lektury (http://wolnelektury.pl).")
url = '*' * 10
license_description = ""
source = ""