import shutil
import urllib
+from wolnelektury.utils import makedirs
+
class UnicodeException(Exception):
def __str__(self):
"""Save file to a path. Create directories, if necessary."""
dirname = os.path.dirname(os.path.abspath(path))
- if not os.path.isdir(dirname):
- os.makedirs(dirname)
+ makedirs(dirname)
shutil.copy(self.get_filename(), path)
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from collections import namedtuple
import os.path
import optparse
Subclass it for any format you want to convert to.
"""
- format_name = None # Set format name, like "PDF".
- ext = None # Set file extension, like "pdf".
- uses_cover = False # Can it add a cover?
- cover_optional = True # Only relevant if uses_cover
- uses_provider = False # Does it need a DocProvider?
- transform = None # Transform method. Uses WLDocument.as_{ext} by default.
- parser_options = [] # List of Option objects for additional parser args.
- transform_options = [] # List of Option objects for additional transform args.
- transform_flags = [] # List of Option objects for supported transform flags.
-
+ format_name = None # Set format name, like "PDF".
+ ext = None # Set file extension, like "pdf".
+ uses_cover = False # Can it add a cover?
+ cover_optional = True # Only relevant if uses_cover
+ uses_provider = False # Does it need a DocProvider?
+ transform = None # Transform method. Uses WLDocument.as_{ext} by default.
+ parser_options = [] # List of Option objects for additional parser args.
+ transform_options = [] # List of Option objects for additional transform args.
+ transform_flags = [] # List of Option objects for supported transform flags.
@classmethod
def run(cls):
parser = optparse.OptionParser(usage=usage)
- parser.add_option('-v', '--verbose',
- action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
- parser.add_option('-d', '--make-dir',
- action='store_true', dest='make_dir', default=False,
- help='create a directory for author and put the output file in it')
- parser.add_option('-o', '--output-file',
- dest='output_file', metavar='FILE',
- help='specifies the output file')
- parser.add_option('-O', '--output-dir',
- dest='output_dir', metavar='DIR',
- help='specifies the directory for output')
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+ parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
+ help='create a directory for author and put the output file in it')
+ parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
+ help='specifies the output file')
+ parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
+ help='specifies the directory for output')
if cls.uses_cover:
if cls.cover_optional:
- parser.add_option('-c', '--with-cover',
- action='store_true', dest='with_cover', default=False,
- help='create default cover')
- parser.add_option('-C', '--image-cache',
- dest='image_cache', metavar='URL',
- help='prefix for image download cache' +
- (' (implies --with-cover)' if cls.cover_optional else ''))
+ parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
+ help='create default cover')
+ parser.add_option('-C', '--image-cache', dest='image_cache', metavar='URL',
+ help='prefix for image download cache' +
+ (' (implies --with-cover)' if cls.cover_optional else ''))
for option in cls.parser_options + cls.transform_options + cls.transform_flags:
option.add(parser)
if len(input_filenames) < 1:
parser.print_help()
- return(1)
+ return 1
# Prepare additional args for parser.
parser_args = {}
for option in cls.transform_options:
transform_args[option.name()] = option.value(options)
# Add flags to transform_args, if any.
- transform_flags = [flag.name() for flag in cls.transform_flags
- if flag.value(options)]
+ transform_flags = [flag.name() for flag in cls.transform_flags if flag.value(options)]
if transform_flags:
transform_args['flags'] = transform_flags
if options.verbose:
elif not cls.cover_optional or options.with_cover:
transform_args['cover'] = DefaultEbookCover
-
# Do some real work
try:
for main_input in input_filenames:
transform = getattr(WLDocument, 'as_%s' % cls.ext)
output = transform(doc, **transform_args)
- doc.save_output_file(output,
- output_file, options.output_dir, options.make_dir, cls.ext)
+ doc.save_output_file(output, output_file, options.output_dir, options.make_dir, cls.ext)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import re
-from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance
+from PIL import Image, ImageFont, ImageDraw, ImageFilter
from StringIO import StringIO
from librarian import get_resource, OutputFile, URLOpener
author_font = ImageFont.truetype(
self.author_font_ttf, metr.author_font_size)
- tbox.text(self.pretty_author(), self.author_color, author_font,
- metr.author_lineskip, self.author_shadow)
+ tbox.text(self.pretty_author(), self.author_color, author_font, metr.author_lineskip, self.author_shadow)
text_img = tbox.image()
img.paste(text_img, (metr.author_margin_left, top), text_img)
)
title_font = ImageFont.truetype(
self.title_font_ttf, metr.title_font_size)
- tbox.text(self.pretty_title(), self.title_color, title_font,
- metr.title_lineskip, self.title_shadow)
+ tbox.text(self.pretty_title(), self.title_color, title_font, metr.title_lineskip, self.title_shadow)
text_img = tbox.image()
img.paste(text_img, (metr.title_margin_left, top), text_img)
font=author_font,
line_height=metr.author_lineskip,
color=self.author_color,
- shadow_color=self.author_shadow,
- )
+ shadow_color=self.author_shadow)
box.skip(metr.box_above_line)
box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
- fill=self.author_color, width=metr.box_line_width)
+ fill=self.author_color, width=metr.box_line_width)
box.skip(metr.box_below_line)
# Write title.
line_height=metr.title_lineskip,
font=title_font,
color=self.title_color,
- shadow_color=self.title_shadow,
- )
+ shadow_color=self.title_shadow)
box_img = box.image()
else: # Middle.
box_top = (metr.height - box_img.size[1]) / 2
- box_left = metr.bar_width + (metr.width - metr.bar_width -
- box_img.size[0]) / 2
+ box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2
# Draw the white box.
- ImageDraw.Draw(img).rectangle((box_left, box_top,
- box_left + box_img.size[0], box_top + box_img.size[1]),
- fill='#fff')
+ ImageDraw.Draw(img).rectangle(
+ (box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff')
# Paste the contents into the white box.
img.paste(box_img, (box_left, box_top), box_img)
return img
gradient_mask = Image.new('L', (metr.width - metr.bar_width, metr.gradient_height))
draw = ImageDraw.Draw(gradient_mask)
for line in range(0, metr.gradient_height):
- draw.line((0, line, metr.width - metr.bar_width, line), fill=int(255 * self.gradient_opacity * line / metr.gradient_height))
- img.paste(gradient,
- (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
+ draw.line(
+ (0, line, metr.width - metr.bar_width, line),
+ fill=int(255 * self.gradient_opacity * line / metr.gradient_height))
+ img.paste(gradient, (metr.bar_width, metr.height - metr.gradient_height), mask=gradient_mask)
cursor = metr.width - metr.gradient_logo_margin_right
- logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2
+ logo_top = metr.height - metr.gradient_height / 2 - metr.gradient_logo_height / 2
for logo_path in self.gradient_logos[::-1]:
logo = Image.open(get_resource(logo_path))
logo = logo.resize(
DefaultEbookCover = LogoWLCover
-
from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
XMLNS, WLURI, WLNS, PLMETNS)
-import lxml.etree as etree # ElementTree API using libxml2
+import lxml.etree as etree # ElementTree API using libxml2
from lxml.etree import XMLSyntaxError
class TextPlus(unicode):
    # NOTE(review): appears to be a marker subclass so metadata code can
    # distinguish values produced by this parser from plain unicode — confirm.
    pass
+
class DatePlus(date):
    # NOTE(review): marker subclass of datetime.date, mirroring TextPlus —
    # lets callers tag dates produced by this module. Confirm intent.
    pass
@classmethod
def from_text(cls, text):
- parts = [ token.strip() for token in text.split(',') ]
+ parts = [token.strip() for token in text.split(',')]
if len(parts) == 1:
surname = parts[0]
names = []
if len(parts[1]) == 0:
# there is no non-whitespace data after the comma
raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts))
- names = [ name for name in parts[1].split() if len(name) ] # all non-whitespace tokens
+ names = [name for name in parts[1].split() if len(name)] # all non-whitespace tokens
return cls(surname, *names)
def readable(self):
def __repr__(self):
return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)
+
def as_date(text):
"""Dates for digitization of pictures. It seems we need the following:
ranges: '1350-1450',
"""
try:
# check out the "N. poł X w." syntax
- if isinstance(text, str): text = text.decode("utf-8")
+ if isinstance(text, str):
+ text = text.decode("utf-8")
century_format = u"(?:([12]) *poł[.]? +)?([MCDXVI]+) *w[.,]*(?: *l[.]? *([0-9]+))?"
vague_format = u"(?:po *|ok. *)?([0-9]{4})(-[0-9]{2}-[0-9]{2})?"
except ValueError, e:
raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
+
def as_person(text):
    """Parse a metadata text value into a Person (delegates to Person.from_text)."""
    return Person.from_text(text)
+
def as_unicode(text):
    """Coerce *text* to unicode.

    Unicode input is returned untouched; byte strings are decoded as UTF-8
    and wrapped in TextPlus.
    """
    if isinstance(text, unicode):
        return text
    return TextPlus(text.decode('utf-8'))
+
def as_wluri_strict(text):
    """Parse *text* as a WLURI using the strict validator (raises on malformed URIs)."""
    return WLURI.strict(text)
+
class Field(object):
def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
self.uri = uri
self.multiple = multiple
self.salias = salias
- self.required = kwargs.get('required', True) and not kwargs.has_key('default')
+ self.required = kwargs.get('required', True) and 'default' not in kwargs
self.default = kwargs.get('default', [] if multiple else [None])
def validate_value(self, val, strict=False):
def validate(self, fdict, fallbacks=None, strict=False):
if fallbacks is None:
fallbacks = {}
- if not fdict.has_key(self.uri):
+ if self.uri not in fdict:
if not self.required:
# Accept single value for single fields and saliases.
if self.name in fallbacks:
class DCInfo(type):
- def __new__(meta, classname, bases, class_dict):
+ def __new__(mcs, classname, bases, class_dict):
fields = list(class_dict['FIELDS'])
for base in bases[::-1]:
fields.insert(0, field)
class_dict['FIELDS'] = tuple(fields)
- return super(DCInfo, meta).__new__(meta, classname, bases, class_dict)
+ return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
class WorkInfo(object):
__metaclass__ = DCInfo
FIELDS = (
- Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
- Field( DCNS('title'), 'title'),
- Field( DCNS('type'), 'type', required=False, multiple=True),
-
- Field( DCNS('contributor.editor'), 'editors', \
- as_person, salias='editor', multiple=True, default=[]),
- Field( DCNS('contributor.technical_editor'), 'technical_editors',
- as_person, salias='technical_editor', multiple=True, default=[]),
- Field( DCNS('contributor.funding'), 'funders',
- salias='funder', multiple=True, default=[]),
- Field( DCNS('contributor.thanks'), 'thanks', required=False),
-
- Field( DCNS('date'), 'created_at'),
- Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
- Field( DCNS('publisher'), 'publisher'),
-
- Field( DCNS('language'), 'language'),
- Field( DCNS('description'), 'description', required=False),
-
- Field( DCNS('source'), 'source_name', required=False),
- Field( DCNS('source.URL'), 'source_url', required=False),
- Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
- Field( DCNS('rights.license'), 'license', required=False),
- Field( DCNS('rights'), 'license_description'),
-
- Field( PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, default=[]),
- Field( WLNS('digitisationSponsorNote'), 'sponsor_note', required=False),
- Field( WLNS('developmentStage'), 'stage', required=False),
+ Field(DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
+ Field(DCNS('title'), 'title'),
+ Field(DCNS('type'), 'type', required=False, multiple=True),
+
+ Field(DCNS('contributor.editor'), 'editors',
+ as_person, salias='editor', multiple=True, default=[]),
+ Field(DCNS('contributor.technical_editor'), 'technical_editors',
+ as_person, salias='technical_editor', multiple=True, default=[]),
+ Field(DCNS('contributor.funding'), 'funders', salias='funder', multiple=True, default=[]),
+ Field(DCNS('contributor.thanks'), 'thanks', required=False),
+
+ Field(DCNS('date'), 'created_at'),
+ Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
+ Field(DCNS('publisher'), 'publisher'),
+
+ Field(DCNS('language'), 'language'),
+ Field(DCNS('description'), 'description', required=False),
+
+ Field(DCNS('source'), 'source_name', required=False),
+ Field(DCNS('source.URL'), 'source_url', required=False),
+ Field(DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
+ Field(DCNS('rights.license'), 'license', required=False),
+ Field(DCNS('rights'), 'license_description'),
+
+ Field(PLMETNS('digitisationSponsor'), 'sponsors', multiple=True, default=[]),
+ Field(WLNS('digitisationSponsorNote'), 'sponsor_note', required=False),
+ Field(WLNS('developmentStage'), 'stage', required=False),
)
@classmethod
self.fmap = {}
for field in self.FIELDS:
- value = field.validate(dc_fields, fallbacks=fallbacks,
- strict=strict)
+ value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict)
setattr(self, 'prop_' + field.name, value)
self.fmap[field.name] = field
- if field.salias: self.fmap[field.salias] = field
+ if field.salias:
+ self.fmap[field.salias] = field
def __getattribute__(self, name):
try:
value = object.__getattribute__(self, 'prop_'+field.name)
if field.name == name:
return value
- else: # singular alias
+ else: # singular alias
if not field.multiple:
raise "OUCH!! for field %s" % name
field = object.__getattribute__(self, 'fmap')[name]
if field.name == name:
object.__setattr__(self, 'prop_'+field.name, newvalue)
- else: # singular alias
+ else: # singular alias
if not field.multiple:
raise "OUCH! while setting field %s" % name
"""Update using field_dict. Verify correctness, but don't check if all
required fields are present."""
for field in self.FIELDS:
- if field_dict.has_key(field.name):
+ if field.name in field_dict:
setattr(self, field.name, field_dict[field.name])
- def to_etree(self, parent = None):
+ def to_etree(self, parent=None):
"""XML representation of this object."""
- #etree._namespace_map[str(self.RDF)] = 'rdf'
- #etree._namespace_map[str(self.DC)] = 'dc'
+ # etree._namespace_map[str(self.RDF)] = 'rdf'
+ # etree._namespace_map[str(self.DC)] = 'dc'
if parent is None:
root = etree.Element(RDFNS('RDF'))
v = getattr(self, field.name, None)
if v is not None:
if field.multiple:
- if len(v) == 0: continue
+ if len(v) == 0:
+ continue
for x in v:
e = etree.Element(field.uri)
if x is not None:
return root
def serialize(self):
- rdf = {}
- rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about }
+ rdf = {'about': {'uri': RDFNS('about'), 'value': self.about}}
dc = {}
for field in self.FIELDS:
v = getattr(self, field.name, None)
if v is not None:
if field.multiple:
- if len(v) == 0: continue
- v = [ unicode(x) for x in v if x is not None ]
+ if len(v) == 0:
+ continue
+ v = [unicode(x) for x in v if x is not None]
else:
v = unicode(v)
if v is not None:
if field.multiple:
- if len(v) == 0: continue
- v = [ unicode(x) for x in v if x is not None ]
+ if len(v) == 0:
+ continue
+ v = [unicode(x) for x in v if x is not None]
else:
v = unicode(v)
result[field.name] = v
if field.salias:
v = getattr(self, field.salias)
- if v is not None: result[field.salias] = unicode(v)
+ if v is not None:
+ result[field.salias] = unicode(v)
return result
class BookInfo(WorkInfo):
FIELDS = (
- Field( DCNS('audience'), 'audiences', salias='audience', multiple=True,
- required=False),
-
- Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
- required=False),
- Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
- required=False),
- Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
- required=False),
+ Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, required=False),
+
+ Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, required=False),
+ Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, required=False),
+ Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, required=False),
- Field( DCNS('contributor.translator'), 'translators', \
- as_person, salias='translator', multiple=True, default=[]),
- Field( DCNS('relation.hasPart'), 'parts',
- WLURI, strict=as_wluri_strict, multiple=True, required=False),
- Field( DCNS('relation.isVariantOf'), 'variant_of',
- WLURI, strict=as_wluri_strict, required=False),
-
- Field( DCNS('relation.coverImage.url'), 'cover_url', required=False),
- Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
- Field( DCNS('relation.coverImage.source'), 'cover_source', required=False),
+ Field(DCNS('contributor.translator'), 'translators',
+ as_person, salias='translator', multiple=True, default=[]),
+ Field(DCNS('relation.hasPart'), 'parts', WLURI, strict=as_wluri_strict, multiple=True, required=False),
+ Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, strict=as_wluri_strict, required=False),
+
+ Field(DCNS('relation.coverImage.url'), 'cover_url', required=False),
+ Field(DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
+ Field(DCNS('relation.coverImage.source'), 'cover_source', required=False),
# WLCover-specific.
- Field( WLNS('coverBarColor'), 'cover_bar_color', required=False),
- Field( WLNS('coverBoxPosition'), 'cover_box_position', required=False),
+ Field(WLNS('coverBarColor'), 'cover_bar_color', required=False),
+ Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False),
)
+# -*- coding: utf-8 -*-
import os
import shutil
from subprocess import call, PIPE
from tempfile import mkdtemp
from librarian import get_resource
-from . import DataEmbed, create_embed, downgrades_to, converts_to
+from . import DataEmbed, create_embed, downgrades_to
+
class LaTeX(DataEmbed):
@downgrades_to('image/png')
f.write((tmpl % {'code': self.data}).encode('utf-8'))
call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
- os.path.join(tempdir, 'doc.png')])
+ os.path.join(tempdir, 'doc.png')])
pngdata = open(os.path.join(tempdir, 'doc.png')).read()
shutil.rmtree(tempdir)
return create_embed('image/png', data=pngdata)
+# -*- coding: utf-8 -*-
from lxml import etree
from librarian import get_resource
-from . import TreeEmbed, create_embed, downgrades_to, converts_to
+from . import TreeEmbed, create_embed, downgrades_to
+
class MathML(TreeEmbed):
@downgrades_to('application/x-latex')
'"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
))
- if not flags or not 'without-fonts' in flags:
+ if not flags or 'without-fonts' not in flags:
# strip fonts
tmpdir = mkdtemp('-librarian-epub')
try:
def sectionify(tree):
"""Finds section headers and adds a tree of _section tags."""
- sections = ['naglowek_czesc',
- 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
- 'naglowek_podrozdzial']
- section_level = dict((v,k) for (k,v) in enumerate(sections))
+ sections = [
+ 'naglowek_czesc',
+ 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
+ 'naglowek_podrozdzial']
+ section_level = dict((v, k) for (k, v) in enumerate(sections))
# We can assume there are just subelements an no text at section level.
for level, section_name in reversed(list(enumerate(sections))):
from librarian.dcparser import Person
from librarian import get_resource
+
def _register_function(f):
""" Register extension function with lxml """
ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
def reg_substitute_entities():
- ENTITY_SUBSTITUTIONS = [
+ entity_substitutions = [
(u'---', u'—'),
(u'--', u'–'),
(u'...', u'…'),
"""XPath extension function converting all entites in passed text."""
if isinstance(text, list):
text = ''.join(text)
- for entity, substitutution in ENTITY_SUBSTITUTIONS:
+ for entity, substitutution in entity_substitutions:
text = text.replace(entity, substitutution)
return text
text = ''.join(text)
return re.sub(r'[^a-zA-Z]', '', text).strip()
_register_function(texcommand)
-
+
+
def reg_lang_code_3to2():
    def lang_code_3to2(context, text):
        """XPath extension: convert a 3-letter ISO 639-2 language code to 2 letters.

        Looks the code up in the bundled ISO-639-2 registry file (pipe-separated:
        column 0 is the 3-letter code, column 2 the 2-letter ISO 639-1 code).
        Returns the input unchanged when no (non-empty) mapping is found.
        """
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                # Was named `list`, shadowing the builtin; also scanned the
                # whole file even after a hit — codes are unique, so return early.
                fields = line.strip().split('|')
                if fields[0] == text and fields[2]:
                    return fields[2]
        return text
    _register_function(lang_code_3to2)
def mathml_latex(context, trees):
text = text.replace(u'\u2062', '')
return text
+
def reg_mathml_latex():
    # Register the module-level mathml_latex converter as an lxml XPath
    # extension function.
    _register_function(mathml_latex)
+
def reg_mathml_epub(zipf):
from librarian.embeds.mathml import MathML
+
def mathml(context, trees):
data = MathML(trees[0]).to_latex().to_png().data
name = "math%d.png" % mathml.count
return name
mathml.count = 0
_register_function(mathml)
-
'partial': 'xslt/wl2html_partial.xslt'
}
+
def get_stylesheet(name):
    """Return the absolute path of the named XSLT stylesheet (keys of STYLESHEETS)."""
    return os.path.join(os.path.dirname(__file__), STYLESHEETS[name])
+
def html_has_content(text):
    """Return truthy iff the (X)HTML tree contains any <p> or <h1> elements,
    in either no namespace or the XHTML namespace."""
    return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
+
def transform(wldoc, stylesheet='legacy', options=None, flags=None):
"""Transforms the WL document to XHTML.
if not options:
options = {}
result = document.transform(style, **options)
- del document # no longer needed large object :)
+ del document # no longer needed large object :)
if html_has_content(result):
add_anchors(result.getroot())
add_table_of_themes(result.getroot())
add_table_of_contents(result.getroot())
- return OutputFile.from_string(etree.tostring(result, method='html',
- xml_declaration=False, pretty_print=True, encoding='utf-8'))
+ return OutputFile.from_string(etree.tostring(
+ result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
else:
return None
except KeyError:
except (XMLSyntaxError, XSLTApplyError), e:
raise ParseError(e)
+
class Fragment(object):
def __init__(self, id, themes):
super(Fragment, self).__init__()
result = []
for event, element in self.closed_events():
if event == 'start':
- result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
+ result.append(u'<%s %s>' % (
+ element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
if element.text:
result.append(element.text)
elif event == 'end':
for event, element in etree.iterparse(buf, events=('start', 'end')):
# Process begin and end elements
if element.get('class', '') in ('theme-begin', 'theme-end'):
- if not event == 'end': continue # Process elements only once, on end event
+ if not event == 'end':
+ continue # Process elements only once, on end event
# Open new fragment
if element.get('class', '') == 'theme-begin':
for fragment_id in open_fragments:
open_fragments[fragment_id].append('text', element.tail)
-
# Process all elements except begin and end
else:
# Omit annotation tags
def add_anchors(root):
counter = 1
for element in root.iterdescendants():
- if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
- or e.get('id') == 'nota_red'
- or e.tag == 'blockquote'):
+ def f(e):
+ return e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication') or \
+ e.get('id') == 'nota_red' or e.tag == 'blockquote'
+ if any_ancestor(element, f):
continue
if element.tag == 'p' and 'verse' in element.get('class', ''):
counter = 1
for element in root.iterdescendants():
if element.tag in ('h2', 'h3'):
- if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
+ if any_ancestor(element,
+ lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
continue
element_text = raw_printable_text(element)
if len(subsections):
subsection_list = etree.SubElement(section_element, 'ol')
- for n, subsection, text, _ in subsections:
+ for n1, subsection, subtext, _ in subsections:
subsection_element = etree.SubElement(subsection_list, 'li')
- add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)
+ add_anchor(subsection_element, "s%d" % n1, with_target=False, link_text=subtext)
root.insert(0, toc)
try:
from sortify import sortify
except ImportError:
- sortify = lambda x: x
+ def sortify(x):
+ return x
book_themes = {}
for fragment in root.findall('.//a[@class="theme-begin"]'):
qualifiers = []
yield anchor, fn_type, qualifiers, text_str, html_str
-
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import os
-from librarian import pdf, epub, mobi, DirDocProvider, ParseError, cover
+from librarian import pdf, epub, mobi, DirDocProvider, ParseError
from librarian.parser import WLDocument
+from wolnelektury.utils import makedirs
+
class Packager(object):
cover = None
slug, ext = os.path.splitext(fname)
if output_dir != '':
- try:
- os.makedirs(output_dir)
- except:
- pass
+ makedirs(output_dir)
outfile = os.path.join(output_dir, slug + '.' + cls.ext)
if os.path.exists(outfile) and not overwrite:
return
doc = WLDocument.from_file(main_input, provider=provider)
- output_file = cls.transform(doc,
- cover=cls.cover, flags=cls.flags)
+ output_file = cls.transform(doc, cover=cls.cover, flags=cls.flags)
doc.save_output_file(output_file, output_path=outfile)
-
@classmethod
def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False):
try:
converter = epub
ext = 'epub'
+
class MobiPackager(Packager):
    # Packager variant producing MOBI files via the mobi converter module.
    converter = mobi
    ext = 'mobi'
+
class PdfPackager(Packager):
    # Packager variant producing PDF files via the pdf converter module.
    converter = pdf
    ext = 'pdf'
import re
from StringIO import StringIO
+
class WLDocument(object):
LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
provider = None
def __init__(self, edoc, parse_dublincore=True, provider=None,
- strict=False, meta_fallbacks=None):
+ strict=False, meta_fallbacks=None):
self.edoc = edoc
self.provider = provider
if self.book_info is None:
raise NoDublinCore('No Dublin Core in document.')
for part_uri in self.book_info.parts:
- yield self.from_file(self.provider.by_uri(part_uri),
- provider=self.provider)
+ yield self.from_file(self.provider.by_uri(part_uri), provider=self.provider)
def chunk(self, path):
# convert the path to XPath
parts.append(part)
else:
tag, n = match.groups()
- parts.append("*[%d][name() = '%s']" % (int(n)+1, tag) )
+ parts.append("*[%d][name() = '%s']" % (int(n)+1, tag))
if parts[0] == '.':
parts[0] = ''
def update_dc(self):
if self.book_info:
parent = self.rdf_elem.getparent()
- parent.replace( self.rdf_elem, self.book_info.to_etree(parent) )
+ parent.replace(self.rdf_elem, self.book_info.to_etree(parent))
def serialize(self):
self.update_dc()
try:
xpath = self.path_to_xpath(key)
node = self.edoc.xpath(xpath)[0]
- repl = etree.fromstring(u"<%s>%s</%s>" %(node.tag, data, node.tag) )
+ repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
node.getparent().replace(node, repl)
except Exception, e:
- unmerged.append( repr( (key, xpath, e) ) )
+ unmerged.append(repr((key, xpath, e)))
return unmerged
""" deletes forbidden tags from nota_red """
for node in self.edoc.xpath('|'.join('//%s//%s' % (note_tag, tag) for tag in
- ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
+ ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
tail = node.tail
node.clear()
node.tag = 'span'
"""
if self.book_info is None:
raise NoDublinCore('No Dublin Core in document.')
- persons = set(self.book_info.editors +
- self.book_info.technical_editors)
+ persons = set(self.book_info.editors + self.book_info.technical_editors)
for child in self.parts():
persons.update(child.editors())
if None in persons:
cover_class = DefaultEbookCover
return cover_class(self.book_info, *args, **kwargs).output_file()
- def save_output_file(self, output_file, output_path=None,
- output_dir_path=None, make_author_dir=False, ext=None):
+ def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
if output_dir_path:
save_path = output_dir_path
if make_author_dir:
- save_path = os.path.join(save_path,
- unicode(self.book_info.author).encode('utf-8'))
- save_path = os.path.join(save_path,
- self.book_info.uri.slug)
+ save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
+ save_path = os.path.join(save_path, self.book_info.uri.slug)
if ext:
save_path += '.%s' % ext
else:
"""
from librarian import packagers, cover
+from wolnelektury.utils import makedirs
+
class GandalfEpub(packagers.EpubPackager):
    # EPUB packager using the Gandalf cover design.
    cover = cover.GandalfCover
+
class GandalfPdf(packagers.PdfPackager):
    # PDF packager using the Gandalf cover design.
    cover = cover.GandalfCover
+
class BookotekaEpub(packagers.EpubPackager):
    # EPUB packager using the Bookoteka cover design.
    cover = cover.BookotekaCover
+
class PrestigioEpub(packagers.EpubPackager):
    # EPUB packager with the Prestigio cover; the 'less-advertising' flag is
    # passed through to the transform.
    cover = cover.PrestigioCover
    flags = ('less-advertising',)
+
class PrestigioPdf(packagers.PdfPackager):
    # PDF packager with the Prestigio cover; the 'less-advertising' flag is
    # passed through to the transform.
    cover = cover.PrestigioCover
    flags = ('less-advertising',)
from librarian import DirDocProvider, ParseError
from librarian.parser import WLDocument
from copy import deepcopy
- import os
import os.path
xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
slug, ext = os.path.splitext(fname)
outfile_dir = os.path.join(output_dir, slug)
- os.makedirs(os.path.join(output_dir, slug))
+ makedirs(os.path.join(output_dir, slug))
doc = WLDocument.from_file(main_input, provider=provider)
info = doc.book_info
cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
outfile = os.path.join(outfile_dir, '1.epub')
outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
- doc.save_output_file(doc.as_epub(),
- output_path=outfile)
- doc.save_output_file(doc.as_epub(doc, sample=25),
- output_path=outfile_sample)
+ doc.save_output_file(doc.as_epub(), output_path=outfile)
+ doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample)
outfile = os.path.join(outfile_dir, '1.mobi')
outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
- doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
- output_path=outfile)
+ doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile)
doc.save_output_file(
- doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
- output_path=outfile_sample)
+ doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
+ output_path=outfile_sample)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
'file': main_input,
'wl2tex': 'pdf/wl2tex.xslt',
}
-#CUSTOMIZATIONS = [
-# 'nofootnotes',
-# 'nothemes',
-# 'defaultleading',
-# 'onehalfleading',
-# 'doubleleading',
-# 'nowlfont',
-# ]
+# CUSTOMIZATIONS = [
+# 'nofootnotes',
+# 'nothemes',
+# 'defaultleading',
+# 'onehalfleading',
+# 'doubleleading',
+# 'nowlfont',
+# ]
+
def insert_tags(doc, split_re, tagname, exclude=None):
""" inserts <tagname> for every occurence of `split_re' in text nodes in the `doc' tree
- >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>');
- >>> insert_tags(t, re.compile('-'), 'd');
+ >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
+ >>> insert_tags(t, re.compile('-'), 'd')
>>> print etree.tostring(t)
<a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
"""
exclude=[DCNS("identifier.url"), DCNS("rights.license")]
)
+
def fix_tables(doc):
for kol in doc.iter(tag='kol'):
if kol.tail is not None:
def move_motifs_inside(doc):
""" moves motifs to be into block elements """
- for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
+ for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|'
+ '//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
for motif in master.xpath('motyw'):
for sib in motif.itersiblings():
- if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'):
+ if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
+ 'begin', 'end', 'motyw', 'extra', 'uwaga'):
# motif shouldn't have a tail - it would be untagged text
motif.tail = None
motif.getparent().remove(motif)
Finds all dc:creator and dc.contributor.translator tags
and adds *_parsed versions with forenames first.
"""
- for person in doc.xpath("|".join('//dc:'+(tag) for tag in (
- 'creator', 'contributor.translator')),
- namespaces = {'dc': str(DCNS)})[::-1]:
+ for person in doc.xpath("|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')),
+ namespaces={'dc': str(DCNS)})[::-1]:
if not person.text:
continue
p = Person.from_text(person.text)
if book_info.cover_by:
root.set('data-cover-by', book_info.cover_by)
if book_info.cover_source:
- root.set('data-cover-source',
- book_info.cover_source)
+ root.set('data-cover-source', book_info.cover_source)
if flags:
for flag in flags:
root.set('flag-' + flag, 'yes')
with open(os.path.join(temp, 'cover.png'), 'w') as f:
bound_cover.save(f, quality=80)
- del document # no longer needed large object :)
+ del document # no longer needed large object :)
tex_path = os.path.join(temp, 'doc.tex')
fout = open(tex_path, 'w')
text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
- document = WLDocument.from_string(text,
- parse_dublincore=True, provider=provider)
+ document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
document.swap_endlines()
for child_uri in document.book_info.parts:
-
-from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
+# -*- coding: utf-8 -*-
+from dcparser import Field, WorkInfo, DCNS
from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
from xml.parsers.expat import ExpatError
from os import path
from functools import *
from operator import *
+
class WLPictureURI(WLURI):
- _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
- '(?P<slug>[-a-z0-9]+)/?$')
+ _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P<slug>[-a-z0-9]+)/?$')
@classmethod
def from_slug(cls, slug):
uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
return cls(uri)
+
def as_wlpictureuri_strict(text):
    """Convert `text` to a WLPictureURI via the class's strict parser."""
    return WLPictureURI.strict(text)
Field(DCNS('description.medium'), 'medium', required=False),
Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
Field(DCNS('format'), 'mime_type', required=False),
- Field(DCNS('identifier.url'), 'url', WLPictureURI,
- strict=as_wlpictureuri_strict),
- )
+ Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict)
+ )
class ImageStore(object):
EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
- 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
- 'aiff', 'wbmp', 'xbm']
+ 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
+ 'aiff', 'wbmp', 'xbm']
MIME = ['image/gif', 'image/jpeg', 'image/png',
'application/x-shockwave-flash', 'image/psd', 'image/bmp',
'image/tiff', 'image/tiff', 'application/octet-stream',
'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm']
def __init__(self, dir_):
+ super(ImageStore, self).__init__()
self.dir = dir_
- return super(ImageStore, self).__init__()
def path(self, slug, mime_type):
"""
parser = etree.XMLParser(remove_blank_text=False)
tree = etree.parse(StringIO(data.encode('utf-8')), parser)
- me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
+ me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
me.load_frame_info()
return me
except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
def has_all_props(node, props):
return reduce(and_, map(lambda prop: prop in node.attrib, props))
- if has_all_props(area, ['x1', 'x2', 'y1', 'y2']) == False:
+ if not has_all_props(area, ['x1', 'x2', 'y1', 'y2']):
return None
-
+
def n(prop): return int(area.get(prop))
return [[n('x1'), n('y1')], [n('x2'), n('y2')]]
-
def partiter(self):
"""
Iterates the parts of this picture and returns them and their metadata
"""
# omg no support for //sem[(@type='theme') or (@type='object')] ?
- for part in list(self.edoc.iterfind("//sem[@type='theme']")) + list(self.edoc.iterfind("//sem[@type='object']")):
- pd = {}
- pd['type'] = part.get('type')
+ for part in list(self.edoc.iterfind("//sem[@type='theme']")) +\
+ list(self.edoc.iterfind("//sem[@type='object']")):
+ pd = {'type': part.get('type')}
coords = self.get_sem_coords(part)
- if coords is None: continue
+ if coords is None:
+ continue
pd['coords'] = coords
def want_unicode(x):
- if not isinstance(x, unicode): return x.decode('utf-8')
- else: return x
+ if not isinstance(x, unicode):
+ return x.decode('utf-8')
+ else:
+ return x
pd['object'] = part.attrib['type'] == 'object' and want_unicode(part.attrib.get('object', u'')) or None
pd['themes'] = part.attrib['type'] == 'theme' and [part.attrib.get('theme', u'')] or []
yield pd
#
from librarian import get_resource
+
def sponsor_logo(name):
return {
'Narodowe Centrum Kultury': get_resource('res/sponsors/nck.png')
%(description)s%(contributors)s%(funders)s
"""
+
def transform(wldoc, flags=None, **options):
"""
Transforms input_file in XML to output_file in TXT.
license_description = parsed_dc.license_description
license = parsed_dc.license
if license:
- license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license)
+ license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (
+ license_description, license)
else:
- license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)"
-
+ license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się " \
+ u"w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, " \
+ u"publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi " \
+ u"materiałami (przypisy, motywy literackie etc.), które podlegają prawu " \
+ u"autorskiemu, to te dodatkowe materiały udostępnione są na licencji " \
+ u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL " \
+ u"(http://creativecommons.org/licenses/by-sa/3.0/)"
+
source = parsed_dc.source_name
if source:
source = "\n\nTekst opracowany na podstawie: " + source
else:
description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
url = '*' * 10
- license = ""
license_description = ""
source = ""
contributors = ""
else:
result = unicode(result).encode('utf-8')
return OutputFile.from_string("\r\n".join(result.splitlines()) + "\r\n")
-
import codecs
from datetime import date
+
def check_dcparser(xml_file, result_file):
xml = file(xml_file).read()
result = codecs.open(result_file, encoding='utf-8').read()
for fixture in get_all_fixtures('dcparser', '*.xml'):
yield check_serialize, fixture
+
def test_asdate():
    """Fuzzy/partial date strings parse to the earliest matching date."""
    cases = (
        (u"2010-10-03", date(2010, 10, 3)),
        (u"2011", date(2011, 1, 1)),
        (u"ok. 1813-1814", date(1813, 1, 1)),
        (u"ok.1876-ok.1886", date(1876, 1, 1)),
        (u"1893/1894", date(1893, 1, 1)),
    )
    for text, expected in cases:
        assert_equals(dcparser.as_date(text), expected)
-
for par in tree.findall("//p"):
if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'):
editors_attribution = True
- assert_equal(par.text.rstrip(),
+ assert_equal(
+ par.text.rstrip(),
u'Opracowanie redakcyjne i przypisy: '
u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
assert_true(editors_attribution)
parse_dublincore=False,
).as_html()
+
def test_empty():
assert not WLDocument.from_string(
'<utwor />',
# -*- coding: utf-8
from __future__ import unicode_literals
-from StringIO import StringIO
-import tempfile
from librarian.parser import WLDocument
from librarian.html import extract_annotations
-from lxml import etree
from nose.tools import eq_
assert got[0].startswith('anchor-'), "%s: Unexpected anchor: '%s', should begin with 'anchor-'" % (name, got[0])
eq_(expected[0], got[1], "%s: Unexpected type, expected '%s', got '%s'" % (name, expected[0], got[1]))
eq_(expected[1], got[2], "%s: Unexpected qualifier, expected '%s', got '%s'" % (name, expected[1], got[2]))
- eq_(expected[2], got[3], "%s: Unexpected text representation, expected '%s', got '%s'" % (name, expected[2], got[3]))
+ eq_(expected[2], got[3], "%s: Unexpected text representation, expected '%s', got '%s'" %
+ (name, expected[2], got[3]))
exp_html = '<div class="fn-%s">%s</div>' % (expected[0], expected[3])
eq_(exp_html, got[4], "%s: Unexpected html representation, expected '%s', got '%s'" % (name, exp_html, got[4]))
-
+
def test_annotations():
annotations = (
),
'Empty footnote'),
- (
- '<pr>Definiendum --- definiens.</pr>', (
+ ('<pr>Definiendum --- definiens.</pr>', (
'pr',
[],
'Definiendum \u2014 definiens.',
),
'Footnote with a second parentheses and mdash.'),
- ('<pe><slowo_obce>gemajna</slowo_obce> (daw., z niem. <slowo_obce>gemein</slowo_obce>: zwykły) --- częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</pe>', (
+ ('<pe><slowo_obce>gemajna</slowo_obce> (daw., z niem. <slowo_obce>gemein</slowo_obce>: zwykły) --- '
+ 'częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</pe>', (
'pe',
['daw.', 'niem.'],
- 'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
- '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) \u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>'
+ 'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
+ 'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
+ '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) '
+ '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>'
),
'Footnote with multiple and qualifiers and emphasis.'),
closed_fragments, open_fragments = extract_fragments(
get_fixture('text', 'asnyk_miedzy_nami_expected.html'))
assert not open_fragments
- fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f)
- for f in closed_fragments.values())
+ fragments_text = u"\n\n".join(u"%s: %s\n%s" % (f.id, f.themes, f) for f in closed_fragments.values())
assert_equal(fragments_text, file(expected_output_file_path).read().decode('utf-8'))
-
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-import re
from tempfile import NamedTemporaryFile
from nose.tools import *
from librarian import DirDocProvider
print tex
# Check contributor list.
- editors = re.search(ur'\\def\\editors\{'
- ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
- assert_equal(editors.group(1),
- u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
+ editors = re.search(ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
+ assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from librarian import picture, dcparser
-from lxml import etree
-from nose.tools import *
-from os.path import splitext
from tests.utils import get_all_fixtures, get_fixture
-import codecs
from os import path
+
def test_wlpictureuri():
    # Constructing a WLPictureURI from a canonical picture URL must not
    # raise; the constructed object itself is not inspected further here.
    uri = picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus')
+
def check_load(xml_file):
pi = dcparser.parse(xml_file, picture.PictureInfo)
assert pi is not None
f = wlp.image_file('r')
f.close()
+
def test_picture_parts():
wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml')))
parts = list(wlp.partiter())
if p['object']:
names.add(p['object'])
- assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs
- assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names
-
-
+ assert motifs == {u'anioł historii', u'spojrzenie'}, "missing motifs, got: %s" % motifs
+ assert names == {u'obraz cały', u'skrzydło'}, 'missing objects, got: %s' % names
from __future__ import with_statement
from os.path import realpath, join, dirname
import glob
-import os
+
def get_fixture_dir(dir_name):
"""Returns path to fixtures directory dir_name."""