import re
import shutil
import urllib
+import lxml.etree as etree
class UnicodeException(Exception):
message = unicode(args, encoding='utf-8', errors='ignore')
return message
+
class ParseError(UnicodeException):
    """Signals that a source document could not be parsed or transformed."""
+
class ValidationError(UnicodeException):
    """Signals that document metadata failed validation."""
+
class NoDublinCore(ValidationError):
    """Raised when the required DublinCore section is absent."""
+
class NoProvider(UnicodeException):
    """Raised when a DocProvider is needed but none was specified."""
+
class XMLNamespace(object):
- '''A handy structure to repsent names in an XML namespace.'''
+ """A handy structure to repsent names in an XML namespace."""
def __init__(self, uri):
self.uri = uri
def __str__(self):
return '%s' % self.uri
+
class EmptyNamespace(XMLNamespace):
def __init__(self):
super(EmptyNamespace, self).__init__('')
slug = None
example = 'http://edukacjamedialna.edu.pl/lekcje/template'
- _re_wl_uri = re.compile(r'http://(www\.)?edukacjamedialna.edu.pl/lekcje/'
- '(?P<slug>[-a-z0-9]+)/?$')
+ _re_wl_uri = re.compile(
+ r'http://(www\.)?edukacjamedialna.edu.pl/lekcje/'
+ '(?P<slug>[-a-z0-9]+)/?$')
def __init__(self, uri):
uri = unicode(uri)
return IOFile.from_filename(os.path.join(self.dir, fname))
-import lxml.etree as etree
-import dcparser
-
-DEFAULT_BOOKINFO = dcparser.BookInfo(
- { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
- {
- DCNS('creator.expert'): [u'Some, Author'],
- DCNS('creator.scenario'): [u'Some, Author'],
- DCNS('creator.textbook'): [u'Some, Author'],
- DCNS('title'): [u'Some Title'],
- DCNS('subject.period'): [u'Unknown'],
- DCNS('subject.type'): [u'Unknown'],
- DCNS('subject.genre'): [u'Unknown'],
- DCNS('date'): ['1970-01-01'],
- DCNS('language'): [u'pol'],
- # DCNS('date'): [creation_date],
- DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
- DCNS('description'):
- [u"""Publikacja zrealizowana w ramach projektu
- Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
- wykonana przez Bibliotekę Narodową z egzemplarza
- pochodzącego ze zbiorów BN."""],
- DCNS('identifier.url'): [WLURI.example],
- DCNS('rights'):
- [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
def get_default_bookinfo():
    """Build the template BookInfo used as the default document metadata.

    Returns:
        A ``dcparser.BookInfo`` created from a placeholder ``rdf:about``
        attribute and a dictionary of placeholder Dublin Core values.
    """
    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import with dcparser -- TODO confirm).
    import dcparser

    rdf_attrs = {
        RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template',
    }
    dc_fields = {
        DCNS('creator.expert'): [u'Some, Author'],
        DCNS('creator.scenario'): [u'Some, Author'],
        DCNS('creator.textbook'): [u'Some, Author'],
        DCNS('title'): [u'Some Title'],
        DCNS('subject.period'): [u'Unknown'],
        DCNS('subject.type'): [u'Unknown'],
        DCNS('subject.genre'): [u'Unknown'],
        DCNS('date'): ['1970-01-01'],
        DCNS('language'): [u'pol'],
        # DCNS('date'): [creation_date],
        DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
        # The whitespace at the start of the continuation lines is part of
        # the string value, so these lines must not be re-indented.
        DCNS('description'): [
            u"""Publikacja zrealizowana w ramach projektu
 Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
 wykonana przez Bibliotekę Narodową z egzemplarza
 pochodzącego ze zbiorów BN."""],
        DCNS('identifier.url'): [WLURI.example],
        DCNS('rights'):
            [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"],
    }
    # The object has to be returned: without the ``return`` this function
    # yields None, and anything assigned from it (DEFAULT_BOOKINFO) is None.
    return dcparser.BookInfo(rdf_attrs, dc_fields)
+
+DEFAULT_BOOKINFO = get_default_bookinfo()
+
def xinclude_forURI(uri):
    """Return the unicode serialization of an ``include`` element (tag built
    with XINS) whose ``href`` attribute is ``uri``."""
    include_el = etree.Element(XINS("include"))
    include_el.set("href", uri)
    serialized = etree.tostring(include_el, encoding=unicode)
    return serialized
+
def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
    """Wrap the text within the minimal XML structure with a DC template.

    ``creation_date`` is stored as ``bookinfo.created_at`` before the
    metadata is serialized; the result is a unicode ``<utwor>`` document
    holding the metadata followed by ``ocrtext`` inside ``<plain-text>``.
    """
    # NOTE(review): this assigns on the object passed in, which includes the
    # shared module-level default when no bookinfo is given.
    bookinfo.created_at = creation_date
    dc_xml = etree.tostring(
        bookinfo.to_etree(), encoding=unicode, pretty_print=True)
    pieces = [
        u'<utwor>\n',
        dc_xml,
        u'\n<plain-text>\n',
        ocrtext,
        u'\n</plain-text>\n</utwor>',
    ]
    return u''.join(pieces)
b = u'' + (element.text or '')
for child in element.iterchildren():
- e = etree.tostring(child, method='xml', encoding=unicode,
- pretty_print=True)
+ e = etree.tostring(child, encoding=unicode, pretty_print=True)
b += e
return b
'raw': serialize_raw,
}
+
def serialize_children(element, format='raw'):
    """Serialize ``element`` with the serializer stored under ``format``.

    The serializer is looked up in SERIALIZERS, so an unknown ``format``
    raises KeyError.
    """
    serializer = SERIALIZERS[format]
    return serializer(element)
+
def get_resource(path):
    """Return ``path`` joined onto the directory that contains this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, path)
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from collections import namedtuple
import os.path
import optparse
class Book2Anything(object):
"""A class for creating book2... scripts.
-
+
Subclass it for any format you want to convert to.
"""
- format_name = None # Set format name, like "PDF".
- ext = None # Set file extension, like "pdf".
- uses_cover = False # Can it add a cover?
- cover_optional = True # Only relevant if uses_cover
- uses_provider = False # Does it need a DocProvider?
- transform = None # Transform method. Uses WLDocument.as_{ext} by default.
- parser_options = [] # List of Option objects for additional parser args.
- transform_options = [] # List of Option objects for additional transform args.
- transform_flags = [] # List of Option objects for supported transform flags.
-
+ format_name = None # Set format name, like "PDF".
+ ext = None # Set file extension, like "pdf".
+ uses_cover = False # Can it add a cover?
+ cover_optional = True # Only relevant if uses_cover
+ uses_provider = False # Does it need a DocProvider?
+ transform = None # Transform method. Uses WLDocument.as_{ext} by default.
+ parser_options = [] # List of Option objects for additional parser args.
+ transform_options = [] # List of Option objects for additional transform args.
+ transform_flags = [] # List of Option objects for supported transform flags.
@classmethod
def run(cls):
parser = optparse.OptionParser(usage=usage)
- parser.add_option('-v', '--verbose',
- action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
- parser.add_option('-d', '--make-dir',
- action='store_true', dest='make_dir', default=False,
- help='create a directory for author and put the output file in it')
- parser.add_option('-o', '--output-file',
- dest='output_file', metavar='FILE',
- help='specifies the output file')
- parser.add_option('-O', '--output-dir',
- dest='output_dir', metavar='DIR',
- help='specifies the directory for output')
+ parser.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+ parser.add_option(
+ '-d', '--make-dir',
+ action='store_true', dest='make_dir', default=False,
+ help='create a directory for author and put the output file in it')
+ parser.add_option(
+ '-o', '--output-file',
+ dest='output_file', metavar='FILE',
+ help='specifies the output file')
+ parser.add_option(
+ '-O', '--output-dir',
+ dest='output_dir', metavar='DIR',
+ help='specifies the directory for output')
if cls.uses_cover:
if cls.cover_optional:
- parser.add_option('-c', '--with-cover',
- action='store_true', dest='with_cover', default=False,
- help='create default cover')
- parser.add_option('-C', '--image-cache',
- dest='image_cache', metavar='URL',
- help='prefix for image download cache' +
- (' (implies --with-cover)' if cls.cover_optional else ''))
+ parser.add_option(
+ '-c', '--with-cover',
+ action='store_true', dest='with_cover', default=False,
+ help='create default cover')
+ parser.add_option(
+ '-C', '--image-cache',
+ dest='image_cache', metavar='URL',
+ help='prefix for image download cache' +
+ (' (implies --with-cover)' if cls.cover_optional else ''))
for option in cls.parser_options + cls.transform_options + cls.transform_flags:
option.add(parser)
if len(input_filenames) < 1:
parser.print_help()
- return(1)
+ return 1
# Prepare additional args for parser.
parser_args = {}
for option in cls.transform_options:
transform_args[option.name()] = option.value(options)
# Add flags to transform_args, if any.
- transform_flags = [flag.name() for flag in cls.transform_flags
- if flag.value(options)]
+ transform_flags = [flag.name() for flag in cls.transform_flags if flag.value(options)]
if transform_flags:
transform_args['flags'] = transform_flags
# Add cover support, if any.
elif not cls.cover_optional or options.with_cover:
transform_args['cover'] = WLCover
-
# Do some real work
+ main_input = None
try:
for main_input in input_filenames:
if options.verbose:
print main_input
- # Where to find input?
- if cls.uses_provider:
- path, fname = os.path.realpath(main_input).rsplit('/', 1)
- provider = DirDocProvider(path)
- else:
- provider = None
-
- # Where to write output?
- if not (options.output_file or options.output_dir):
- output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
- else:
- output_file = None
-
- # Do the transformation.
- doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
- transform = cls.transform
- if transform is None:
- transform = getattr(WLDocument, 'as_%s' % cls.ext)
- output = transform(doc, **transform_args)
-
- doc.save_output_file(output,
- output_file, options.output_dir, options.make_dir, cls.ext)
+ # Where to find input?
+ if cls.uses_provider:
+ path, fname = os.path.realpath(main_input).rsplit('/', 1)
+ provider = DirDocProvider(path)
+ else:
+ provider = None
+
+ # Where to write output?
+ if not (options.output_file or options.output_dir):
+ output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
+ else:
+ output_file = None
+
+ # Do the transformation.
+ doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
+ transform = cls.transform
+ if transform is None:
+ transform = getattr(WLDocument, 'as_%s' % cls.ext)
+ output = transform(doc, **transform_args)
+
+ doc.save_output_file(
+ output, output_file, options.output_dir, options.make_dir, cls.ext)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
exts = {
'JPEG': 'jpg',
'PNG': 'png',
- }
+ }
mime_types = {
'JPEG': 'image/jpeg',
'PNG': 'image/png',
- }
+ }
def __init__(self, book_info, format=None):
try:
top = self.author_top
tbox = TextBox(
self.width - self.author_margin_left - self.author_margin_right,
- self.height - top,
- )
+ self.height - top)
author_font = self.author_font or ImageFont.truetype(
get_resource('fonts/DejaVuSerif.ttf'), 30)
tbox.text(self.pretty_author(), self.author_color, author_font,
- self.author_lineskip, self.author_shadow)
+ self.author_lineskip, self.author_shadow)
text_img = tbox.image()
img.paste(text_img, (self.author_margin_left, top), text_img)
top += text_img.size[1] + self.title_top
tbox = TextBox(
self.width - self.title_margin_left - self.title_margin_right,
- self.height - top,
- )
+ self.height - top)
title_font = self.author_font or ImageFont.truetype(
get_resource('fonts/DejaVuSerif.ttf'), 40)
tbox.text(self.pretty_title(), self.title_color, title_font,
- self.title_lineskip, self.title_shadow)
+ self.title_lineskip, self.title_shadow)
text_img = tbox.image()
img.paste(text_img, (self.title_margin_left, top), text_img)
from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
WLURI)
-import lxml.etree as etree # ElementTree API using libxml2
+import lxml.etree as etree # ElementTree API using libxml2
from lxml.etree import XMLSyntaxError
@classmethod
def from_text(cls, text):
- parts = [ token.strip() for token in text.split(',') ]
+ parts = [token.strip() for token in text.split(',')]
if len(parts) == 1:
surname = parts[0]
names = []
if len(parts[1]) == 0:
# there is no non-whitespace data after the comma
raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts))
- names = [ name for name in parts[1].split() if len(name) ] # all non-whitespace tokens
+ names = [name for name in parts[1].split() if len(name)] # all non-whitespace tokens
return cls(surname, *names)
def readable(self):
def __repr__(self):
return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)
+
def as_date(text):
try:
try:
except ValueError, e:
raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
+
def as_person(text):
    """Validator: turn ``text`` into a Person using ``Person.from_text``."""
    person = Person.from_text(text)
    return person
+
def as_unicode(text):
    """Validator: return ``text`` unchanged if it is already unicode,
    otherwise decode it as UTF-8."""
    return text if isinstance(text, unicode) else text.decode('utf-8')
+
def as_wluri_strict(text):
    """Validator: build a value from ``text`` using ``WLURI.strict``."""
    strict_uri = WLURI.strict(text)
    return strict_uri
+
class Field(object):
def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
self.uri = uri
self.multiple = multiple
self.salias = salias
- self.required = kwargs.get('required', True) and not kwargs.has_key('default')
+ self.required = kwargs.get('required', True) and 'default' not in kwargs
self.default = kwargs.get('default', [] if multiple else [None])
def validate_value(self, val, strict=False):
if self.multiple:
if validator is None:
return val
- return [ validator(v) if v is not None else v for v in val ]
+ return [validator(v) if v is not None else v for v in val]
elif len(val) > 1:
raise ValidationError("Multiple values not allowed for field '%s'" % self.uri)
elif len(val) == 0:
def validate(self, fdict, fallbacks=None, strict=False):
if fallbacks is None:
fallbacks = {}
- if not fdict.has_key(self.uri):
+ if self.uri not in fdict:
if not self.required:
# Accept single value for single fields and saliases.
if self.name in fallbacks:
class DCInfo(type):
- def __new__(meta, classname, bases, class_dict):
+ def __new__(mcs, classname, bases, class_dict):
fields = list(class_dict['FIELDS'])
for base in bases[::-1]:
fields.insert(0, field)
class_dict['FIELDS'] = tuple(fields)
- return super(DCInfo, meta).__new__(meta, classname, bases, class_dict)
+ return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
class WorkInfo(object):
__metaclass__ = DCInfo
FIELDS = (
- Field( DCNS('creator.expert'), 'authors_expert', as_person, salias='author', required=False, multiple=True),
- Field( DCNS('creator.methodologist'), 'authors_methodologist', as_person, salias='author', required=False, multiple=True),
- Field( DCNS('creator.scenario'), 'authors_scenario', as_person, salias='author', required=False, multiple=True),
- Field( DCNS('creator.textbook'), 'authors_textbook', as_person, salias='author', required=False, multiple=True),
- Field( DCNS('requires'), 'requires', required=False, multiple=True),
- Field( DCNS('title'), 'title'),
- Field( DCNS('type'), 'type', required=False),
-
- Field( DCNS('contributor.editor'), 'editors', \
- as_person, salias='editor', multiple=True, default=[]),
- Field( DCNS('contributor.technical_editor'), 'technical_editors',
- as_person, salias='technical_editor', multiple=True, default=[]),
-
- Field( DCNS('date'), 'created_at', as_date),
- Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
- Field( DCNS('publisher'), 'publisher'),
-
- Field( DCNS('subject.competence'), 'competences', multiple=True, required=False),
- Field( DCNS('subject.curriculum'), 'curriculum', multiple=True, required=False),
-
- Field( DCNS('language'), 'language'),
- Field( DCNS('description'), 'description', required=False),
-
- Field( DCNS('source'), 'source_name', required=False),
- Field( DCNS('source.URL'), 'source_url', required=False),
- Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
- Field( DCNS('rights.license'), 'license', required=False),
- Field( DCNS('rights'), 'license_description'),
+ Field(DCNS('creator.expert'), 'authors_expert', as_person, salias='author', required=False, multiple=True),
+ Field(DCNS('creator.methodologist'), 'authors_methodologist', as_person, salias='author', required=False,
+ multiple=True),
+ Field(DCNS('creator.scenario'), 'authors_scenario', as_person, salias='author', required=False, multiple=True),
+ Field(DCNS('creator.textbook'), 'authors_textbook', as_person, salias='author', required=False, multiple=True),
+ Field(DCNS('requires'), 'requires', required=False, multiple=True),
+ Field(DCNS('title'), 'title'),
+ Field(DCNS('type'), 'type', required=False),
+
+ Field(DCNS('contributor.editor'), 'editors', as_person, salias='editor', multiple=True, default=[]),
+ Field(DCNS('contributor.technical_editor'), 'technical_editors', as_person, salias='technical_editor',
+ multiple=True, default=[]),
+
+ Field(DCNS('date'), 'created_at', as_date),
+ Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
+ Field(DCNS('publisher'), 'publisher'),
+
+ Field(DCNS('subject.competence'), 'competences', multiple=True, required=False),
+ Field(DCNS('subject.curriculum'), 'curriculum', multiple=True, required=False),
+
+ Field(DCNS('language'), 'language'),
+ Field(DCNS('description'), 'description', required=False),
+
+ Field(DCNS('source'), 'source_name', required=False),
+ Field(DCNS('source.URL'), 'source_url', required=False),
+ Field(DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
+ Field(DCNS('rights.license'), 'license', required=False),
+ Field(DCNS('rights'), 'license_description'),
)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
desc_tag = None
try:
- iter = etree.iterparse(xmlfile, ['start', 'end'])
- for (event, element) in iter:
+ elements = etree.iterparse(xmlfile, ['start', 'end'])
+ for (event, element) in elements:
if element.tag == RDFNS('RDF') and event == 'start':
desc_tag = element
break
Check if there are rdf:RDF and rdf:Description tags.")
# continue 'till the end of RDF section
- for (event, element) in iter:
+ for (event, element) in elements:
if element.tag == RDFNS('RDF') and event == 'end':
break
self.fmap = {}
for field in self.FIELDS:
- value = field.validate(dc_fields, fallbacks=fallbacks,
- strict=strict)
+ value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict)
if field.multiple:
value = getattr(self, 'prop_' + field.name, []) + value
setattr(self, 'prop_' + field.name, value)
self.fmap[field.name] = field
- if field.salias: self.fmap[field.salias] = field
+ if field.salias:
+ self.fmap[field.salias] = field
def __getattribute__(self, name):
try:
value = object.__getattribute__(self, 'prop_'+field.name)
if field.name == name:
return value
- else: # singular alias
+ else:
+ # singular alias
if not field.multiple:
raise "OUCH!! for field %s" % name
field = object.__getattribute__(self, 'fmap')[name]
if field.name == name:
object.__setattr__(self, 'prop_'+field.name, newvalue)
- else: # singular alias
+ else:
+ # singular alias
if not field.multiple:
raise "OUCH! while setting field %s" % name
"""Update using field_dict. Verify correctness, but don't check if all
required fields are present."""
for field in self.FIELDS:
- if field_dict.has_key(field.name):
+ if field.name in field_dict:
setattr(self, field.name, field_dict[field.name])
- def to_etree(self, parent = None):
+ def to_etree(self, parent=None):
"""XML representation of this object."""
- #etree._namespace_map[str(self.RDF)] = 'rdf'
- #etree._namespace_map[str(self.DC)] = 'dc'
+ # etree._namespace_map[str(self.RDF)] = 'rdf'
+ # etree._namespace_map[str(self.DC)] = 'dc'
if parent is None:
root = etree.Element(RDFNS('RDF'))
v = getattr(self, field.name, None)
if v is not None:
if field.multiple:
- if len(v) == 0: continue
+ if len(v) == 0:
+ continue
for x in v:
e = etree.Element(field.uri)
if x is not None:
return root
def serialize(self):
- rdf = {}
- rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about }
+ rdf = {'about': {'uri': RDFNS('about'), 'value': self.about}}
dc = {}
for field in self.FIELDS:
v = getattr(self, field.name, None)
if v is not None:
if field.multiple:
- if len(v) == 0: continue
- v = [ unicode(x) for x in v if x is not None ]
+ if len(v) == 0:
+ continue
+ v = [unicode(x) for x in v if x is not None]
else:
v = unicode(v)
if v is not None:
if field.multiple:
- if len(v) == 0: continue
- v = [ unicode(x) for x in v if x is not None ]
+ if len(v) == 0:
+ continue
+ v = [unicode(x) for x in v if x is not None]
else:
v = unicode(v)
result[field.name] = v
if field.salias:
v = getattr(self, field.salias)
- if v is not None: result[field.salias] = unicode(v)
+ if v is not None:
+ result[field.salias] = unicode(v)
return result
class BookInfo(WorkInfo):
    """WorkInfo subclass declaring additional, book-specific fields.

    Only the extra fields are listed here.
    NOTE(review): the DCInfo metaclass appears to prepend the FIELDS of the
    base classes -- confirm against DCInfo.__new__.
    """
    FIELDS = (
        Field(DCNS('audience'), 'audiences',
              salias='audience', multiple=True, required=False),

        # Subject classification.
        Field(DCNS('subject.period'), 'epochs',
              salias='epoch', multiple=True, required=False),
        Field(DCNS('subject.type'), 'kinds',
              salias='kind', multiple=True, required=False),
        Field(DCNS('subject.genre'), 'genres',
              salias='genre', multiple=True, required=False),

        # Translators and relations to other documents.
        Field(DCNS('contributor.translator'), 'translators',
              validator=as_person, salias='translator', multiple=True,
              default=[]),
        Field(DCNS('relation.hasPart'), 'parts',
              validator=WLURI, strict=as_wluri_strict, multiple=True,
              required=False),
        Field(DCNS('relation.isVariantOf'), 'variant_of',
              validator=WLURI, strict=as_wluri_strict, required=False),
        Field(DCNS('relation'), 'relations',
              validator=WLURI, strict=as_wluri_strict, multiple=True,
              required=False),

        # Cover image description.
        Field(DCNS('relation.coverImage.url'), 'cover_url',
              required=False),
        Field(DCNS('relation.coverImage.attribution'), 'cover_by',
              required=False),
        Field(DCNS('relation.coverImage.source'), 'cover_source',
              required=False),
    )
nt = node.text if node.text is not None else ''
return ''.join([nt] + [etree.tostring(child) for child in node])
+
def set_inner_xml(node, text):
""" sets node's text and children from a string
>>> print etree.tostring(s)
<strofa><wers_normalny>a</wers_normalny><wers_normalny>b<x>x/
y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
-
+
"""
def __init__(self, stanza_elem):
self.stanza = stanza_elem
def add_to_spine(spine, partno):
    """ Adds a node to the spine section in content.opf file """
    # The new itemref points at the part id "part<partno>".
    itemref_attrs = {'idref': 'part%d' % partno}
    itemref = spine.makeelement(OPFNS('itemref'), attrib=itemref_attrs)
    spine.append(itemref)
# prepare a container for each chunk
part_xml = etree.Element('utwor')
etree.SubElement(part_xml, 'master')
- main_xml_part = part_xml[0] # master
+ main_xml_part = part_xml[0] # master
last_node_part = False
for one_part in main_text:
yield part_xml
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=None):
""" transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
+ if _empty_html_static is None:
+ _empty_html_static = []
toc = TOC()
for element in chunk_xml[0]:
# write book title page
html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
chars = used_chars(html_tree.getroot())
- zip.writestr('OPS/title.html',
- etree.tostring(html_tree, method="html", pretty_print=True))
+ zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True))
# add a title page TOC entry
toc.add(u"Strona tytułowa", "title.html")
elif wldoc.book_info.parts:
return toc, chunk_counter, chars, sample
-
document = deepcopy(wldoc)
del wldoc
mime.compress_type = zipfile.ZIP_STORED
mime.extra = ''
zip.writestr(mime, 'application/epub+zip')
- zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
- 'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
- '<rootfiles><rootfile full-path="OPS/content.opf" ' \
- 'media-type="application/oebps-package+xml" />' \
- '</rootfiles></container>')
+ zip.writestr(
+ 'META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
+ 'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+ '<rootfiles><rootfile full-path="OPS/content.opf" '
+ 'media-type="application/oebps-package+xml" />'
+ '</rootfiles></container>')
zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
if not style:
opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
-
annotations = etree.Element('annotations')
- toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
- '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
- '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
- 'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
- '</navMap></ncx>')
+ toc_file = etree.fromstring(
+ '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+ '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+ '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+ 'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+ '</navMap></ncx>')
nav_map = toc_file[-1]
if html_toc:
zip.writestr('OPS/last.html', etree.tostring(
html_tree, method="html", pretty_print=True))
- if not flags or not 'without-fonts' in flags:
+ if not flags or 'without-fonts' not in flags:
# strip fonts
tmpdir = mkdtemp('-librarian-epub')
try:
def sectionify(tree):
"""Finds section headers and adds a tree of _section tags."""
- sections = ['naglowek_czesc',
- 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
- 'naglowek_podrozdzial']
- section_level = dict((v,k) for (k,v) in enumerate(sections))
+ sections = [
+ 'naglowek_czesc',
+ 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
+ 'naglowek_podrozdzial']
+ section_level = {v: k for (k, v) in enumerate(sections)}
# We can assume there are just subelements an no text at section level.
for level, section_name in reversed(list(enumerate(sections))):
from librarian.dcparser import Person
+
def _register_function(f):
""" Register extension function with lxml """
ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
ENTITY_SUBSTITUTIONS = [
- (u'---', u'—'),
- (u'--', u'–'),
- (u'...', u'…'),
- (u',,', u'„'),
- (u'"', u'”'),
+ (u'---', u'—'),
+ (u'--', u'–'),
+ (u'...', u'…'),
+ (u',,', u'„'),
+ (u'"', u'”'),
]
+
def substitute_entities(text):
"""XPath extension function converting all entites in passed text."""
if isinstance(text, list):
'partial': 'xslt/wl2html_partial.xslt'
}
+
def get_stylesheet(name):
    """Return the path of the stylesheet registered under ``name``.

    The relative path is taken from STYLESHEETS (KeyError for an unknown
    name) and joined onto the directory that contains this module.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, STYLESHEETS[name])
+
def html_has_content(text):
    """Evaluate an XPath for ``p`` and ``h1`` elements on ``text``.

    Both un-namespaced elements and elements in the XHTMLNS namespace are
    matched; the raw result of the ETXPath evaluation is returned.
    """
    expression = '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)}
    find_content = etree.ETXPath(expression)
    return find_content(text)
+
def transform(wldoc, stylesheet='legacy', options=None, flags=None):
"""Transforms the WL document to XHTML.
if not options:
options = {}
result = document.transform(style, **options)
- del document # no longer needed large object :)
+ del document # no longer needed large object :)
if html_has_content(result):
add_anchors(result.getroot())
add_table_of_contents(result.getroot())
- return IOFile.from_string(etree.tostring(result, method='html',
- xml_declaration=False, pretty_print=True, encoding='utf-8'))
+ return IOFile.from_string(
+ etree.tostring(result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
else:
return None
except KeyError:
except (XMLSyntaxError, XSLTApplyError), e:
raise ParseError(e)
+
class Fragment(object):
def __init__(self, id, themes):
super(Fragment, self).__init__()
result = []
for event, element in self.closed_events():
if event == 'start':
- result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
+ result.append(u'<%s %s>' % (
+ element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
if element.text:
result.append(element.text)
elif event == 'end':
for event, element in etree.iterparse(buf, events=('start', 'end')):
# Process begin and end elements
if element.get('class', '') in ('theme-begin', 'theme-end'):
- if not event == 'end': continue # Process elements only once, on end event
+ if not event == 'end':
+ continue # Process elements only once, on end event
# Open new fragment
if element.get('class', '') == 'theme-begin':
for fragment_id in open_fragments:
open_fragments[fragment_id].append('text', element.tail)
-
# Process all elements except begin and end
else:
# Omit annotation tags
- if (len(element.get('name', '')) or
+ if (len(element.get('name', '')) or
element.get('class', '') in ('annotation', 'anchor')):
if event == 'end' and element.tail:
for fragment_id in open_fragments:
def add_anchors(root):
counter = 1
+
+ def is_side_text(e):
+ side_classes = ('note', 'motto', 'motto_podpis', 'dedication')
+ return e.get('class') in side_classes or e.get('id') == 'nota_red' or e.tag == 'blockquote'
+
for element in root.iterdescendants():
- if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
- or e.get('id') == 'nota_red'
- or e.tag == 'blockquote'):
+ if any_ancestor(element, is_side_text):
continue
if element.tag == 'p' and 'verse' in element.get('class', ''):
def add_table_of_contents(root):
sections = []
counter = 1
+
+ def is_side_text(e):
+ return e.get('id') in ('footnotes', 'nota_red') or e.get('class') == 'person-list'
+
for element in root.iterdescendants():
if element.tag in ('h2', 'h3'):
- if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
+ if any_ancestor(element, is_side_text):
continue
element_text = raw_printable_text(element)
if len(subsections):
subsection_list = etree.SubElement(section_element, 'ol')
- for n, subsection, text, _ in subsections:
+ for n1, subsection, text1, _ in subsections:
subsection_element = etree.SubElement(subsection_list, 'li')
- add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)
+ add_anchor(subsection_element, "s%d" % n1, with_target=False, link_text=text1)
root.insert(0, toc)
text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip()
html_str = etree.tostring(footnote, method='html', encoding='utf-8')
yield anchor, text_str, html_str
-
from tempfile import NamedTemporaryFile
from librarian import IOFile
-from librarian.cover import WLCover
from librarian import get_resource
book_info = document.book_info
# provide a cover by default
- if not cover:
- cover = WLCover
+ # if not cover:
+ # cover = WLCover
cover_file = NamedTemporaryFile(suffix='.png', delete=False)
bound_cover = cover(book_info)
bound_cover.save(cover_file)
if not flags:
flags = []
flags = list(flags) + ['without-fonts']
- epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True,
- flags=flags, style=get_resource('mobi/style.css'))
+ epub = document.as_epub(
+ verbose=verbose, sample=sample, html_toc=True, flags=flags, style=get_resource('mobi/style.css'))
if verbose:
kwargs = {}
output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False)
output_file.close()
- subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name,
- '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs)
+ subprocess.check_call(
+ ['ebook-convert', epub.get_filename(), output_file.name, '--no-inline-toc', '--cover=%s' % cover_file.name],
+ **kwargs)
os.unlink(cover_file.name)
- return IOFile.from_filename(output_file.name)
\ No newline at end of file
+ return IOFile.from_filename(output_file.name)
from lxml import etree
from librarian import pdf, epub, DirDocProvider, ParseError, cover
from librarian.parser import WLDocument
+from librarian.styles.wolnelektury.partners import cover
class Packager(object):
cover = None
flags = None
+ converter = NotImplemented
+ ext = NotImplemented
@classmethod
- def prepare_file(cls, main_input, output_dir, verbose=False):
+ def prepare_file(cls, main_input, output_dir):
+ """Convert a single source file with `cls.converter` and save the result.
+
+ `main_input` is the path of the source document; the directory it lives
+ in is handed to DirDocProvider. The document is loaded with
+ WLDocument.from_file, transformed with `cls.cover` and `cls.flags`, and
+ saved as `<slug>.<cls.ext>` under `output_dir`, where `<slug>` is the
+ input file name without its extension. A non-empty `output_dir` that is
+ not yet a directory is created first.
+ """
+ # NOTE(review): splitting on a literal '/' assumes POSIX-style paths.
path, fname = os.path.realpath(main_input).rsplit('/', 1)
provider = DirDocProvider(path)
slug, ext = os.path.splitext(fname)
if output_dir != '':
- try:
+ if not os.path.isdir(output_dir):
os.makedirs(output_dir)
- except:
- pass
outfile = os.path.join(output_dir, slug + '.' + cls.ext)
doc = WLDocument.from_file(main_input, provider=provider)
- output_file = cls.converter.transform(doc,
- cover=cls.cover, flags=cls.flags)
+ output_file = cls.converter.transform(doc, cover=cls.cover, flags=cls.flags)
doc.save_output_file(output_file, output_path=outfile)
-
@classmethod
def prepare(cls, input_filenames, output_dir='', verbose=False):
+ main_input = None
try:
for main_input in input_filenames:
if verbose:
print main_input
- cls.prepare_file(main_input, output_dir, verbose)
+ cls.prepare_file(main_input, output_dir)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
'file': main_input,
converter = epub
ext = 'epub'
+
class PdfPackager(Packager):
converter = pdf
ext = 'pdf'
class GandalfEpubPackager(EpubPackager):
cover = cover.GandalfCover
+
class GandalfPdfPackager(PdfPackager):
cover = cover.GandalfCover
+
class BookotekaEpubPackager(EpubPackager):
cover = cover.BookotekaCover
+
class PrestigioEpubPackager(EpubPackager):
cover = cover.PrestigioCover
flags = ('less-advertising',)
+
class PrestigioPdfPackager(PdfPackager):
cover = cover.PrestigioCover
flags = ('less-advertising',)
<language>PL</language>
</product>""")
+ main_input = None
try:
for main_input in input_filenames:
if verbose:
cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
outfile = os.path.join(outfile_dir, '1.epub')
outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
- doc.save_output_file(doc.as_epub(),
- output_path=outfile)
- doc.save_output_file(doc.as_epub(doc, sample=25),
- output_path=outfile_sample)
+ doc.save_output_file(doc.as_epub(), output_path=outfile)
+ doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample)
outfile = os.path.join(outfile_dir, '1.mobi')
outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
- doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
- output_path=outfile)
+ doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile)
doc.save_output_file(
- doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
- output_path=outfile_sample)
+ doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), output_path=outfile_sample)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
'file': main_input,
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
+from librarian import ValidationError, NoDublinCore, ParseError
from librarian import RDFNS, IOFile
from librarian import dcparser
import re
from StringIO import StringIO
+
class WLDocument(object):
LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
provider = None
_edoc = None
+
@property
def edoc(self):
if self._edoc is None:
data = data.decode('utf-8')
data = data.replace(u'\ufeff', '')
try:
- parser = etree.XMLParser(remove_blank_text=False)
+ parser = etree.XMLParser()
self._edoc = etree.parse(StringIO(data.encode('utf-8')), parser)
except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
raise ParseError(e)
return self._edoc
_rdf_elem = None
+
@property
def rdf_elem(self):
if self._rdf_elem is None:
return self._rdf_elem
_book_info = None
+
@property
def book_info(self):
if not self.parse_dublincore:
self.rdf_elem, fallbacks=self.meta_fallbacks, strict=self.strict)
return self._book_info
- def __init__(self, iofile, provider=None,
- parse_dublincore=True, # shouldn't it be in a subclass?
- strict=False, # ?
- meta_fallbacks=None # ?
- ):
+ def __init__(self, iofile, provider=None, parse_dublincore=True, # shouldn't it be in a subclass?
+ strict=False, # ?
+ meta_fallbacks=None): # ?
+ """Wrap `iofile` as a document and validate its root element.
+
+ Stores the arguments on the instance, then reads `self.edoc` and raises
+ ValidationError unless the root element is <utwor>. When
+ `parse_dublincore` is true, the metadata is loaded right away by
+ reading the `book_info` property.
+ """
self.source = iofile
self.provider = provider
self.parse_dublincore = parse_dublincore
self.strict = strict
self.meta_fallbacks = meta_fallbacks
- if self.edoc.getroot().tag != 'utwor':
+ root_elem = self.edoc.getroot()
+ if root_elem.tag != 'utwor':
raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
if parse_dublincore:
+ # `book_info` is a property, not a method: the bare attribute read below
+ # is what triggers the metadata parse. Do not add call parentheses --
+ # `self.book_info()` would try to call the returned object and raise
+ # TypeError.
self.book_info
@classmethod
def from_string(cls, xml, *args, **kwargs):
iofile = IOFile.from_filename(xmlfile)
return cls(iofile, *args, **kwargs)
-
def swap_endlines(self):
"""Converts line breaks in stanzas into <br/> tags."""
# only swap inside stanzas
parts.append(part)
else:
tag, n = match.groups()
- parts.append("*[%d][name() = '%s']" % (int(n)+1, tag) )
+ parts.append("*[%d][name() = '%s']" % (int(n)+1, tag))
if parts[0] == '.':
parts[0] = ''
def update_dc(self):
+ """Replace the RDF element in the tree with one rebuilt from `self.book_info`.
+
+ Does nothing when `self.book_info` is falsy.
+ """
if self.book_info:
parent = self.rdf_elem.getparent()
- parent.replace( self.rdf_elem, self.book_info.to_etree(parent) )
+ parent.replace(self.rdf_elem, self.book_info.to_etree(parent))
def serialize(self):
self.update_dc()
try:
xpath = self.path_to_xpath(key)
node = self.edoc.xpath(xpath)[0]
- repl = etree.fromstring(u"<%s>%s</%s>" %(node.tag, data, node.tag) )
+ repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
node.getparent().replace(node, repl)
except Exception, e:
- unmerged.append( repr( (key, xpath, e) ) )
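+ # FIXME: if path_to_xpath() itself raised, `xpath` may be unbound here (or stale from an
+ # earlier key); the `except Exception` clause is also too broad.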
+ unmerged.append(repr((key, xpath, e)))
return unmerged
def clean_ed_note(self):
""" deletes forbidden tags from nota_red """
- for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in
- ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
+ forbidden_tags = ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw')
+ for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in forbidden_tags)):
tail = node.tail
node.clear()
node.tag = 'span'
cover_class = WLCover
return cover_class(self.book_info, *args, **kwargs).output_file()
- def save_output_file(self, output_file, output_path=None,
- output_dir_path=None, make_author_dir=False, ext=None):
+ def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
if output_dir_path:
save_path = output_dir_path
if make_author_dir:
- save_path = os.path.join(save_path,
- unicode(self.book_info.author).encode('utf-8'))
- save_path = os.path.join(save_path,
- self.book_info.uri.slug)
+ save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
+ save_path = os.path.join(save_path, self.book_info.uri.slug)
if ext:
save_path += '.%s' % ext
else:
from Texml.processor import process
from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
from librarian.dcparser import Person
from librarian.parser import WLDocument
'wl2tex': 'pdf/wl2tex.xslt',
}
+
def insert_tags(doc, split_re, tagname, exclude=None):
""" inserts <tagname> for every occurence of `split_re' in text nodes in the `doc' tree
- >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>');
- >>> insert_tags(t, re.compile('-'), 'd');
+ >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
+ >>> insert_tags(t, re.compile('-'), 'd')
>>> print etree.tostring(t)
<a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
"""
def move_motifs_inside(doc):
""" moves motifs to be into block elements """
- for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
+ main_tags = ('powiesc', 'opowiadanie', 'liryka_l', 'liryka_lp',
+ 'dramat_wierszowany_l', 'dramat_wierszowany_lp', 'dramat_wspolczesny')
+ for master in doc.xpath('|'.join('//' + tag for tag in main_tags)):
for motif in master.xpath('motyw'):
for sib in motif.itersiblings():
- if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'):
+ special_tags = ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
+ 'begin', 'end', 'motyw', 'extra', 'uwaga')
+ if sib.tag not in special_tags:
# motif shouldn't have a tail - it would be untagged text
motif.tail = None
motif.getparent().remove(motif)
Finds all dc:creator and dc.contributor.translator tags
and adds *_parsed versions with forenames first.
"""
- for person in doc.xpath("|".join('//dc:'+(tag) for tag in (
- 'creator', 'contributor.translator')),
- namespaces = {'dc': str(DCNS)})[::-1]:
+ persons = doc.xpath(
+ "|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')),
+ namespaces={'dc': str(DCNS)})[::-1]
+ for person in persons:
if not person.text:
continue
p = Person.from_text(person.text)
text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
- document = WLDocument.from_string(text,
- parse_dublincore=True, provider=provider)
+ document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
document.swap_endlines()
for child_uri in document.book_info.parts:
# Copy style
shutil.copy(get_resource('pdf/wl.cls'), temp)
shutil.copy(self.style, os.path.join(temp, 'style.sty'))
- #for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
- # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
+ # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
+ # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
# Save attachments
if self.cover:
cwd = None
os.chdir(temp)
+ p = None
if self.verbose:
- for i in range(self.tex_passes):
+ for i in xrange(self.tex_passes):
p = call(['xelatex', tex_path])
else:
- for i in range(self.tex_passes):
- p = call(['xelatex', '-interaction=batchmode', tex_path],
- stdout=PIPE, stderr=PIPE)
+ for i in xrange(self.tex_passes):
+ p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
if p:
raise ParseError("Error parsing .tex file: %s" % tex_path)
-
-from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
+# -*- coding: utf-8 -*-
+from dcparser import Field, WorkInfo, DCNS
from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
from xml.parsers.expat import ExpatError
from os import path
class WLPictureURI(WLURI):
+ """WLURI variant for pictures: http://wolnelektury.pl/katalog/obraz/<slug>/."""
+ # Overrides the pattern declared on WLURI; the slug is captured as group `slug`.
- _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
- '(?P<slug>[-a-z0-9]+)/?$')
+ _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P<slug>[-a-z0-9]+)/?$')
@classmethod
def from_slug(cls, slug):
+ """Build an instance from a bare slug by formatting it into the picture URL."""
uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
return cls(uri)
+
def as_wlpictureuri_strict(text):
return WLPictureURI.strict(text)
Field(DCNS('description.medium'), 'medium', required=False),
Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
Field(DCNS('format'), 'mime_type', required=False),
- Field(DCNS('identifier.url'), 'url', WLPictureURI,
- strict=as_wlpictureuri_strict),
- )
+ Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict),
+ )
class ImageStore(object):
- EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
- 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
- 'aiff', 'wbmp', 'xbm']
+ # File extensions, listed in the same order as the MIME types in MIME below
+ # (NOTE(review): presumably looked up by shared index -- confirm in path()).
+ # Every entry needs its own trailing comma: adjacent string literals are
+ # concatenated, so a missing comma after 'bmp' silently yields 'bmptiff'
+ # and shifts all later entries by one.
+ EXT = [
+ 'gif', 'jpeg', 'png', 'swf', 'psd', 'bmp',
+ 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
+ 'aiff', 'wbmp', 'xbm']
MIME = ['image/gif', 'image/jpeg', 'image/png',
'application/x-shockwave-flash', 'image/psd', 'image/bmp',
'image/tiff', 'image/tiff', 'application/octet-stream',
def __init__(self, dir_):
+ """Remember `dir_` as the store's directory (kept in `self.dir`)."""
self.dir = dir_
- return super(ImageStore, self).__init__()
+ super(ImageStore, self).__init__()
def path(self, slug, mime_type):
"""
else:
self.picture_info = None
- @classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
-
@classmethod
def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
# first, prepare for parsing
if isinstance(xmlfile, basestring):
- file = open(xmlfile, 'rb')
+ xmlfile = open(xmlfile, 'rb')
try:
- data = file.read()
+ data = xmlfile.read()
finally:
- file.close()
+ xmlfile.close()
else:
data = xmlfile.read()
image_store = ImageStore(path.dirname(xmlfile.name))
try:
- parser = etree.XMLParser(remove_blank_text=False)
+ parser = etree.XMLParser()
tree = etree.parse(StringIO(data.encode('utf-8')), parser)
return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
Iterates the parts of this picture and returns them and their metadata
"""
for part in self.edoc.iter("div"):
- pd = {}
- pd['type'] = part.get('type')
+ pd = {'themes': [], 'object': None, 'type': part.get('type')}
if pd['type'] == 'area':
pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
(int(part.get('x2')), int(part.get('y2'))))
- pd['themes'] = []
- pd['object'] = None
parent = part
while True:
parent = parent.getparent()
subgen = EduModule(self.options)
definiens_s = subgen.generate(definiens)
else:
- print "!! Missing definiendum in source: '%s'" % element.text
+ print ("!! Missing definiendum in source: '%s'" % element.text).encode('utf-8')
return u"<dt id='%s'>" % self.naglowek_to_anchor(element), u"</dt>" + definiens_s
from lxml import etree
-from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
-from librarian.dcparser import Person
+from xmlutils import Xmill, ifoption, tag_open_close
from librarian import DCNS, get_resource, IOFile
from librarian import functions
from pdf import PDFFormat, substitute_hyphens, fix_hanging
prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
postfix = u'</TeXML>'
if isinstance(value, list):
- import pdb; pdb.set_trace()
+ import pdb
+ pdb.set_trace()
if isinstance(value, tuple):
return prefix + value[0], value[1] + postfix
else:
return values
def handle_rdf__RDF(self, _):
- "skip metadata in generation"
+ """skip metadata in generation"""
return
@escape(True)
def get_rightsinfo(self, element):
+ """Build the `rightsinfostr` command markup for `element`.
+
+ The value returned by get_dc for 'rights' always goes into <parm>; the
+ value for 'rights.license' is added as a leading <opt> only when it is
+ truthy.
+ """
rights_lic = self.get_dc(element, 'rights.license', True)
- return u'<cmd name="rightsinfostr">' + \
- (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
- u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
- u'</cmd>'
+ license_opt = u'<opt>%s</opt>' % rights_lic if rights_lic else ''
+ rights_parm = u'<parm>%s</parm>' % self.get_dc(element, 'rights', True)
+ return u'<cmd name="rightsinfostr">' + license_opt + rights_parm + u'</cmd>'
@escape(True)
def get_authors(self, element, which=None):
def handle_utwor(self, element):
lines = [
u'''
- <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
- <TeXML escape="0">
- \\documentclass[%s]{wl}
- \\usepackage{style}''' % self.options['customization_str'],
- self.options['has_cover'] and '\usepackage{makecover}',
- (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
- (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
- (self.options['morefloats'] == 'none' and
- u'''\\IfFileExists{morefloats.sty}{
- \\usepackage{morefloats}
- }{}'''),
- u'''\\def\\authors{%s}''' % self.get_authors(element),
- u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
- u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
- u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
-
- u'''\\author{\\authors}''',
- u'''\\title{%s}''' % self.get_title(element),
- u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
- u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
- u'</TeXML>']
+ <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
+ <TeXML escape="0">
+ \\documentclass[%s]{wl}
+ \\usepackage{style}''' % self.options['customization_str'],
+ self.options['has_cover'] and '\usepackage{makecover}',
+ (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
+ (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
+ (self.options['morefloats'] == 'none' and
+ u'''\\IfFileExists{morefloats.sty}{
+ \\usepackage{morefloats}
+ }{}'''),
+ u'''\\def\\authors{%s}''' % self.get_authors(element),
+ u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+ u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+ u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+
+ u'''\\author{\\authors}''',
+ u'''\\title{%s}''' % self.get_title(element),
+ u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
+ u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
+ u'</TeXML>'
+ ]
return u"".join(filter(None, lines)), u'</TeXML>'
-
@escape(1)
def handle_powiesc(self, element):
return u"""
return u'<TeXML escape="1"><cmd name="%s"><parm>' % cmd, u'</parm></cmd></TeXML>'
handle_akap = \
- handle_akap = \
- handle_akap_cd = \
- handle_akap_cd = \
- handle_akap_dialog = \
- handle_akap_dialog = \
- handle_autor_utworu = \
- handle_dedykacja = \
- handle_didaskalia = \
- handle_didask_tekst = \
- handle_dlugi_cytat = \
- handle_dzielo_nadrzedne = \
- handle_lista_osoba = \
- handle_mat = \
- handle_miejsce_czas = \
- handle_motto = \
- handle_motto_podpis = \
- handle_naglowek_akt = \
- handle_naglowek_czesc = \
- handle_naglowek_listy = \
- handle_naglowek_osoba = \
- handle_naglowek_scena = \
- handle_nazwa_utworu = \
- handle_nota = \
- handle_osoba = \
- handle_pa = \
- handle_pe = \
- handle_podtytul = \
- handle_poezja_cyt = \
- handle_pr = \
- handle_pt = \
- handle_sekcja_asterysk = \
- handle_sekcja_swiatlo = \
- handle_separator_linia = \
- handle_slowo_obce = \
- handle_srodtytul = \
- handle_tytul_dziela = \
- handle_wyroznienie = \
- handle_dywiz = \
- handle_texcommand
+ handle_akap_cd = \
+ handle_akap_dialog = \
+ handle_autor_utworu = \
+ handle_dedykacja = \
+ handle_didaskalia = \
+ handle_didask_tekst = \
+ handle_dlugi_cytat = \
+ handle_dzielo_nadrzedne = \
+ handle_lista_osoba = \
+ handle_mat = \
+ handle_miejsce_czas = \
+ handle_motto = \
+ handle_motto_podpis = \
+ handle_naglowek_akt = \
+ handle_naglowek_czesc = \
+ handle_naglowek_listy = \
+ handle_naglowek_osoba = \
+ handle_naglowek_scena = \
+ handle_nazwa_utworu = \
+ handle_nota = \
+ handle_osoba = \
+ handle_pa = \
+ handle_pe = \
+ handle_podtytul = \
+ handle_poezja_cyt = \
+ handle_pr = \
+ handle_pt = \
+ handle_sekcja_asterysk = \
+ handle_sekcja_swiatlo = \
+ handle_separator_linia = \
+ handle_slowo_obce = \
+ handle_srodtytul = \
+ handle_tytul_dziela = \
+ handle_wyroznienie = \
+ handle_dywiz = \
+ handle_texcommand
def handle_naglowek_rozdzial(self, element):
if not self.options['teacher']:
def handle_uwaga(self, _e):
return None
+
def handle_extra(self, _e):
return None
opis = ''
n = element.xpath('wskazowki')
- if n: wskazowki = submill.generate(n[0])
-
- else: wskazowki = ''
+ if n:
+ wskazowki = submill.generate(n[0])
+ else:
+ wskazowki = ''
n = element.xpath('pomoce')
- if n: pomoce = submill.generate(n[0])
- else: pomoce = ''
+ if n:
+ pomoce = submill.generate(n[0])
+ else:
+ pomoce = ''
forma = ''.join(element.xpath('forma/text()'))
def handle_forma(self, *_):
return
- def handle_lista(self, element, attrs={}):
+ def handle_lista(self, element, attrs=None):
ltype = element.attrib.get('typ', 'punkt')
if not element.findall("punkt"):
if ltype == 'czytelnia':
# print '** missing src on <slowniczek>, setting default'
surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
- self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
+ self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
- listcmd = {'num': 'enumerate',
- 'punkt': 'itemize',
- 'alfa': 'itemize',
- 'slowniczek': 'itemize',
- 'czytelnia': 'itemize'}[ltype]
+ listcmd = {
+ 'num': 'enumerate',
+ 'punkt': 'itemize',
+ 'alfa': 'itemize',
+ 'slowniczek': 'itemize',
+ 'czytelnia': 'itemize'
+ }[ltype]
return u'<env name="%s">' % listcmd, u'</env>'
typ = element.attrib['typ']
self.exercise_counter += 1
- if not typ in exercise_handlers:
+ if typ not in exercise_handlers:
return '(no handler)'
self.options = {'exercise_counter': self.exercise_counter}
handler = exercise_handlers[typ](self.options, self.state)
max_col = len(ks)
self.options = {'columnts': max_col}
# styling:
- # has_frames = int(element.attrib.get("ramki", "0"))
- # if has_frames: frames_c = "framed"
- # else: frames_c = ""
- # return u"""<table class="%s">""" % frames_c, u"</table>"
+ # has_frames = int(element.attrib.get("ramki", "0"))
+ # if has_frames: frames_c = "framed"
+ # else: frames_c = ""
+ # return u"""<table class="%s">""" % frames_c, u"</table>"
return u'''
<cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
- ''' % ('l' * max_col), \
- u'''<cmd name="end"><parm>tabular</parm></cmd>'''
+ ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
@escape(1)
def handle_wiersz(self, element):
print '!! unknown <video> url scheme:', url
return
name = m.group(1)
- thumb = IOFile.from_string(urlopen
- ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
+ thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
img_path = "video/%s.jpg" % name.replace("_", "")
self.options['format'].attachments[img_path] = thumb
canon_url = "https://www.youtube.com/watch?v=%s" % name
def __init__(self, *args, **kw):
+ """Start the question counter at 0, then run the base-class initializer."""
self.question_counter = 0
super(Exercise, self).__init__(*args, **kw)
+ # Declared here so the attribute always exists on the instance.
+ self.piece_counter = None
handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
# Add a single <pytanie> tag if it's not there
if not element.xpath(".//pytanie"):
qpre, qpost = self.handle_pytanie(element)
- pre = pre + qpre
+ pre += qpre
post = qpost + post
return pre, post
return self.solution_header() + etree.tostring(par)
-
class Wybor(Exercise):
def handle_cwiczenie(self, element):
pre, post = super(Wybor, self).handle_cwiczenie(element)
break
choices = p.xpath(".//*[@nazwa]")
uniq = set()
- for n in choices: uniq.add(n.attrib.get('nazwa', ''))
+ for n in choices:
+ uniq.add(n.attrib.get('nazwa', ''))
if len(choices) != len(uniq):
is_single_choice = False
break
return pre, post
-
def fix_lists(tree):
+ """Move text placed directly inside each <lista> to just before the list.
+
+ For every <lista> descendant of `tree` that has leading text, the text is
+ appended to the tail of the preceding sibling or, when there is no such
+ sibling, to the text of the parent element; the list's own text is then
+ set to ''. The tree is modified in place and returned.
+ """
lists = tree.xpath(".//lista")
for l in lists:
if l.text:
p = l.getprevious()
if p is not None:
- if p.tail is None: p.tail = ''
+ if p.tail is None:
+ p.tail = ''
p.tail += l.text
else:
p = l.getparent()
- if p.text is None: p.text = ''
+ if p.text is None:
+ p.text = ''
p.text += l.text
l.text = ''
return tree
def get_image(self, name):
return self.wldoc.source.attachments[name]
-
font=self.author_font,
line_height=self.author_lineskip,
color=self.author_color,
- shadow_color=self.author_shadow,
- )
+ shadow_color=self.author_shadow)
box.skip(10)
- box.draw.line((75, box.height, 275, box.height),
- fill=self.author_color, width=2)
+ box.draw.line((75, box.height, 275, box.height), fill=self.author_color, width=2)
box.skip(15)
box.text(self.pretty_title(),
line_height=self.title_lineskip,
font=self.title_font,
color=epoch_color,
- shadow_color=self.title_shadow,
- )
+ shadow_color=self.title_shadow)
box_img = box.image()
if self.kind == 'Liryka':
# center
box_top = (self.height - box_img.size[1]) / 2
- box_left = self.bar_width + (self.width - self.bar_width -
- box_img.size[0]) / 2
- draw.rectangle((box_left, box_top,
+ box_left = self.bar_width + (self.width - self.bar_width - box_img.size[0]) / 2
+ draw.rectangle((
+ box_left, box_top,
box_left + box_img.size[0], box_top + box_img.size[1]),
fill='#fff')
img.paste(box_img, (box_left, box_top), box_img)
+++ /dev/null
-import shutil
-from librarian import get_resource
-from librarian.pdf import PDFFormat
-from librarian.styles.wolnelektury.cover import WLCover
-
-class WLPDFFormat(PDFFormat):
- cover_class = WLCover
- style = get_resource('res/styles/wolnelektury/pdf/wolnelektury.sty')
-
- def get_tex_dir(self):
- temp = super(WLPDFFormat, self).get_tex_dir()
- shutil.copy(get_resource('res/wl-logo.png'), temp)
- return temp
%(description)s%(contributors)s
"""
+
def transform(wldoc, flags=None, **options):
"""
Transforms input_file in XML to output_file in TXT.
parsed_dc = document.book_info
description = parsed_dc.description
url = document.book_info.url
-
+
license_description = parsed_dc.license_description
license = parsed_dc.license
if license:
- license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license)
+ license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % \
+ (license_description, license)
else:
- license_description = u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)"
-
+ license_description = (
+ u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, "
+ u"co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. "
+ u"Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), "
+ u"które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji "
+ u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL "
+ u"(http://creativecommons.org/licenses/by-sa/3.0/)")
+
source = parsed_dc.source_name
if source:
source = "\n\nTekst opracowany na podstawie: " + source
else:
source = ''
-
- contributors = ', '.join(person.readable() for person in
+
+ contributors = ', '.join(person.readable() for person in
sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p)))
if contributors:
contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors
}).encode('utf-8'))
else:
return IOFile.from_string(unicode(result).encode('utf-8'))
-
output = flt(output)
return output
-
def generate(self, document):
"""Generate text from node using handlers defined in class."""
output = self._handle_element(document)
"""
self._options.append(opts)
-
def _handle_for_element(self, element):
ns = None
tagname = None
-# from nose.tools import set_trace
+ # from nose.tools import set_trace
if element.tag[0] == '{':
for nshort, nhref in element.nsmap.items():
try:
if element.tag.index('{%s}' % nhref) == 0:
ns = nshort
- tagname = element.tag[len('{%s}' % nhref):]
+ tagname = element.tag[len('{%s}' % nhref):]
break
except ValueError:
pass
while True:
sibling = element.getnext()
- if sibling is not None: return sibling # found a new branch to dig into
+ if sibling is not None:
+ return sibling # found a new branch to dig into
element = element.getparent()
- if element is None: return None # end of tree
+ if element is None:
+ return None # end of tree
def _handle_element(self, element):
- if isinstance(element, etree._Comment): return None
-
+ if isinstance(element, etree._Comment):
+ return None
+
handler = self._handle_for_element(element)
- if self.state.get('mute') and not getattr(handler, 'unmuter', False): return None
+ if self.state.get('mute') and not getattr(handler, 'unmuter', False):
+ return None
# How many scopes
+ options_scopes = len(self._options)
try:
- options_scopes = len(self._options)
-
if handler is None:
pre = [self.filter_text(element.text)]
post = [self.filter_text(element.tail)]
finally:
# clean up option scopes if necessary
self._options = self._options[0:options_scopes]
-
+
return out
def tag_open_close(name_, classes_=None, **attrs):
u"""Creates tag beginning and end.
-
+
>>> tag_open_close("a", "klass", x=u"ą<")
(u'<a x="\\u0105<" class="klass">', u'</a>')
"""
if classes_:
- if isinstance(classes_, (tuple, list)): classes_ = ' '.join(classes_)
+ if isinstance(classes_, (tuple, list)):
+ classes_ = ' '.join(classes_)
attrs['class'] = classes_
e = etree.Element(name_)
pre, post = etree.tostring(e, encoding=unicode).split(u"> <")
return pre + u">", u"<" + post
+
def tag(name_, classes_=None, **attrs):
"""Returns a handler which wraps node contents in tag `name', with class attribute
set to `classes' and other attributes according to keyword paramters
set to `classes' and other attributes according to keyword paramters
"""
if classes:
- if isinstance(classes, (tuple,list)): classes = ' '.join(classes)
+ if isinstance(classes, (tuple, list)):
+ classes = ' '.join(classes)
attrs['class'] = classes
- a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()])
+ a = ''.join([' %s="%s"' % (k, v) for (k, v) in attrs.items()])
+
def _decor(f):
def _wrap(self, element):
r = f(self, element)
- if r is None: return
+ if r is None:
+ return
prepend = "<%s%s>" % (name, a)
append = "</%s>" % name
return _handler
return _decor
+
def flatten(l, ltypes=(list, tuple)):
"""flatten function from BasicPropery/BasicTypes package
"""
import os.path
from distutils.core import setup
+
def whole_tree(prefix, path):
files = []
- for f in (f for f in os.listdir(os.path.join(prefix, path)) if not f[0]=='.'):
+ for f in (f for f in os.listdir(os.path.join(prefix, path)) if not f[0] == '.'):
new_path = os.path.join(path, f)
if os.path.isdir(os.path.join(prefix, new_path)):
files.extend(whole_tree(prefix, new_path))
'librarian.styles.wolnelektury',
'librarian.styles.wolnelektury.partners',
],
- package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
- whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer') +
- whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res')},
+ package_data={
+ 'librarian': (
+ ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
+ whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer') +
+ whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res'))
+ },
include_package_data=True,
install_requires=['lxml>=2.2'],
scripts=['scripts/book2html',
def test_serialize():
for fixture in get_all_fixtures('dcparser', '*.xml'):
yield check_serialize, fixture
-
for par in tree.findall("//p"):
if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'):
editors_attribution = True
- assert_equal(par.text.rstrip(),
+ assert_equal(
+ par.text.rstrip(),
u'Opracowanie redakcyjne i przypisy: '
u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
assert_true(editors_attribution)
parse_dublincore=False,
).as_html()
+
def test_empty():
assert not WLDocument.from_string(
'<utwor />',
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
import os
-from StringIO import StringIO
from tempfile import NamedTemporaryFile
from nose.tools import *
from librarian import IOFile
+
def test_iofile_from_string_reusable():
some_file = IOFile.from_string("test")
some_file.get_file().read()
assert_equal(some_file.get_file().read(), "test")
+
def test_iofile_from_filename_reusable():
temp = NamedTemporaryFile(delete=False)
try:
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-import re
from tempfile import NamedTemporaryFile
from nose.tools import *
from librarian import DirDocProvider
print tex
# Check contributor list.
- editors = re.search(ur'\\def\\editors\{'
- ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
- assert_equal(editors.group(1),
- u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
+ editors = re.search(
+ ur'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
+ assert_equal(editors.group(1), u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from librarian import picture, dcparser
-from lxml import etree
from nose.tools import *
-from os.path import splitext
from tests.utils import get_all_fixtures, get_fixture
-import codecs
from os import path
+
def test_wlpictureuri():
    """Smoke test: building a WLPictureURI from this URL must not raise."""
    # Only construction is exercised; the resulting object is not inspected.
    picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus')
+
def check_load(xml_file):
    """Parse *xml_file* with dcparser and check a PictureInfo comes back.

    :param xml_file: value passed straight through to ``dcparser.parse``.
    """
    info = dcparser.parse(xml_file, picture.PictureInfo)
    assert info is not None
    assert isinstance(info, picture.PictureInfo)
-
+
def test_load():
for fixture in get_all_fixtures('picture', '*.xml'):
assert wlp.slug == 'angelus-novus'
assert path.exists(wlp.image_path)
-
+
f = wlp.image_file('r')
f.close()
+
def test_picture_parts():
wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml')))
parts = list(wlp.partiter())
if p['object']:
names.add(p['object'])
- assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs
- assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names
-
-
+ assert motifs == {u'anioł historii', u'spojrzenie'}, "missing motifs, got: %s" % motifs
+ assert names == {u'obraz cały', u'skrzydło'}, 'missing objects, got: %s' % names
-
+# -*- coding: utf-8 -*-
from librarian import xmlutils
from lxml import etree
from librarian.pyhtml import EduModule
from nose.tools import *
from tests.utils import get_fixture
+
def test_traversal():
xml = etree.fromstring("<a><b>BBBB</b><c>CCCC</c></a>")
hg = xmlutils.Xmill()
assert_equals(hg.next(xml[1]), None)
-
class Foo(xmlutils.Xmill):
def __init__(self):
super(Foo, self).__init__()
def handle_song(self, ele):
if ele.getnext() is not None:
- return "\n","--------------------\n"
-
+ return "\n", "--------------------\n"
def test_xml_generation():
xml = u"""<root>
-<songs>
-<song>
-<title>Oursoul</title>
-<artist>Hindi Zahra</artist>
-</song>
-<song>
-<title>Visitor</title>
-<artist>Portico Quartet</artist>
-</song>
-</songs>
-</root>
-"""
+ <songs>
+ <song>
+ <title>Oursoul</title>
+ <artist>Hindi Zahra</artist>
+ </song>
+ <song>
+ <title>Visitor</title>
+ <artist>Portico Quartet</artist>
+ </song>
+ </songs>
+ </root>
+ """
txt = Foo().generate(etree.fromstring(xml))
print txt
from __future__ import with_statement
from os.path import realpath, join, dirname
import glob
-import os
+
def get_fixture_dir(dir_name):
"""Returns path to fixtures directory dir_name."""