pass
+# NOTE: NoDublinCore was removed upstream but is still referenced by callers;
+# redefine it here for backward compatibility until those call sites are updated.
+class NoDublinCore(ValidationError):
+ pass
+
+
class BuildError(Exception):
pass
Subclass it for any format you want to convert to.
"""
- format_cls = None # A formats.Format subclass
- document_options = [] # List of Option objects for document options.
- format_options = [] # List of Option objects for format customization.
- build_options = [] # List of Option objects for build options.
+ format_cls = None # A formats.Format subclass
+ document_options = [] # List of Option objects for document options.
+ format_options = [] # List of Option objects for format customization.
+ build_options = [] # List of Option objects for build options.
@classmethod
def run(cls):
parser = optparse.OptionParser(usage=usage)
- parser.add_option('-v', '--verbose',
- action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
- parser.add_option('-o', '--output-file',
- dest='output_file', metavar='FILE',
- help='specifies the output file')
+ parser.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+ parser.add_option(
+ '-o', '--output-file',
+ dest='output_file', metavar='FILE',
+ help='specifies the output file')
for option in cls.document_options + cls.format_options + cls.build_options:
option.add(parser)
if len(input_filenames) < 1:
parser.print_help()
- return(1)
+ return 1
# Prepare additional args for document.
document_args = {}
if options.verbose:
print main_input
- # Do the transformation.
- doc = Document.from_file(main_input, **document_args)
- format_ = cls.format_cls(doc, **format_args)
+ # Do the transformation.
+ doc = Document.from_file(main_input, **document_args)
+ format_ = cls.format_cls(doc, **format_args)
- # Where to write output?
- if not options.output_file:
- output_file = os.path.splitext(main_input)[0] + '.' + format_.format_ext
- else:
- output_file = None
+ # Where to write output?
+ if not options.output_file:
+ output_file = os.path.splitext(main_input)[0] + '.' + format_.format_ext
+ else:
+ output_file = None
- output = format_.build(**build_args)
- output.save_as(output_file)
+ output = format_.build(**build_args)
+ output.save_as(output_file)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from xml.parsers.expat import ExpatError
-from datetime import date
-import time
-
-from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
- WLURI)
-
-import lxml.etree as etree # ElementTree API using libxml2
-from lxml.etree import XMLSyntaxError
-
-
-# ==============
-# = Converters =
-# ==============
-class Person(object):
- """Single person with last name and a list of first names."""
- def __init__(self, last_name, *first_names):
- self.last_name = last_name
- self.first_names = first_names
-
- @classmethod
- def from_text(cls, text):
- parts = [ token.strip() for token in text.split(',') ]
- if len(parts) == 1:
- surname = parts[0]
- names = []
- elif len(parts) != 2:
- raise ValueError("Invalid person name. There should be at most one comma: \"%s\"." % text)
- else:
- surname = parts[0]
- if len(parts[1]) == 0:
- # there is no non-whitespace data after the comma
- raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts))
- names = [ name for name in parts[1].split() if len(name) ] # all non-whitespace tokens
- return cls(surname, *names)
-
- def readable(self):
- return u" ".join(self.first_names + (self.last_name,))
-
- def __eq__(self, right):
- return self.last_name == right.last_name and self.first_names == right.first_names
-
- def __cmp__(self, other):
- return cmp((self.last_name, self.first_names), (other.last_name, other.first_names))
-
- def __hash__(self):
- return hash((self.last_name, self.first_names))
-
- def __unicode__(self):
- if len(self.first_names) > 0:
- return '%s, %s' % (self.last_name, ' '.join(self.first_names))
- else:
- return self.last_name
-
- def __repr__(self):
- return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)
-
-def as_date(text):
- try:
- try:
- t = time.strptime(text, '%Y-%m-%d')
- except ValueError:
- t = time.strptime(text, '%Y')
- return date(t[0], t[1], t[2])
- except ValueError, e:
- raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
-
-def as_person(text):
- return Person.from_text(text)
-
-def as_unicode(text):
- if isinstance(text, unicode):
- return text
- else:
- return text.decode('utf-8')
-
-def as_wluri_strict(text):
- return WLURI.strict(text)
-
-class Field(object):
- def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
- self.uri = uri
- self.name = attr_name
- self.validator = validator
- self.strict = strict
- self.multiple = multiple
- self.salias = salias
-
- self.required = kwargs.get('required', True) and not kwargs.has_key('default')
- self.default = kwargs.get('default', [] if multiple else [None])
-
- def validate_value(self, val, strict=False):
- if strict and self.strict is not None:
- validator = self.strict
- else:
- validator = self.validator
- try:
- if self.multiple:
- if validator is None:
- return val
- return [ validator(v) if v is not None else v for v in val ]
- elif len(val) > 1:
- raise ValidationError("Multiple values not allowed for field '%s'" % self.uri)
- elif len(val) == 0:
- raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri)
- else:
- if validator is None or val[0] is None:
- return val[0]
- return validator(val[0])
- except ValueError, e:
- raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
-
- def validate(self, fdict, fallbacks=None, strict=False):
- if fallbacks is None:
- fallbacks = {}
- if not fdict.has_key(self.uri):
- if not self.required:
- # Accept single value for single fields and saliases.
- if self.name in fallbacks:
- if self.multiple:
- f = fallbacks[self.name]
- else:
- f = [fallbacks[self.name]]
- elif self.salias and self.salias in fallbacks:
- f = [fallbacks[self.salias]]
- else:
- f = self.default
- else:
- raise ValidationError("Required field %s not found" % self.uri)
- else:
- f = fdict[self.uri]
-
- return self.validate_value(f, strict=strict)
-
- def __eq__(self, other):
- if isinstance(other, Field) and other.name == self.name:
- return True
- return False
-
-
-class DCInfo(type):
- def __new__(meta, classname, bases, class_dict):
- fields = list(class_dict['FIELDS'])
-
- for base in bases[::-1]:
- if hasattr(base, 'FIELDS'):
- for field in base.FIELDS[::-1]:
- try:
- fields.index(field)
- except ValueError:
- fields.insert(0, field)
-
- class_dict['FIELDS'] = tuple(fields)
- return super(DCInfo, meta).__new__(meta, classname, bases, class_dict)
-
-
-class WorkInfo(object):
- __metaclass__ = DCInfo
-
- FIELDS = (
- Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
- Field( DCNS('title'), 'title'),
- Field( DCNS('type'), 'type', required=False, multiple=True),
-
- Field( DCNS('contributor.editor'), 'editors', \
- as_person, salias='editor', multiple=True, default=[]),
- Field( DCNS('contributor.technical_editor'), 'technical_editors',
- as_person, salias='technical_editor', multiple=True, default=[]),
-
- Field( DCNS('date'), 'created_at', as_date),
- Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
- Field( DCNS('publisher'), 'publisher'),
-
- Field( DCNS('language'), 'language'),
- Field( DCNS('description'), 'description', required=False),
-
- Field( DCNS('source'), 'source_name', required=False),
- Field( DCNS('source.URL'), 'source_url', required=False),
- Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
- Field( DCNS('rights.license'), 'license', required=False),
- Field( DCNS('rights'), 'license_description'),
- )
-
- @classmethod
- def from_string(cls, xml, *args, **kwargs):
- from StringIO import StringIO
- return cls.from_file(StringIO(xml), *args, **kwargs)
-
- @classmethod
- def from_file(cls, xmlfile, *args, **kwargs):
- desc_tag = None
- try:
- iter = etree.iterparse(xmlfile, ['start', 'end'])
- for (event, element) in iter:
- if element.tag == RDFNS('RDF') and event == 'start':
- desc_tag = element
- break
-
- if desc_tag is None:
- raise NoDublinCore("DublinCore section not found. \
- Check if there are rdf:RDF and rdf:Description tags.")
-
- # continue 'till the end of RDF section
- for (event, element) in iter:
- if element.tag == RDFNS('RDF') and event == 'end':
- break
-
- # if there is no end, Expat should yell at us with an ExpatError
-
- # extract data from the element and make the info
- return cls.from_element(desc_tag, *args, **kwargs)
- except XMLSyntaxError, e:
- raise ParseError(e)
- except ExpatError, e:
- raise ParseError(e)
-
- @classmethod
- def from_element(cls, rdf_tag, *args, **kwargs):
- # the tree is already parsed, so we don't need to worry about Expat errors
- field_dict = {}
- desc = rdf_tag.find(".//" + RDFNS('Description'))
-
- if desc is None:
- raise NoDublinCore("No DublinCore section found.")
-
- for e in desc.getchildren():
- fv = field_dict.get(e.tag, [])
- fv.append(e.text)
- field_dict[e.tag] = fv
-
- return cls(desc.attrib, field_dict, *args, **kwargs)
-
- def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
- """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
- dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the
- given field. """
-
- self.about = rdf_attrs.get(RDFNS('about'))
- self.fmap = {}
-
- for field in self.FIELDS:
- value = field.validate(dc_fields, fallbacks=fallbacks,
- strict=strict)
- setattr(self, 'prop_' + field.name, value)
- self.fmap[field.name] = field
- if field.salias: self.fmap[field.salias] = field
-
- def __getattribute__(self, name):
- try:
- field = object.__getattribute__(self, 'fmap')[name]
- value = object.__getattribute__(self, 'prop_'+field.name)
- if field.name == name:
- return value
- else: # singular alias
- if not field.multiple:
- raise "OUCH!! for field %s" % name
-
- return value[0] if value else None
- except (KeyError, AttributeError):
- return object.__getattribute__(self, name)
-
- def __setattr__(self, name, newvalue):
- try:
- field = object.__getattribute__(self, 'fmap')[name]
- if field.name == name:
- object.__setattr__(self, 'prop_'+field.name, newvalue)
- else: # singular alias
- if not field.multiple:
- raise "OUCH! while setting field %s" % name
-
- object.__setattr__(self, 'prop_'+field.name, [newvalue])
- except (KeyError, AttributeError):
- return object.__setattr__(self, name, newvalue)
-
- def update(self, field_dict):
- """Update using field_dict. Verify correctness, but don't check if all
- required fields are present."""
- for field in self.FIELDS:
- if field_dict.has_key(field.name):
- setattr(self, field.name, field_dict[field.name])
-
- def to_etree(self, parent = None):
- """XML representation of this object."""
- #etree._namespace_map[str(self.RDF)] = 'rdf'
- #etree._namespace_map[str(self.DC)] = 'dc'
-
- if parent is None:
- root = etree.Element(RDFNS('RDF'))
- else:
- root = parent.makeelement(RDFNS('RDF'))
-
- description = etree.SubElement(root, RDFNS('Description'))
-
- if self.about:
- description.set(RDFNS('about'), self.about)
-
- for field in self.FIELDS:
- v = getattr(self, field.name, None)
- if v is not None:
- if field.multiple:
- if len(v) == 0: continue
- for x in v:
- e = etree.Element(field.uri)
- if x is not None:
- e.text = unicode(x)
- description.append(e)
- else:
- e = etree.Element(field.uri)
- e.text = unicode(v)
- description.append(e)
-
- return root
-
- def serialize(self):
- rdf = {}
- rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about }
-
- dc = {}
- for field in self.FIELDS:
- v = getattr(self, field.name, None)
- if v is not None:
- if field.multiple:
- if len(v) == 0: continue
- v = [ unicode(x) for x in v if x is not None ]
- else:
- v = unicode(v)
-
- dc[field.name] = {'uri': field.uri, 'value': v}
- rdf['fields'] = dc
- return rdf
-
- def to_dict(self):
- result = {'about': self.about}
- for field in self.FIELDS:
- v = getattr(self, field.name, None)
-
- if v is not None:
- if field.multiple:
- if len(v) == 0: continue
- v = [ unicode(x) for x in v if x is not None ]
- else:
- v = unicode(v)
- result[field.name] = v
-
- if field.salias:
- v = getattr(self, field.salias)
- if v is not None: result[field.salias] = unicode(v)
-
- return result
-
-
-class BookInfo(WorkInfo):
- FIELDS = (
- Field( DCNS('audience'), 'audiences', salias='audience', multiple=True,
- required=False),
-
- Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
- required=False),
- Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
- required=False),
- Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
- required=False),
-
- Field( DCNS('contributor.translator'), 'translators', \
- as_person, salias='translator', multiple=True, default=[]),
- Field( DCNS('relation.hasPart'), 'parts',
- WLURI, strict=as_wluri_strict, multiple=True, required=False),
- Field( DCNS('relation.isVariantOf'), 'variant_of',
- WLURI, strict=as_wluri_strict, required=False),
-
- Field( DCNS('relation.coverImage.url'), 'cover_url', required=False),
- Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
- Field( DCNS('relation.coverImage.source'), 'cover_source', required=False),
- )
-
-
-def parse(file_name, cls=BookInfo):
- return cls.from_file(file_name)
raise ValueError("Invalid root element. Found '%s', should be '%s'" % (
root_elem.tag, SSTNS('section')))
else:
- raise ValueError("Invalid class of root element. "
- "Use librarian.parser.SSTParser.")
- #print etree.tostring(self.edoc.getroot())
+ raise ValueError("Invalid class of root element. Use librarian.parser.SSTParser.")
+ # print etree.tostring(self.edoc.getroot())
@classmethod
def from_string(cls, xml, *args, **kwargs):
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from __future__ import with_statement
-
-import os
-import os.path
-import re
-import subprocess
-from StringIO import StringIO
-from copy import deepcopy
-from lxml import etree
-import zipfile
-from tempfile import mkdtemp, NamedTemporaryFile
-from shutil import rmtree
-
-from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile
-from librarian.cover import WLCover
-
-from librarian import functions, get_resource
-
-functions.reg_person_name()
-
-
-def inner_xml(node):
- """ returns node's text and children as a string
-
- >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
- x<b>y</b>z
- """
-
- nt = node.text if node.text is not None else ''
- return ''.join([nt] + [etree.tostring(child) for child in node])
-
-def set_inner_xml(node, text):
- """ sets node's text and children from a string
-
- >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
- >>> set_inner_xml(e, 'x<b>y</b>z')
- >>> print etree.tostring(e)
- <a>x<b>y</b>z</a>
- """
-
- p = etree.fromstring('<x>%s</x>' % text)
- node.text = p.text
- node[:] = p[:]
-
-
-def node_name(node):
- """ Find out a node's name
-
- >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
- XYZ
- """
-
- tempnode = deepcopy(node)
-
- for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
- for e in tempnode.findall('.//%s' % p):
- t = e.tail
- e.clear()
- e.tail = t
- etree.strip_tags(tempnode, '*')
- return tempnode.text
-
-
-def xslt(xml, sheet):
- if isinstance(xml, etree._Element):
- xml = etree.ElementTree(xml)
- with open(sheet) as xsltf:
- return xml.xslt(etree.parse(xsltf))
-
-
-def replace_characters(node):
- def replace_chars(text):
- if text is None:
- return None
- return text.replace(u"\ufeff", u"")\
- .replace("---", u"\u2014")\
- .replace("--", u"\u2013")\
- .replace(",,", u"\u201E")\
- .replace('"', u"\u201D")\
- .replace("'", u"\u2019")
- if node.tag in ('uwaga', 'extra'):
- t = node.tail
- node.clear()
- node.tail = t
- node.text = replace_chars(node.text)
- node.tail = replace_chars(node.tail)
- for child in node:
- replace_characters(child)
-
-
-def find_annotations(annotations, source, part_no):
- for child in source:
- if child.tag in ('pe', 'pa', 'pt', 'pr'):
- annotation = deepcopy(child)
- number = str(len(annotations)+1)
- annotation.set('number', number)
- annotation.set('part', str(part_no))
- annotation.tail = ''
- annotations.append(annotation)
- tail = child.tail
- child.clear()
- child.tail = tail
- child.text = number
- if child.tag not in ('extra', 'uwaga'):
- find_annotations(annotations, child, part_no)
-
-
-class Stanza(object):
- """
- Converts / verse endings into verse elements in a stanza.
-
- Slashes may only occur directly in the stanza. Any slashes in subelements
- will be ignored, and the subelements will be put inside verse elements.
-
- >>> s = etree.fromstring("<strofa>a/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
- >>> Stanza(s).versify()
- >>> print etree.tostring(s)
- <strofa><wers_normalny>a</wers_normalny><wers_normalny>b<x>x/
- y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
-
- """
- def __init__(self, stanza_elem):
- self.stanza = stanza_elem
- self.verses = []
- self.open_verse = None
-
- def versify(self):
- self.push_text(self.stanza.text)
- for elem in self.stanza:
- self.push_elem(elem)
- self.push_text(elem.tail)
- tail = self.stanza.tail
- self.stanza.clear()
- self.stanza.tail = tail
- self.stanza.extend(self.verses)
-
- def open_normal_verse(self):
- self.open_verse = self.stanza.makeelement("wers_normalny")
- self.verses.append(self.open_verse)
-
- def get_open_verse(self):
- if self.open_verse is None:
- self.open_normal_verse()
- return self.open_verse
-
- def push_text(self, text):
- if not text or not text.strip():
- return
- for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
- if i:
- self.open_normal_verse()
- verse = self.get_open_verse()
- if len(verse):
- verse[-1].tail = (verse[-1].tail or "") + verse_text.strip()
- else:
- verse.text = (verse.text or "") + verse_text.strip()
-
- def push_elem(self, elem):
- if elem.tag.startswith("wers"):
- verse = deepcopy(elem)
- verse.tail = None
- self.verses.append(verse)
- self.open_verse = verse
- else:
- appended = deepcopy(elem)
- appended.tail = None
- self.get_open_verse().append(appended)
-
-
-def replace_by_verse(tree):
- """ Find stanzas and create new verses in place of a '/' character """
-
- stanzas = tree.findall('.//' + WLNS('strofa'))
- for stanza in stanzas:
- Stanza(stanza).versify()
-
-
-def add_to_manifest(manifest, partno):
- """ Adds a node to the manifest section in content.opf file """
-
- partstr = 'part%d' % partno
- e = manifest.makeelement(OPFNS('item'), attrib={
- 'id': partstr,
- 'href': partstr + '.html',
- 'media-type': 'application/xhtml+xml',
- })
- manifest.append(e)
-
-
-def add_to_spine(spine, partno):
- """ Adds a node to the spine section in content.opf file """
-
- e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno});
- spine.append(e)
-
-
-class TOC(object):
- def __init__(self, name=None, part_href=None):
- self.children = []
- self.name = name
- self.part_href = part_href
- self.sub_number = None
-
- def add(self, name, part_href, level=0, is_part=True, index=None):
- assert level == 0 or index is None
- if level > 0 and self.children:
- return self.children[-1].add(name, part_href, level-1, is_part)
- else:
- t = TOC(name)
- t.part_href = part_href
- if index is not None:
- self.children.insert(index, t)
- else:
- self.children.append(t)
- if not is_part:
- t.sub_number = len(self.children) + 1
- return t.sub_number
-
- def append(self, toc):
- self.children.append(toc)
-
- def extend(self, toc):
- self.children.extend(toc.children)
-
- def depth(self):
- if self.children:
- return max((c.depth() for c in self.children)) + 1
- else:
- return 0
-
- def href(self):
- src = self.part_href
- if self.sub_number is not None:
- src += '#sub%d' % self.sub_number
- return src
-
- def write_to_xml(self, nav_map, counter=1):
- for child in self.children:
- nav_point = nav_map.makeelement(NCXNS('navPoint'))
- nav_point.set('id', 'NavPoint-%d' % counter)
- nav_point.set('playOrder', str(counter))
-
- nav_label = nav_map.makeelement(NCXNS('navLabel'))
- text = nav_map.makeelement(NCXNS('text'))
- text.text = child.name
- nav_label.append(text)
- nav_point.append(nav_label)
-
- content = nav_map.makeelement(NCXNS('content'))
- content.set('src', child.href())
- nav_point.append(content)
- nav_map.append(nav_point)
- counter = child.write_to_xml(nav_point, counter + 1)
- return counter
-
- def html_part(self, depth=0):
- texts = []
- for child in self.children:
- texts.append(
- "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
- (depth, child.href(), child.name))
- texts.append(child.html_part(depth+1))
- return "\n".join(texts)
-
- def html(self):
- with open(get_resource('epub/toc.html')) as f:
- t = unicode(f.read(), 'utf-8')
- return t % self.html_part()
-
-
-def used_chars(element):
- """ Lists characters used in an ETree Element """
- chars = set((element.text or '') + (element.tail or ''))
- for child in element:
- chars = chars.union(used_chars(child))
- return chars
-
-
-def chop(main_text):
- """ divide main content of the XML file into chunks """
-
- # prepare a container for each chunk
- part_xml = etree.Element('utwor')
- etree.SubElement(part_xml, 'master')
- main_xml_part = part_xml[0] # master
-
- last_node_part = False
- for one_part in main_text:
- name = one_part.tag
- if name == 'naglowek_czesc':
- yield part_xml
- last_node_part = True
- main_xml_part[:] = [deepcopy(one_part)]
- elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
- yield part_xml
- main_xml_part[:] = [deepcopy(one_part)]
- else:
- main_xml_part.append(deepcopy(one_part))
- last_node_part = False
- yield part_xml
-
-
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
- """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
-
- toc = TOC()
- for element in chunk_xml[0]:
- if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"):
- toc.add(node_name(element), "part%d.html" % chunk_no)
- elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
- subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
- element.set('sub', str(subnumber))
- if empty:
- if not _empty_html_static:
- _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
- chars = set()
- output_html = _empty_html_static[0]
- else:
- find_annotations(annotations, chunk_xml, chunk_no)
- replace_by_verse(chunk_xml)
- html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
- chars = used_chars(html_tree.getroot())
- output_html = etree.tostring(html_tree, method="html", pretty_print=True)
- return output_html, toc, chars
-
-
-def transform(wldoc, verbose=False,
- style=None, html_toc=False,
- sample=None, cover=None, flags=None):
- """ produces a EPUB file
-
- sample=n: generate sample e-book (with at least n paragraphs)
- cover: a cover.Cover factory or True for default
- flags: less-advertising, without-fonts, working-copy
- """
-
- def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
- """ processes one input file and proceeds to its children """
-
- replace_characters(wldoc.edoc.getroot())
-
- # every input file will have a TOC entry,
- # pointing to starting chunk
- toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
- chars = set()
- if first:
- # write book title page
- html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
- chars = used_chars(html_tree.getroot())
- zip.writestr('OPS/title.html',
- etree.tostring(html_tree, method="html", pretty_print=True))
- # add a title page TOC entry
- toc.add(u"Strona tytułowa", "title.html")
- elif wldoc.book_info.parts:
- # write title page for every parent
- if sample is not None and sample <= 0:
- chars = set()
- html_string = open(get_resource('epub/emptyChunk.html')).read()
- else:
- html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
- chars = used_chars(html_tree.getroot())
- html_string = etree.tostring(html_tree, method="html", pretty_print=True)
- zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
- add_to_manifest(manifest, chunk_counter)
- add_to_spine(spine, chunk_counter)
- chunk_counter += 1
-
- if len(wldoc.edoc.getroot()) > 1:
- # rdf before style master
- main_text = wldoc.edoc.getroot()[1]
- else:
- # rdf in style master
- main_text = wldoc.edoc.getroot()[0]
- if main_text.tag == RDFNS('RDF'):
- main_text = None
-
- if main_text is not None:
- for chunk_xml in chop(main_text):
- empty = False
- if sample is not None:
- if sample <= 0:
- empty = True
- else:
- sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
- chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
-
- toc.extend(chunk_toc)
- chars = chars.union(chunk_chars)
- zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
- add_to_manifest(manifest, chunk_counter)
- add_to_spine(spine, chunk_counter)
- chunk_counter += 1
-
- for child in wldoc.parts():
- child_toc, chunk_counter, chunk_chars, sample = transform_file(
- child, chunk_counter, first=False, sample=sample)
- toc.append(child_toc)
- chars = chars.union(chunk_chars)
-
- return toc, chunk_counter, chars, sample
-
-
- document = deepcopy(wldoc)
- del wldoc
-
- if flags:
- for flag in flags:
- document.edoc.getroot().set(flag, 'yes')
-
- # add editors info
- document.edoc.getroot().set('editors', u', '.join(sorted(
- editor.readable() for editor in document.editors())))
-
- opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
- manifest = opf.find('.//' + OPFNS('manifest'))
- guide = opf.find('.//' + OPFNS('guide'))
- spine = opf.find('.//' + OPFNS('spine'))
-
- output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
- zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
-
- # write static elements
- mime = zipfile.ZipInfo()
- mime.filename = 'mimetype'
- mime.compress_type = zipfile.ZIP_STORED
- mime.extra = ''
- zip.writestr(mime, 'application/epub+zip')
- zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
- 'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
- '<rootfiles><rootfile full-path="OPS/content.opf" ' \
- 'media-type="application/oebps-package+xml" />' \
- '</rootfiles></container>')
- zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
- zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
- if not style:
- style = get_resource('epub/style.css')
- zip.write(style, os.path.join('OPS', 'style.css'))
-
- if cover:
- if cover is True:
- cover = WLCover
-
- cover_file = StringIO()
- bound_cover = cover(document.book_info)
- bound_cover.save(cover_file)
- cover_name = 'cover.%s' % bound_cover.ext()
- zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
- del cover_file
-
- cover_tree = etree.parse(get_resource('epub/cover.html'))
- cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
- zip.writestr('OPS/cover.html', etree.tostring(
- cover_tree, method="html", pretty_print=True))
-
- if bound_cover.uses_dc_cover:
- if document.book_info.cover_by:
- document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
- if document.book_info.cover_source:
- document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
-
- manifest.append(etree.fromstring(
- '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
- manifest.append(etree.fromstring(
- '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
- spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
- opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
- guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
-
-
- annotations = etree.Element('annotations')
-
- toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
- '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
- '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
- 'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
- '</navMap></ncx>')
- nav_map = toc_file[-1]
-
- if html_toc:
- manifest.append(etree.fromstring(
- '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
- spine.append(etree.fromstring(
- '<itemref idref="html_toc" />'))
- guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
-
- toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
-
- if len(toc.children) < 2:
- toc.add(u"Początek utworu", "part1.html")
-
- # Last modifications in container files and EPUB creation
- if len(annotations) > 0:
- toc.add("Przypisy", "annotations.html")
- manifest.append(etree.fromstring(
- '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
- spine.append(etree.fromstring(
- '<itemref idref="annotations" />'))
- replace_by_verse(annotations)
- html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
- chars = chars.union(used_chars(html_tree.getroot()))
- zip.writestr('OPS/annotations.html', etree.tostring(
- html_tree, method="html", pretty_print=True))
-
- toc.add("Strona redakcyjna", "last.html")
- manifest.append(etree.fromstring(
- '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
- spine.append(etree.fromstring(
- '<itemref idref="last" />'))
- html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
- chars.update(used_chars(html_tree.getroot()))
- zip.writestr('OPS/last.html', etree.tostring(
- html_tree, method="html", pretty_print=True))
-
- if not flags or not 'without-fonts' in flags:
- # strip fonts
- tmpdir = mkdtemp('-librarian-epub')
- try:
- cwd = os.getcwd()
- except OSError:
- cwd = None
-
- os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
- for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
- optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
- get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]
- if verbose:
- print "Running font-optimizer"
- subprocess.check_call(optimizer_call)
- else:
- subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
- manifest.append(etree.fromstring(
- '<item id="%s" href="%s" media-type="font/ttf" />' % (fname, fname)))
- rmtree(tmpdir)
- if cwd is not None:
- os.chdir(cwd)
-
- zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
- title = document.book_info.title
- attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
- for st in attributes:
- meta = toc_file.makeelement(NCXNS('meta'))
- meta.set('name', st)
- meta.set('content', '0')
- toc_file[0].append(meta)
- toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl')))
- toc_file[0][1].set('content', str(toc.depth()))
- set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
-
- # write TOC
- if html_toc:
- toc.add(u"Spis treści", "toc.html", index=1)
- zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
- toc.write_to_xml(nav_map)
- zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
- zip.close()
-
- return OutputFile.from_filename(output_file.name)
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import os.path
-from copy import deepcopy
-from lxml import etree
-
-from librarian import functions, OutputFile
-from .epub import replace_by_verse
-
-
-functions.reg_substitute_entities()
-functions.reg_person_name()
-
-
-def sectionify(tree):
- """Finds section headers and adds a tree of _section tags."""
- sections = ['naglowek_czesc',
- 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
- 'naglowek_podrozdzial']
- section_level = dict((v,k) for (k,v) in enumerate(sections))
-
- # We can assume there are just subelements an no text at section level.
- for level, section_name in reversed(list(enumerate(sections))):
- for header in tree.findall('//' + section_name):
- section = header.makeelement("_section")
- header.addprevious(section)
- section.append(header)
- sibling = section.getnext()
- while (sibling is not None and
- section_level.get(sibling.tag, 1000) > level):
- section.append(sibling)
- sibling = section.getnext()
-
-
-def transform(wldoc, verbose=False,
- cover=None, flags=None):
- """ produces a FB2 file
-
- cover: a cover.Cover object or True for default
- flags: less-advertising, working-copy
- """
-
- document = deepcopy(wldoc)
- del wldoc
-
- if flags:
- for flag in flags:
- document.edoc.getroot().set(flag, 'yes')
-
- style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt')
- style = etree.parse(style_filename)
-
- replace_by_verse(document.edoc)
- sectionify(document.edoc)
-
- result = document.transform(style)
-
- return OutputFile.from_string(unicode(result).encode('utf-8'))
-
-# vim:et
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+
+
class Format(object):
""" Generic format class. """
def __init__(self, doc):
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import re
-from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance
+from PIL import Image, ImageFont, ImageDraw, ImageFilter
from StringIO import StringIO
-from librarian import DCNS, URLOpener
+from librarian import DCNS
from librarian.output import OutputFile
from librarian.utils import get_resource
from librarian.formats import Format
}
def __init__(self, doc, format=None, width=None, height=None):
+ super(Cover, self).__init__(doc)
self.author = ", ".join(auth for auth in doc.meta.get(DCNS('creator')))
self.title = doc.meta.title()
if format is not None:
author_font = ImageFont.truetype(
self.author_font_ttf, metr.author_font_size)
- tbox.text(self.pretty_author(), self.author_color, author_font,
+ tbox.text(
+ self.pretty_author(), self.author_color, author_font,
metr.author_lineskip, self.author_shadow)
text_img = tbox.image()
img.paste(text_img, (metr.author_margin_left, top), text_img)
)
title_font = ImageFont.truetype(
self.title_font_ttf, metr.title_font_size)
- tbox.text(self.pretty_title(), self.title_color, title_font,
+ tbox.text(
+ self.pretty_title(), self.title_color, title_font,
metr.title_lineskip, self.title_shadow)
text_img = tbox.image()
img.paste(text_img, (metr.title_margin_left, top), text_img)
return img
- imgstr = StringIO()
- img.save(imgstr, format=self.format, quality=95)
- OutputFile.from_string(imgstr.getvalue())
+ # imgstr = StringIO()
+ # img.save(imgstr, format=self.format, quality=95)
+ # OutputFile.from_string(imgstr.getvalue())
def mime_type(self):
return self.mime_types[self.format]
title_top = 30
logo_bottom = 100
- def __init__(self, doc, format=None, width=None, height=None):
- super(EvensCover, self).__init__(doc, format=format, width=width, height=height)
- self.doc = doc
-
def set_images(self, ctx):
cover_url = self.doc.meta.get(DCNS('relation.coverimage.url'))[0]
if cover_url.startswith('file://'):
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from PIL import Image, ImageFont, ImageDraw
+from PIL import ImageEnhance
+
from librarian.utils import get_resource
from .. import Cover, Metric, TextBox
self.epoch = doc.meta.get_one('epoch')
self.with_logo = with_logo
# TODO
- if doc.meta.get('cover_url'):
- url = doc.meta.get('cover_url')[0]
- bg_src = None
- if bg_src is None:
- bg_src = URLOpener().open(url)
- self.background_img = StringIO(bg_src.read())
- bg_src.close()
- else:
- self.background_img = self.default_background
+ # if doc.meta.get('cover_url'):
+ # url = doc.meta.get('cover_url')[0]
+ # bg_src = None
+ # if bg_src is None:
+ # bg_src = URLOpener().open(url)
+ # self.background_img = StringIO(bg_src.read())
+ # bg_src.close()
+ # else:
+ self.background_img = self.default_background
def pretty_author(self):
return self.author.upper()
box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y)
author_font = ImageFont.truetype(
self.author_font_ttf, metr.author_font_size)
- box.text(self.pretty_author(),
- font=author_font,
- line_height=metr.author_lineskip,
- color=self.author_color,
- shadow_color=self.author_shadow,
- )
+ box.text(
+ self.pretty_author(),
+ font=author_font,
+ line_height=metr.author_lineskip,
+ color=self.author_color,
+ shadow_color=self.author_shadow,
+ )
box.skip(metr.box_above_line)
- box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
- fill=self.author_color, width=metr.box_line_width)
+ box.draw.line(
+ (metr.box_line_left, box.height, metr.box_line_right, box.height),
+ fill=self.author_color, width=metr.box_line_width)
box.skip(metr.box_below_line)
title_font = ImageFont.truetype(
self.title_font_ttf, metr.title_font_size)
- box.text(self.pretty_title(),
- line_height=metr.title_lineskip,
- font=title_font,
- color=epoch_color,
- shadow_color=self.title_shadow,
- )
+ box.text(
+ self.pretty_title(),
+ line_height=metr.title_lineskip,
+ font=title_font,
+ color=epoch_color,
+ shadow_color=self.title_shadow,
+ )
if self.with_logo:
logo = Image.open(get_resource('res/wl-logo-mono.png'))
# center
box_top = (metr.height - box_img.size[1]) / 2
- box_left = metr.bar_width + (metr.width - metr.bar_width -
- box_img.size[0]) / 2
- draw.rectangle((box_left, box_top,
- box_left + box_img.size[0], box_top + box_img.size[1]),
- fill='#fff')
+ box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2
+ draw.rectangle((box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff')
img.paste(box_img, (box_left, box_top), box_img)
if self.with_logo:
- img.paste(logo,
+ img.paste(
+ logo,
(box_left + (box_img.size[0] - logo.size[0]) / 2,
box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo)
t.find('.//div[@id="content"]').extend(
self.render(self.doc.edoc.getroot(), ctx))
- #t.find('.//div[@id="toc"]').append(ctx.toc.render())
+ # t.find('.//div[@id="toc"]').append(ctx.toc.render())
t.find('.//div[@id="footnotes"]').extend(ctx.footnotes.output)
return OutputFile.from_string(etree.tostring(
def append(self, item):
self.counter += 1
- e = etree.Element("a",
+ e = etree.Element(
+ "a",
href="#footnote-anchor-%d" % self.counter,
id="footnote-%d" % self.counter,
style="float:left;margin-right:1em")
e.tail = " "
self.output.append(e)
self.output.extend(item)
- anchor = etree.Element("a",
+ anchor = etree.Element(
+ "a",
id="footnote-anchor-%d" % self.counter,
href="#footnote-%d" % self.counter)
anchor.text = "[%d]" % self.counter
HtmlFormat.renderers.register(core.Aside, None, NaturalText('aside'))
HtmlFormat.renderers.register(core.Aside, 'comment', Silent())
+
class AsideFootnote(NaturalText):
def render(self, element, ctx):
output = super(AsideFootnote, self).render(element, ctx)
else:
root[0].tag = 'h2'
if root[0].text:
- d = etree.SubElement(root[0], 'a', {'id': root[0].text, 'style': 'pointer: hand; color:#ddd; font-size:.8em'})
- #d.text = "per"
+ d = etree.SubElement(
+ root[0], 'a', {'id': root[0].text, 'style': 'pointer: hand; color:#ddd; font-size:.8em'})
+ # d.text = "per"
return root
-
+
HtmlFormat.renderers.register(core.Header, None, Header('h1'))
HtmlFormat.renderers.register(core.Div, None, NaturalText('div'))
+
class DivDefined(NaturalText):
def render(self, element, ctx):
output = super(DivDefined, self).render(element, ctx)
output[0].text = (output[0].text or '') + ':'
- output[0].attrib['id'] = output[0].text # not so cool?
+ output[0].attrib['id'] = output[0].text # not so cool?
return output
HtmlFormat.renderers.register(core.Div, 'defined', DivDefined('dt', {'style': 'display: inline-block'}))
HtmlFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
HtmlFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
+
class DivListDefinitions(NaturalText):
def render(self, element, ctx):
output = super(DivListDefinitions, self).render(element, ctx)
- #if ctx.toc_level > 2:
- # output[0].attrib['style'] = 'float: right'
+ # if ctx.toc_level > 2:
+ # output[0].attrib['style'] = 'float: right'
return output
HtmlFormat.renderers.register(core.Div, 'list.definitions', DivListDefinitions('ul'))
HtmlFormat.renderers.register(core.Span, 'emph', NaturalText('em'))
HtmlFormat.renderers.register(core.Span, 'emp', NaturalText('strong'))
+
class SpanUri(LiteralText):
def render(self, element, ctx):
root = super(SpanUri, self).render(element, ctx)
return root
HtmlFormat.renderers.register(core.Span, 'uri', SpanUri('a'))
+
class SpanLink(LiteralText):
def render(self, element, ctx):
root = super(SpanLink, self).render(element, ctx)
root[0].attrib['href'] = src
return root
HtmlFormat.renderers.register(core.Span, 'link', SpanLink('a'))
-
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import os
-import re
import shutil
from subprocess import call, PIPE
from tempfile import NamedTemporaryFile, mkdtemp
from librarian.formats import Format
from librarian.output import OutputFile
from librarian.renderers import Register, TreeRenderer
-from librarian.utils import Context, get_resource, extend_element
+from librarian.utils import Context, get_resource
from librarian import core
from PIL import Image
from ..html import Silent
call(['convert', save_as + '_.' + ext, save_as])
else:
# JPEGs with bad density will break LaTeX with 'Dimension too large'.
- r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300', save_as + '_2.' + ext])
+ r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300',
+ save_as + '_2.' + ext])
if r:
shutil.move(save_as + '_.' + ext, save_as)
else:
img = Image.open(self.get_file(build_ctx, 'cover.png'))
size = img.size
- if (size[1] > size[0]):
+ if size[1] > size[0]:
img = img.crop((0, 0, size[0], size[0]))
img.save(self.get_file(build_ctx, 'cover.png'), format=img.format, quality=90)
size = img.size
p[0].append(texml_cmd("noindent"))
p[0].append(texml_cmd("nohyphens", author))
p[0].append(texml_cmd("vspace", "1em"))
- #p[0][-1].tail = author
+ # p[0][-1].tail = author
if title:
p = texml_cmd("par", "")
grp.append(p)
p[0].append(texml_cmd("Huge"))
p[0].append(texml_cmd("noindent"))
p[0].append(texml_cmd("nohyphens", title))
- #p[0][-1].tail = title
+ # p[0][-1].tail = title
doc.append(texml_cmd("vfill"))
doc.append(texml_cmd("vfill"))
cover_logo_url = getattr(build_ctx, 'cover_logo', None)
# TEST
# TODO: convert
- #cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png'
+ # cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png'
if cover_logo_url:
self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True)
size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size
doc.append(texml_cmd("vspace", "1em"))
for m, f in (
- ('Publisher: ', DCNS('publisher')),
- ('Rights: ', DCNS('rights')),
- ('Intended audience: ', DCNS('audience')),
- ('', DCNS('description')),
- ):
+ ('Publisher: ', DCNS('publisher')),
+ ('Rights: ', DCNS('rights')),
+ ('Intended audience: ', DCNS('audience')),
+ ('', DCNS('description'))):
v = self.doc.meta.get_one(f)
if v:
e = texml_cmd("par", "")
doc.append(e)
doc.append(texml_cmd("vspace", "1em"))
-
e = texml_cmd("par", "")
e[0].append(texml_cmd("noindent"))
e[0][0].tail = "Resource prepared using "
doc.append(e)
source_url = getattr(build_ctx, 'source_url', None)
- #source_url = 'http://milpeer.mdrn.pl/documents/27/'
+ # source_url = 'http://milpeer.mdrn.pl/documents/27/'
if source_url:
e = texml_cmd("par", "")
doc.append(e)
texml = self.get_texml(ctx)
tex_path = os.path.join(ctx.workdir, 'doc.tex')
with open(tex_path, 'w') as fout:
- #print etree.tostring(texml)
+ # print etree.tostring(texml)
process(StringIO(etree.tostring(texml)), fout, 'utf-8')
- #~ if self.save_tex:
- #~ shutil.copy(tex_path, self.save_tex)
-
+ # if self.save_tex:
+ # shutil.copy(tex_path, self.save_tex)
-
- #for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
- # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
+ # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
+ # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
return ctx.workdir
def build(self, ctx=None, verbose=False):
else:
for i in range(self.tex_passes):
p = call(['xelatex', '-interaction=batchmode', tex_path],
- stdout=PIPE, stderr=PIPE)
+ stdout=PIPE, stderr=PIPE)
if p:
- #raise ParseError("Error parsing .tex file: %s" % tex_path)
+ # raise ParseError("Error parsing .tex file: %s" % tex_path)
raise RuntimeError("Error parsing .tex file: %s" % tex_path)
if cwd is not None:
return self.renderers.get_for(element).render(element, ctx)
-
-
class CmdRenderer(TreeRenderer):
def parms(self):
return []
+
def container(self):
root = etree.Element(self.root_name)
root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
inner = root[0][-1]
return root, inner
+
class EnvRenderer(TreeRenderer):
def container(self):
root = etree.Element(self.root_name)
inner = etree.SubElement(root, 'env', name=self.tag_name)
return root, inner
+
class GroupRenderer(CmdRenderer):
def container(self):
root = etree.Element(self.root_name)
PdfFormat.renderers.register(core.Div, None, CmdRenderer('par'))
+
class ImgRenderer(CmdRenderer):
def parms(self):
return ["", ""]
root[0][0].text = 'f%d.png' % nr
try:
size = Image.open(ctx.format.get_file(ctx, 'f%d.png' % nr)).size
- except IOError: # not an image
- del root[0];
+ except IOError: # not an image
+ del root[0]
return root
root[0][1].text = '15cm'
root[0][2].text = '%fcm' % (15.0 * size[1] / size[0])
PdfFormat.renderers.register(core.Div, 'list.enum', EnvRenderer('enumerate'))
-
PdfFormat.renderers.register(core.Span, None, TreeRenderer())
PdfFormat.renderers.register(core.Span, 'cite', CmdRenderer('emph'))
PdfFormat.renderers.register(core.Span, 'cite.code', CmdRenderer('texttt'))
PdfFormat.renderers.register(core.Span, 'emp', CmdRenderer('textbf'))
PdfFormat.renderers.register(core.Span, 'emph', CmdRenderer('emph'))
+
class SpanUri(CmdRenderer):
def parms(self):
return [""]
+
def render(self, element, ctx):
root = super(SpanUri, self).render(element, ctx)
src = element.text
if src.startswith('file://'):
- src = ctx.files_path + src[7:]
+ src = ctx.files_path + src[7:]
root[0][0].text = src
return root
PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href'))
class SpanLink(CmdRenderer):
def parms(self):
return [""]
+
def render(self, element, ctx):
root = super(SpanLink, self).render(element, ctx)
src = element.attrib.get('href', '')
if src.startswith('file://'):
- src = ctx.files_path + src[7:]
+ src = ctx.files_path + src[7:]
root[0][0].text = src
return root
PdfFormat.renderers.register(core.Span, 'link', SpanLink('href'))
-
-
PdfFormat.renderers.register(core.Aside, None, TreeRenderer())
PdfFormat.renderers.register(core.Aside, 'editorial', CmdRenderer('editorialpage'))
PdfFormat.renderers.register(core.Aside, 'comment', Silent())
-
return values[0]
else:
return None
-
# Specials.
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from copy import deepcopy
-import os
-import subprocess
-from tempfile import NamedTemporaryFile
-
-from librarian import OutputFile
-from librarian.cover import WLCover
-from librarian import get_resource
-
-
-def transform(wldoc, verbose=False,
- sample=None, cover=None, flags=None):
- """ produces a MOBI file
-
- wldoc: a WLDocument
- sample=n: generate sample e-book (with at least n paragraphs)
- cover: a cover.Cover factory overriding default
- flags: less-advertising,
- """
-
- document = deepcopy(wldoc)
- del wldoc
- book_info = document.book_info
-
- # provide a cover by default
- if not cover:
- cover = WLCover
- cover_file = NamedTemporaryFile(suffix='.png', delete=False)
- bound_cover = cover(book_info)
- bound_cover.save(cover_file)
-
- if bound_cover.uses_dc_cover:
- if document.book_info.cover_by:
- document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
- if document.book_info.cover_source:
- document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
-
- if not flags:
- flags = []
- flags = list(flags) + ['without-fonts']
- epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True,
- flags=flags, style=get_resource('mobi/style.css'))
-
- if verbose:
- kwargs = {}
- else:
- devnull = open("/dev/null", 'w')
- kwargs = {"stdout": devnull, "stderr": devnull}
-
- output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False)
- output_file.close()
- subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name,
- '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs)
- os.unlink(cover_file.name)
- return OutputFile.from_filename(output_file.name)
\ No newline at end of file
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import os
-from copy import deepcopy
-from lxml import etree
-from librarian import pdf, epub, DirDocProvider, ParseError, cover
-from librarian.parser import WLDocument
-
-
-class Packager(object):
- cover = None
- flags = None
-
- @classmethod
- def prepare_file(cls, main_input, output_dir, verbose=False):
- path, fname = os.path.realpath(main_input).rsplit('/', 1)
- provider = DirDocProvider(path)
- slug, ext = os.path.splitext(fname)
-
- if output_dir != '':
- try:
- os.makedirs(output_dir)
- except:
- pass
- outfile = os.path.join(output_dir, slug + '.' + cls.ext)
-
- doc = WLDocument.from_file(main_input, provider=provider)
- output_file = cls.converter.transform(doc,
- cover=cls.cover, flags=cls.flags)
- doc.save_output_file(output_file, output_path=outfile)
-
-
- @classmethod
- def prepare(cls, input_filenames, output_dir='', verbose=False):
- try:
- for main_input in input_filenames:
- if verbose:
- print main_input
- cls.prepare_file(main_input, output_dir, verbose)
- except ParseError, e:
- print '%(file)s:%(name)s:%(message)s' % {
- 'file': main_input,
- 'name': e.__class__.__name__,
- 'message': e.message
- }
-
-
-class EpubPackager(Packager):
- converter = epub
- ext = 'epub'
-
-class PdfPackager(Packager):
- converter = pdf
- ext = 'pdf'
-
-
-class GandalfEpubPackager(EpubPackager):
- cover = cover.GandalfCover
-
-class GandalfPdfPackager(PdfPackager):
- cover = cover.GandalfCover
-
-class BookotekaEpubPackager(EpubPackager):
- cover = cover.BookotekaCover
-
-class PrestigioEpubPackager(EpubPackager):
- cover = cover.PrestigioCover
- flags = ('less-advertising',)
-
-class PrestigioPdfPackager(PdfPackager):
- cover = cover.PrestigioCover
- flags = ('less-advertising',)
-
-
-class VirtualoPackager(Packager):
- @staticmethod
- def utf_trunc(text, limit):
- """ truncates text to at most `limit' bytes in utf-8 """
- if text is None:
- return text
- if len(text.encode('utf-8')) > limit:
- newlimit = limit - 3
- while len(text.encode('utf-8')) > newlimit:
- text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
- text += '...'
- return text
-
- @classmethod
- def prepare(cls, input_filenames, output_dir='', verbose=False):
- xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
- <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
- product = etree.fromstring("""<product>
- <publisherProductId></publisherProductId>
- <title></title>
- <info></info>
- <description></description>
- <authors>
- <author>
- <names>Jan</names>
- <lastName>Kowalski</lastName>
- </author>
- </authors>
- <price>0.0</price>
- <language>PL</language>
- </product>""")
-
- try:
- for main_input in input_filenames:
- if verbose:
- print main_input
- path, fname = os.path.realpath(main_input).rsplit('/', 1)
- provider = DirDocProvider(path)
- slug, ext = os.path.splitext(fname)
-
- outfile_dir = os.path.join(output_dir, slug)
- os.makedirs(os.path.join(output_dir, slug))
-
- doc = WLDocument.from_file(main_input, provider=provider)
- info = doc.book_info
-
- product_elem = deepcopy(product)
- product_elem[0].text = cls.utf_trunc(slug, 100)
- product_elem[1].text = cls.utf_trunc(info.title, 255)
- product_elem[2].text = cls.utf_trunc(info.description, 255)
- product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
- product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
- product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
- xml.append(product_elem)
-
- cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
- outfile = os.path.join(outfile_dir, '1.epub')
- outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
- doc.save_output_file(doc.as_epub(),
- output_path=outfile)
- doc.save_output_file(doc.as_epub(doc, sample=25),
- output_path=outfile_sample)
- outfile = os.path.join(outfile_dir, '1.mobi')
- outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
- doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
- output_path=outfile)
- doc.save_output_file(
- doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
- output_path=outfile_sample)
- except ParseError, e:
- print '%(file)s:%(name)s:%(message)s' % {
- 'file': main_input,
- 'name': e.__class__.__name__,
- 'message': e.message
- }
-
- xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
- xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
- xml_file.close()
class SSTParser(etree.XMLParser):
""" XML parser using relevant element classes. """
def __init__(self):
- super(SSTParser, self).__init__(remove_blank_text=False)
+ super(SSTParser, self).__init__()
lookup = etree.ElementNamespaceClassLookup()
self.set_element_class_lookup(lookup)
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-"""PDF creation library.
-
-Creates one big XML from the book and its children, converts it to LaTeX
-with TeXML, then runs it by XeLaTeX.
-
-"""
-from __future__ import with_statement
-import os
-import os.path
-import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
-import re
-from copy import deepcopy
-from subprocess import call, PIPE
-
-from Texml.processor import process
-from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-
-from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, OutputFile
-from librarian import functions
-from librarian.cover import WLCover
-
-
-functions.reg_substitute_entities()
-functions.reg_strip()
-functions.reg_starts_white()
-functions.reg_ends_white()
-functions.reg_texcommand()
-
-STYLESHEETS = {
- 'wl2tex': 'pdf/wl2tex.xslt',
-}
-
-#CUSTOMIZATIONS = [
-# 'nofootnotes',
-# 'nothemes',
-# 'defaultleading',
-# 'onehalfleading',
-# 'doubleleading',
-# 'nowlfont',
-# ]
-
-def insert_tags(doc, split_re, tagname, exclude=None):
- """ inserts <tagname> for every occurence of `split_re' in text nodes in the `doc' tree
-
- >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>');
- >>> insert_tags(t, re.compile('-'), 'd');
- >>> print etree.tostring(t)
- <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
- """
-
- for elem in doc.iter(tag=etree.Element):
- if exclude and elem.tag in exclude:
- continue
- if elem.text:
- chunks = split_re.split(elem.text)
- while len(chunks) > 1:
- ins = etree.Element(tagname)
- ins.tail = chunks.pop()
- elem.insert(0, ins)
- elem.text = chunks.pop(0)
- if elem.tail:
- chunks = split_re.split(elem.tail)
- parent = elem.getparent()
- ins_index = parent.index(elem) + 1
- while len(chunks) > 1:
- ins = etree.Element(tagname)
- ins.tail = chunks.pop()
- parent.insert(ins_index, ins)
- elem.tail = chunks.pop(0)
-
-
-def substitute_hyphens(doc):
- insert_tags(doc,
- re.compile("(?<=[^-\s])-(?=[^-\s])"),
- "dywiz",
- exclude=[DCNS("identifier.url"), DCNS("rights.license")]
- )
-
-
-def fix_hanging(doc):
- insert_tags(doc,
- re.compile("(?<=\s\w)\s+"),
- "nbsp",
- exclude=[DCNS("identifier.url"), DCNS("rights.license")]
- )
-
-
-def move_motifs_inside(doc):
- """ moves motifs to be into block elements """
- for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
- for motif in master.xpath('motyw'):
- for sib in motif.itersiblings():
- if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'):
- # motif shouldn't have a tail - it would be untagged text
- motif.tail = None
- motif.getparent().remove(motif)
- sib.insert(0, motif)
- break
-
-
-def hack_motifs(doc):
- """ dirty hack for the marginpar-creates-orphans LaTeX problem
- see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304
-
- moves motifs in stanzas from first verse to second
- and from next to last to last, then inserts negative vspace before them
- """
- for motif in doc.findall('//strofa//motyw'):
- # find relevant verse-level tag
- verse, stanza = motif, motif.getparent()
- while stanza is not None and stanza.tag != 'strofa':
- verse, stanza = stanza, stanza.getparent()
- breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True))
- breaks_after = sum(1 for i in verse.itersiblings('br'))
- if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1:
- move_by = 1
- if breaks_after == 2:
- move_by += 1
- moved_motif = deepcopy(motif)
- motif.tag = 'span'
- motif.text = None
- moved_motif.tail = None
- moved_motif.set('moved', str(move_by))
-
- for br in verse.itersiblings('br'):
- if move_by > 1:
- move_by -= 1
- continue
- br.addnext(moved_motif)
- break
-
-
-def parse_creator(doc):
- """Generates readable versions of creator and translator tags.
-
- Finds all dc:creator and dc.contributor.translator tags
- and adds *_parsed versions with forenames first.
- """
- for person in doc.xpath("|".join('//dc:'+(tag) for tag in (
- 'creator', 'contributor.translator')),
- namespaces = {'dc': str(DCNS)})[::-1]:
- if not person.text:
- continue
- p = Person.from_text(person.text)
- person_parsed = deepcopy(person)
- person_parsed.tag = person.tag + '_parsed'
- person_parsed.set('sortkey', person.text)
- person_parsed.text = p.readable()
- person.getparent().insert(0, person_parsed)
-
-
-def get_stylesheet(name):
- return get_resource(STYLESHEETS[name])
-
-
-def package_available(package, args='', verbose=False):
- """ check if a verion of a latex package accepting given args is available """
- tempdir = mkdtemp('-wl2pdf-test')
- fpath = os.path.join(tempdir, 'test.tex')
- f = open(fpath, 'w')
- f.write(r"""
- \documentclass{wl}
- \usepackage[%s]{%s}
- \begin{document}
- \end{document}
- """ % (args, package))
- f.close()
- if verbose:
- p = call(['xelatex', '-output-directory', tempdir, fpath])
- else:
- p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
- shutil.rmtree(tempdir)
- return p == 0
-
-
-def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
- cover=None, flags=None, customizations=None):
- """ produces a PDF file with XeLaTeX
-
- wldoc: a WLDocument
- verbose: prints all output from LaTeX
- save_tex: path to save the intermediary LaTeX file to
- morefloats (old/new/none): force specific morefloats
- cover: a cover.Cover factory or True for default
- flags: less-advertising,
- customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class)
- """
-
- # Parse XSLT
- try:
- book_info = wldoc.book_info
- document = load_including_children(wldoc)
- root = document.edoc.getroot()
-
- if cover:
- if cover is True:
- cover = WLCover
- bound_cover = cover(book_info)
- root.set('data-cover-width', str(bound_cover.width))
- root.set('data-cover-height', str(bound_cover.height))
- if bound_cover.uses_dc_cover:
- if book_info.cover_by:
- root.set('data-cover-by', book_info.cover_by)
- if book_info.cover_source:
- root.set('data-cover-source',
- book_info.cover_source)
- if flags:
- for flag in flags:
- root.set('flag-' + flag, 'yes')
-
- # check for LaTeX packages
- if morefloats:
- root.set('morefloats', morefloats.lower())
- elif package_available('morefloats', 'maxfloats=19'):
- root.set('morefloats', 'new')
-
- # add customizations
- if customizations is not None:
- root.set('customizations', u','.join(customizations))
-
- # add editors info
- root.set('editors', u', '.join(sorted(
- editor.readable() for editor in document.editors())))
-
- # hack the tree
- move_motifs_inside(document.edoc)
- hack_motifs(document.edoc)
- parse_creator(document.edoc)
- substitute_hyphens(document.edoc)
- fix_hanging(document.edoc)
-
- # wl -> TeXML
- style_filename = get_stylesheet("wl2tex")
- style = etree.parse(style_filename)
-
- texml = document.transform(style)
-
- # TeXML -> LaTeX
- temp = mkdtemp('-wl2pdf')
-
- if cover:
- with open(os.path.join(temp, 'cover.png'), 'w') as f:
- bound_cover.save(f)
-
- del document # no longer needed large object :)
-
- tex_path = os.path.join(temp, 'doc.tex')
- fout = open(tex_path, 'w')
- process(StringIO(texml), fout, 'utf-8')
- fout.close()
- del texml
-
- if save_tex:
- shutil.copy(tex_path, save_tex)
-
- # LaTeX -> PDF
- shutil.copy(get_resource('pdf/wl.cls'), temp)
- shutil.copy(get_resource('res/wl-logo.png'), temp)
-
- try:
- cwd = os.getcwd()
- except OSError:
- cwd = None
- os.chdir(temp)
-
- if verbose:
- p = call(['xelatex', tex_path])
- else:
- p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
- if p:
- raise ParseError("Error parsing .tex file")
-
- if cwd is not None:
- os.chdir(cwd)
-
- output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
- pdf_path = os.path.join(temp, 'doc.pdf')
- shutil.move(pdf_path, output_file.name)
- shutil.rmtree(temp)
- return OutputFile.from_filename(output_file.name)
-
- except (XMLSyntaxError, XSLTApplyError), e:
- raise ParseError(e)
-
-
-def load_including_children(wldoc=None, provider=None, uri=None):
- """ Makes one big xml file with children inserted at end.
-
- Either wldoc or provider and URI must be provided.
- """
-
- if uri and provider:
- f = provider.by_uri(uri)
- text = f.read().decode('utf-8')
- f.close()
- elif wldoc is not None:
- text = etree.tostring(wldoc.edoc, encoding=unicode)
- provider = wldoc.provider
- else:
- raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
-
- text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
-
- document = WLDocument.from_string(text,
- parse_dublincore=True, provider=provider)
- document.swap_endlines()
-
- for child_uri in document.book_info.parts:
- child = load_including_children(provider=provider, uri=child_uri)
- document.edoc.getroot().append(child.edoc.getroot())
- return document
return root
-
class Register(object):
""" Class-renderer register.
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import copy
-from librarian import functions, OutputFile
-from lxml import etree
-import os
-
-
-functions.reg_substitute_entities()
-functions.reg_wrap_words()
-functions.reg_strip()
-functions.reg_person_name()
-
-TEMPLATE = u"""\
-%(text)s
-
-
------
-Ta lektura, podobnie jak tysiące innych, dostępna jest na stronie wolnelektury.pl.
-Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %(url)s.
-
-Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska.
-
-%(license_description)s.%(source)s
-
-%(description)s%(contributors)s
-"""
-
-def transform(wldoc, flags=None, **options):
- """
- Transforms input_file in XML to output_file in TXT.
- possible flags: raw-text,
- """
- # Parse XSLT
- style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
- style = etree.parse(style_filename)
-
- document = copy.deepcopy(wldoc)
- del wldoc
- document.swap_endlines()
-
- if flags:
- for flag in flags:
- document.edoc.getroot().set(flag, 'yes')
-
- result = document.transform(style, **options)
-
- if not flags or 'raw-text' not in flags:
- if document.book_info:
- parsed_dc = document.book_info
- description = parsed_dc.description
- url = document.book_info.url
-
- license_description = parsed_dc.license_description
- license = parsed_dc.license
- if license:
- license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license)
- else:
- license_description = u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)"
-
- source = parsed_dc.source_name
- if source:
- source = "\n\nTekst opracowany na podstawie: " + source
- else:
- source = ''
-
- contributors = ', '.join(person.readable() for person in
- sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p)))
- if contributors:
- contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors
- else:
- description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
- url = '*' * 10
- license = ""
- license_description = ""
- source = ""
- contributors = ""
- return OutputFile.from_string((TEMPLATE % {
- 'description': description,
- 'url': url,
- 'license_description': license_description,
- 'text': unicode(result),
- 'source': source,
- 'contributors': contributors,
- }).encode('utf-8'))
- else:
- return OutputFile.from_string(unicode(result).encode('utf-8'))
-
elif self._upctx is not None:
return getattr(self._upctx, name)
else:
- raise AttributeError, "'%s' object has no attribute '%s'" % (type(self), name)
+ raise AttributeError("'%s' object has no attribute '%s'" % (type(self), name))
def __setattr__(self, name, value):
try:
class XMLNamespace(object):
- '''A handy structure to repsent names in an XML namespace.'''
+ """A handy structure to repsent names in an XML namespace."""
def __init__(self, uri):
self.uri = uri
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import optparse
-
-from librarian import packagers
-
-
-if __name__ == '__main__':
- # Parse commandline arguments
- usage = """Usage: %prog [options] SOURCE [SOURCE...]
- Prepare SOURCE files for a partner."""
-
- parser = optparse.OptionParser(usage=usage)
-
- parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
- parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
- help='specifies the directory for output')
- parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False,
- help='prepare files for Bookoteka')
- parser.add_option('--gandalf', action='store_true', dest='gandalf', default=False,
- help='prepare EPUB files for Gandalf')
- parser.add_option('--gandalf-pdf', action='store_true', dest='gandalf_pdf', default=False,
- help='prepare PDF files for Gandalf')
- parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
- help='prepare files for Virtualo API')
- parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False,
- help='prepare files for Prestigio')
- parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False,
- help='prepare PDF files for Prestigio')
-
- options, input_filenames = parser.parse_args()
-
- if len(input_filenames) < 1:
- parser.print_help()
- exit(1)
-
- if options.bookoteka:
- packagers.BookotekaEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
- if options.gandalf:
- packagers.GandalfEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
- if options.gandalf_pdf:
- packagers.GandalfPdfPackager.prepare(input_filenames, options.output_dir, options.verbose)
- if options.virtualo:
- packagers.VirtualoPackager.prepare(input_filenames, options.output_dir, options.verbose)
- if options.prestigio:
- packagers.PrestigioEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
- if options.prestigio_pdf:
- packagers.PrestigioPdfPackager.prepare(input_filenames, options.output_dir, options.verbose)
'scripts/book2mobi',
'scripts/book2pdf',
'scripts/book2fb2',
- 'scripts/book2partner',
'scripts/book2cover',
'scripts/bookfragments',
'scripts/genslugs'],
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from librarian import dcparser
-from lxml import etree
-from nose.tools import *
-from os.path import splitext
-from tests.utils import get_all_fixtures
-import codecs
-
-
-def check_dcparser(xml_file, result_file):
- xml = file(xml_file).read()
- result = codecs.open(result_file, encoding='utf-8').read()
- info = dcparser.BookInfo.from_string(xml).to_dict()
- should_be = eval(result)
- for key in should_be:
- assert_equals(info[key], should_be[key])
-
-
-def test_dcparser():
- for fixture in get_all_fixtures('dcparser', '*.xml'):
- base_name = splitext(fixture)[0]
- yield check_dcparser, fixture, base_name + '.out'
-
-
-def check_serialize(xml_file):
- xml = file(xml_file).read()
- info = dcparser.BookInfo.from_string(xml)
-
- # serialize
- serialized = etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8')
- # then parse again
- info_bis = dcparser.BookInfo.from_string(serialized)
-
- # check if they are the same
- for key in vars(info):
- assert_equals(getattr(info, key), getattr(info_bis, key))
- for key in vars(info_bis):
- assert_equals(getattr(info, key), getattr(info_bis, key))
-
-
-def test_serialize():
- for fixture in get_all_fixtures('dcparser', '*.xml'):
- yield check_serialize, fixture
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from zipfile import ZipFile
-from lxml import html
-from nose.tools import *
-from librarian import DirDocProvider
-from librarian.parser import WLDocument
-from tests.utils import get_fixture
-
-
-def test_transform():
- epub = WLDocument.from_file(
- get_fixture('text', 'asnyk_zbior.xml'),
- provider=DirDocProvider(get_fixture('text', ''))
- ).as_epub(flags=['without_fonts']).get_file()
- zipf = ZipFile(epub)
-
- # Check contributor list.
- last = zipf.open('OPS/last.html')
- tree = html.parse(last)
- editors_attribution = False
- for par in tree.findall("//p"):
- if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'):
- editors_attribution = True
- assert_equal(par.text.rstrip(),
- u'Opracowanie redakcyjne i przypisy: '
- u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
- assert_true(editors_attribution)
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from librarian import NoDublinCore
-from librarian.parser import WLDocument
-from nose.tools import *
-from utils import get_fixture
-
-
-def test_transform():
- expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')
-
- html = WLDocument.from_file(
- get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
- ).as_html().get_string()
-
- assert_equal(html, file(expected_output_file_path).read())
-
-
-@raises(NoDublinCore)
-def test_no_dublincore():
- WLDocument.from_file(
- get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
- ).as_html()
-
-
-def test_passing_parse_dublincore_to_transform():
- """Passing parse_dublincore=False to transform omits DublinCore parsing."""
- WLDocument.from_file(
- get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
- parse_dublincore=False,
- ).as_html()
-
-def test_empty():
- assert not WLDocument.from_string(
- '<utwor />',
- parse_dublincore=False,
- ).as_html()
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import re
-from tempfile import NamedTemporaryFile
-from nose.tools import *
-from librarian import DirDocProvider
-from librarian.parser import WLDocument
-from utils import get_fixture
-
-
-def test_transform():
- temp = NamedTemporaryFile(delete=False)
- temp.close()
- WLDocument.from_file(
- get_fixture('text', 'asnyk_zbior.xml'),
- provider=DirDocProvider(get_fixture('text', ''))
- ).as_pdf(save_tex=temp.name)
- tex = open(temp.name).read().decode('utf-8')
- print tex
-
- # Check contributor list.
- editors = re.search(ur'\\def\\editors\{'
- ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex)
- assert_equal(editors.group(1),
- u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from librarian import picture, dcparser
-from lxml import etree
-from nose.tools import *
-from os.path import splitext
-from tests.utils import get_all_fixtures, get_fixture
-import codecs
-from os import path
-
-def test_wlpictureuri():
- uri = picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus')
-
-def check_load(xml_file):
- pi = dcparser.parse(xml_file, picture.PictureInfo)
- assert pi is not None
- assert isinstance(pi, picture.PictureInfo)
-
-
-def test_load():
- for fixture in get_all_fixtures('picture', '*.xml'):
- yield check_load, fixture
-
-
-def test_wlpicture():
- wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml')))
- pi = wlp.picture_info
-
- # from nose.tools import set_trace; set_trace()
- assert pi.type[0] == u"Image"
- assert pi.mime_type == u'image/jpeg' == wlp.mime_type
- assert wlp.slug == 'angelus-novus'
-
- assert path.exists(wlp.image_path)
-
- f = wlp.image_file('r')
- f.close()
-
-def test_picture_parts():
- wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml')))
- parts = list(wlp.partiter())
- assert len(parts) == 5, "there should be %d parts of the picture" % 5
- motifs = set()
- names = set()
-
- print parts
- for p in parts:
- for m in p['themes']:
- motifs.add(m)
- for p in parts:
- if p['object']:
- names.add(p['object'])
-
- assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs
- assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names
-
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from librarian import NoDublinCore
-from librarian.parser import WLDocument
-from nose.tools import *
-from utils import get_fixture
-
-
-def test_transform():
- expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
-
- text = WLDocument.from_file(
- get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
- ).as_text().get_string()
-
- assert_equal(text, file(expected_output_file_path).read())
-
-
-@raises(NoDublinCore)
-def test_no_dublincore():
- WLDocument.from_file(
- get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
- ).as_text()
-
-
-def test_passing_parse_dublincore_to_transform():
- """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
- WLDocument.from_file(
- get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
- parse_dublincore=False,
- ).as_text()
from __future__ import with_statement
from os.path import realpath, join, dirname
import glob
-import os
+
def get_fixture_dir(dir_name):
"""Returns path to fixtures directory dir_name."""