From 3f24ff6b4246a5206555952f6e6c53f6ed5231d8 Mon Sep 17 00:00:00 2001 From: Jan Szejko Date: Thu, 15 Dec 2016 12:43:14 +0100 Subject: [PATCH] pep8, style, dead code cleanup etc. --- librarian/__init__.py | 5 + librarian/book2anything.py | 44 +- librarian/dcparser.py | 383 ------------ librarian/document.py | 5 +- librarian/epub.py | 563 ------------------ librarian/fb2.py | 63 -- librarian/formats/__init__.py | 2 + librarian/formats/cover/__init__.py | 17 +- librarian/formats/cover/evens/__init__.py | 4 - .../formats/cover/wolnelektury/__init__.py | 61 +- librarian/formats/html/__init__.py | 27 +- librarian/formats/pdf/__init__.py | 63 +- librarian/meta.py | 1 - librarian/mobi.py | 60 -- librarian/packagers.py | 156 ----- librarian/parser.py | 2 +- librarian/pdf.py | 321 ---------- librarian/renderers.py | 1 - librarian/text.py | 91 --- librarian/utils.py | 4 +- scripts/book2partner | 53 -- setup.py | 1 - tests/test_dcparser.py | 48 -- tests/test_epub.py | 31 - tests/test_html.py | 40 -- tests/test_pdf.py | 28 - tests/test_picture.py | 60 -- tests/test_text.py | 34 -- tests/utils.py | 2 +- 29 files changed, 125 insertions(+), 2045 deletions(-) delete mode 100644 librarian/dcparser.py delete mode 100644 librarian/epub.py delete mode 100644 librarian/fb2.py delete mode 100644 librarian/mobi.py delete mode 100644 librarian/packagers.py delete mode 100644 librarian/pdf.py delete mode 100644 librarian/text.py delete mode 100755 scripts/book2partner delete mode 100644 tests/test_dcparser.py delete mode 100644 tests/test_epub.py delete mode 100644 tests/test_html.py delete mode 100644 tests/test_pdf.py delete mode 100644 tests/test_picture.py delete mode 100644 tests/test_text.py diff --git a/librarian/__init__.py b/librarian/__init__.py index 02464ef..a0d70e7 100644 --- a/librarian/__init__.py +++ b/librarian/__init__.py @@ -31,6 +31,11 @@ class ValidationError(UnicodeException): pass +# was deleted, but still used??? +class NoDublinCore(ValidationError): + pass + + class BuildError(Exception): pass diff --git a/librarian/book2anything.py b/librarian/book2anything.py index e46a4b4..d4b9a78 100755 --- a/librarian/book2anything.py +++ b/librarian/book2anything.py @@ -32,10 +32,10 @@ class Book2Anything(object): Subclass it for any format you want to convert to. """ - format_cls = None # A formats.Format subclass - document_options = [] # List of Option objects for document options. - format_options = [] # List of Option objects for format customization. - build_options = [] # List of Option objects for build options. + format_cls = None # A formats.Format subclass + document_options = [] # List of Option objects for document options. + format_options = [] # List of Option objects for format customization. + build_options = [] # List of Option objects for build options. @classmethod def run(cls): @@ -45,12 +45,14 @@ class Book2Anything(object): parser = optparse.OptionParser(usage=usage) - parser.add_option('-v', '--verbose', - action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-o', '--output-file', - dest='output_file', metavar='FILE', - help='specifies the output file') + parser.add_option( + '-v', '--verbose', + action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option( + '-o', '--output-file', + dest='output_file', metavar='FILE', + help='specifies the output file') for option in cls.document_options + cls.format_options + cls.build_options: option.add(parser) @@ -58,7 +60,7 @@ class Book2Anything(object): if len(input_filenames) < 1: parser.print_help() - return(1) + return 1 # Prepare additional args for document. document_args = {} @@ -79,18 +81,18 @@ class Book2Anything(object): if options.verbose: print main_input - # Do the transformation. - doc = Document.from_file(main_input, **document_args) - format_ = cls.format_cls(doc, **format_args) + # Do the transformation. + doc = Document.from_file(main_input, **document_args) + format_ = cls.format_cls(doc, **format_args) - # Where to write output? - if not options.output_file: - output_file = os.path.splitext(main_input)[0] + '.' + format_.format_ext - else: - output_file = None + # Where to write output? + if not options.output_file: + output_file = os.path.splitext(main_input)[0] + '.' + format_.format_ext + else: + output_file = None - output = format_.build(**build_args) - output.save_as(output_file) + output = format_.build(**build_args) + output.save_as(output_file) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { diff --git a/librarian/dcparser.py b/librarian/dcparser.py deleted file mode 100644 index eddd8e5..0000000 --- a/librarian/dcparser.py +++ /dev/null @@ -1,383 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from xml.parsers.expat import ExpatError -from datetime import date -import time - -from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS, - WLURI) - -import lxml.etree as etree # ElementTree API using libxml2 -from lxml.etree import XMLSyntaxError - - -# ============== -# = Converters = -# ============== -class Person(object): - """Single person with last name and a list of first names.""" - def __init__(self, last_name, *first_names): - self.last_name = last_name - self.first_names = first_names - - @classmethod - def from_text(cls, text): - parts = [ token.strip() for token in text.split(',') ] - if len(parts) == 1: - surname = parts[0] - names = [] - elif len(parts) != 2: - raise ValueError("Invalid person name. There should be at most one comma: \"%s\"." % text) - else: - surname = parts[0] - if len(parts[1]) == 0: - # there is no non-whitespace data after the comma - raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts)) - names = [ name for name in parts[1].split() if len(name) ] # all non-whitespace tokens - return cls(surname, *names) - - def readable(self): - return u" ".join(self.first_names + (self.last_name,)) - - def __eq__(self, right): - return self.last_name == right.last_name and self.first_names == right.first_names - - def __cmp__(self, other): - return cmp((self.last_name, self.first_names), (other.last_name, other.first_names)) - - def __hash__(self): - return hash((self.last_name, self.first_names)) - - def __unicode__(self): - if len(self.first_names) > 0: - return '%s, %s' % (self.last_name, ' '.join(self.first_names)) - else: - return self.last_name - - def __repr__(self): - return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) - -def as_date(text): - try: - try: - t = time.strptime(text, '%Y-%m-%d') - except ValueError: - t = time.strptime(text, '%Y') - return date(t[0], t[1], t[2]) - except ValueError, e: - raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.") - -def as_person(text): - return Person.from_text(text) - -def as_unicode(text): - if isinstance(text, unicode): - return text - else: - return text.decode('utf-8') - -def as_wluri_strict(text): - return WLURI.strict(text) - -class Field(object): - def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs): - self.uri = uri - self.name = attr_name - self.validator = validator - self.strict = strict - self.multiple = multiple - self.salias = salias - - self.required = kwargs.get('required', True) and not kwargs.has_key('default') - self.default = kwargs.get('default', [] if multiple else [None]) - - def validate_value(self, val, strict=False): - if strict and self.strict is not None: - validator = self.strict - else: - validator = self.validator - try: - if self.multiple: - if validator is None: - return val - return [ validator(v) if v is not None else v for v in val ] - elif len(val) > 1: - raise ValidationError("Multiple values not allowed for field '%s'" % self.uri) - elif len(val) == 0: - raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri) - else: - if validator is None or val[0] is None: - return val[0] - return validator(val[0]) - except ValueError, e: - raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message)) - - def validate(self, fdict, fallbacks=None, strict=False): - if fallbacks is None: - fallbacks = {} - if not fdict.has_key(self.uri): - if not self.required: - # Accept single value for single fields and saliases. - if self.name in fallbacks: - if self.multiple: - f = fallbacks[self.name] - else: - f = [fallbacks[self.name]] - elif self.salias and self.salias in fallbacks: - f = [fallbacks[self.salias]] - else: - f = self.default - else: - raise ValidationError("Required field %s not found" % self.uri) - else: - f = fdict[self.uri] - - return self.validate_value(f, strict=strict) - - def __eq__(self, other): - if isinstance(other, Field) and other.name == self.name: - return True - return False - - -class DCInfo(type): - def __new__(meta, classname, bases, class_dict): - fields = list(class_dict['FIELDS']) - - for base in bases[::-1]: - if hasattr(base, 'FIELDS'): - for field in base.FIELDS[::-1]: - try: - fields.index(field) - except ValueError: - fields.insert(0, field) - - class_dict['FIELDS'] = tuple(fields) - return super(DCInfo, meta).__new__(meta, classname, bases, class_dict) - - -class WorkInfo(object): - __metaclass__ = DCInfo - - FIELDS = ( - Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True), - Field( DCNS('title'), 'title'), - Field( DCNS('type'), 'type', required=False, multiple=True), - - Field( DCNS('contributor.editor'), 'editors', \ - as_person, salias='editor', multiple=True, default=[]), - Field( DCNS('contributor.technical_editor'), 'technical_editors', - as_person, salias='technical_editor', multiple=True, default=[]), - - Field( DCNS('date'), 'created_at', as_date), - Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), - Field( DCNS('publisher'), 'publisher'), - - Field( DCNS('language'), 'language'), - Field( DCNS('description'), 'description', required=False), - - Field( DCNS('source'), 'source_name', required=False), - Field( DCNS('source.URL'), 'source_url', required=False), - Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict), - Field( DCNS('rights.license'), 'license', required=False), - Field( DCNS('rights'), 'license_description'), - ) - - @classmethod - def from_string(cls, xml, *args, **kwargs): - from StringIO import StringIO - return cls.from_file(StringIO(xml), *args, **kwargs) - - @classmethod - def from_file(cls, xmlfile, *args, **kwargs): - desc_tag = None - try: - iter = etree.iterparse(xmlfile, ['start', 'end']) - for (event, element) in iter: - if element.tag == RDFNS('RDF') and event == 'start': - desc_tag = element - break - - if desc_tag is None: - raise NoDublinCore("DublinCore section not found. \ - Check if there are rdf:RDF and rdf:Description tags.") - - # continue 'till the end of RDF section - for (event, element) in iter: - if element.tag == RDFNS('RDF') and event == 'end': - break - - # if there is no end, Expat should yell at us with an ExpatError - - # extract data from the element and make the info - return cls.from_element(desc_tag, *args, **kwargs) - except XMLSyntaxError, e: - raise ParseError(e) - except ExpatError, e: - raise ParseError(e) - - @classmethod - def from_element(cls, rdf_tag, *args, **kwargs): - # the tree is already parsed, so we don't need to worry about Expat errors - field_dict = {} - desc = rdf_tag.find(".//" + RDFNS('Description')) - - if desc is None: - raise NoDublinCore("No DublinCore section found.") - - for e in desc.getchildren(): - fv = field_dict.get(e.tag, []) - fv.append(e.text) - field_dict[e.tag] = fv - - return cls(desc.attrib, field_dict, *args, **kwargs) - - def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False): - """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description. - dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the - given field. """ - - self.about = rdf_attrs.get(RDFNS('about')) - self.fmap = {} - - for field in self.FIELDS: - value = field.validate(dc_fields, fallbacks=fallbacks, - strict=strict) - setattr(self, 'prop_' + field.name, value) - self.fmap[field.name] = field - if field.salias: self.fmap[field.salias] = field - - def __getattribute__(self, name): - try: - field = object.__getattribute__(self, 'fmap')[name] - value = object.__getattribute__(self, 'prop_'+field.name) - if field.name == name: - return value - else: # singular alias - if not field.multiple: - raise "OUCH!! for field %s" % name - - return value[0] if value else None - except (KeyError, AttributeError): - return object.__getattribute__(self, name) - - def __setattr__(self, name, newvalue): - try: - field = object.__getattribute__(self, 'fmap')[name] - if field.name == name: - object.__setattr__(self, 'prop_'+field.name, newvalue) - else: # singular alias - if not field.multiple: - raise "OUCH! while setting field %s" % name - - object.__setattr__(self, 'prop_'+field.name, [newvalue]) - except (KeyError, AttributeError): - return object.__setattr__(self, name, newvalue) - - def update(self, field_dict): - """Update using field_dict. Verify correctness, but don't check if all - required fields are present.""" - for field in self.FIELDS: - if field_dict.has_key(field.name): - setattr(self, field.name, field_dict[field.name]) - - def to_etree(self, parent = None): - """XML representation of this object.""" - #etree._namespace_map[str(self.RDF)] = 'rdf' - #etree._namespace_map[str(self.DC)] = 'dc' - - if parent is None: - root = etree.Element(RDFNS('RDF')) - else: - root = parent.makeelement(RDFNS('RDF')) - - description = etree.SubElement(root, RDFNS('Description')) - - if self.about: - description.set(RDFNS('about'), self.about) - - for field in self.FIELDS: - v = getattr(self, field.name, None) - if v is not None: - if field.multiple: - if len(v) == 0: continue - for x in v: - e = etree.Element(field.uri) - if x is not None: - e.text = unicode(x) - description.append(e) - else: - e = etree.Element(field.uri) - e.text = unicode(v) - description.append(e) - - return root - - def serialize(self): - rdf = {} - rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about } - - dc = {} - for field in self.FIELDS: - v = getattr(self, field.name, None) - if v is not None: - if field.multiple: - if len(v) == 0: continue - v = [ unicode(x) for x in v if x is not None ] - else: - v = unicode(v) - - dc[field.name] = {'uri': field.uri, 'value': v} - rdf['fields'] = dc - return rdf - - def to_dict(self): - result = {'about': self.about} - for field in self.FIELDS: - v = getattr(self, field.name, None) - - if v is not None: - if field.multiple: - if len(v) == 0: continue - v = [ unicode(x) for x in v if x is not None ] - else: - v = unicode(v) - result[field.name] = v - - if field.salias: - v = getattr(self, field.salias) - if v is not None: result[field.salias] = unicode(v) - - return result - - -class BookInfo(WorkInfo): - FIELDS = ( - Field( DCNS('audience'), 'audiences', salias='audience', multiple=True, - required=False), - - Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, - required=False), - Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True, - required=False), - Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True, - required=False), - - Field( DCNS('contributor.translator'), 'translators', \ - as_person, salias='translator', multiple=True, default=[]), - Field( DCNS('relation.hasPart'), 'parts', - WLURI, strict=as_wluri_strict, multiple=True, required=False), - Field( DCNS('relation.isVariantOf'), 'variant_of', - WLURI, strict=as_wluri_strict, required=False), - - Field( DCNS('relation.coverImage.url'), 'cover_url', required=False), - Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False), - Field( DCNS('relation.coverImage.source'), 'cover_source', required=False), - ) - - -def parse(file_name, cls=BookInfo): - return cls.from_file(file_name) diff --git a/librarian/document.py b/librarian/document.py index acc80ae..a3251a6 100755 --- a/librarian/document.py +++ b/librarian/document.py @@ -34,9 +34,8 @@ class Document(object): raise ValueError("Invalid root element. Found '%s', should be '%s'" % ( root_elem.tag, SSTNS('section'))) else: - raise ValueError("Invalid class of root element. " - "Use librarian.parser.SSTParser.") - #print etree.tostring(self.edoc.getroot()) + raise ValueError("Invalid class of root element. Use librarian.parser.SSTParser.") + # print etree.tostring(self.edoc.getroot()) @classmethod def from_string(cls, xml, *args, **kwargs): diff --git a/librarian/epub.py b/librarian/epub.py deleted file mode 100644 index 10922d4..0000000 --- a/librarian/epub.py +++ /dev/null @@ -1,563 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from __future__ import with_statement - -import os -import os.path -import re -import subprocess -from StringIO import StringIO -from copy import deepcopy -from lxml import etree -import zipfile -from tempfile import mkdtemp, NamedTemporaryFile -from shutil import rmtree - -from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile -from librarian.cover import WLCover - -from librarian import functions, get_resource - -functions.reg_person_name() - - -def inner_xml(node): - """ returns node's text and children as a string - - >>> print inner_xml(etree.fromstring('xyz')) - xyz - """ - - nt = node.text if node.text is not None else '' - return ''.join([nt] + [etree.tostring(child) for child in node]) - -def set_inner_xml(node, text): - """ sets node's text and children from a string - - >>> e = etree.fromstring('bxx') - >>> set_inner_xml(e, 'xyz') - >>> print etree.tostring(e) - xyz - """ - - p = etree.fromstring('%s' % text) - node.text = p.text - node[:] = p[:] - - -def node_name(node): - """ Find out a node's name - - >>> print node_name(etree.fromstring('XYZ')) - XYZ - """ - - tempnode = deepcopy(node) - - for p in ('pe', 'pa', 'pt', 'pr', 'motyw'): - for e in tempnode.findall('.//%s' % p): - t = e.tail - e.clear() - e.tail = t - etree.strip_tags(tempnode, '*') - return tempnode.text - - -def xslt(xml, sheet): - if isinstance(xml, etree._Element): - xml = etree.ElementTree(xml) - with open(sheet) as xsltf: - return xml.xslt(etree.parse(xsltf)) - - -def replace_characters(node): - def replace_chars(text): - if text is None: - return None - return text.replace(u"\ufeff", u"")\ - .replace("---", u"\u2014")\ - .replace("--", u"\u2013")\ - .replace(",,", u"\u201E")\ - .replace('"', u"\u201D")\ - .replace("'", u"\u2019") - if node.tag in ('uwaga', 'extra'): - t = node.tail - node.clear() - node.tail = t - node.text = replace_chars(node.text) - node.tail = replace_chars(node.tail) - for child in node: - replace_characters(child) - - -def find_annotations(annotations, source, part_no): - for child in source: - if child.tag in ('pe', 'pa', 'pt', 'pr'): - annotation = deepcopy(child) - number = str(len(annotations)+1) - annotation.set('number', number) - annotation.set('part', str(part_no)) - annotation.tail = '' - annotations.append(annotation) - tail = child.tail - child.clear() - child.tail = tail - child.text = number - if child.tag not in ('extra', 'uwaga'): - find_annotations(annotations, child, part_no) - - -class Stanza(object): - """ - Converts / verse endings into verse elements in a stanza. - - Slashes may only occur directly in the stanza. Any slashes in subelements - will be ignored, and the subelements will be put inside verse elements. - - >>> s = etree.fromstring("a/\\nbx/\\nyc/ \\nd") - >>> Stanza(s).versify() - >>> print etree.tostring(s) - abx/ - ycd - - """ - def __init__(self, stanza_elem): - self.stanza = stanza_elem - self.verses = [] - self.open_verse = None - - def versify(self): - self.push_text(self.stanza.text) - for elem in self.stanza: - self.push_elem(elem) - self.push_text(elem.tail) - tail = self.stanza.tail - self.stanza.clear() - self.stanza.tail = tail - self.stanza.extend(self.verses) - - def open_normal_verse(self): - self.open_verse = self.stanza.makeelement("wers_normalny") - self.verses.append(self.open_verse) - - def get_open_verse(self): - if self.open_verse is None: - self.open_normal_verse() - return self.open_verse - - def push_text(self, text): - if not text or not text.strip(): - return - for i, verse_text in enumerate(re.split(r"/\s*\n", text)): - if i: - self.open_normal_verse() - verse = self.get_open_verse() - if len(verse): - verse[-1].tail = (verse[-1].tail or "") + verse_text.strip() - else: - verse.text = (verse.text or "") + verse_text.strip() - - def push_elem(self, elem): - if elem.tag.startswith("wers"): - verse = deepcopy(elem) - verse.tail = None - self.verses.append(verse) - self.open_verse = verse - else: - appended = deepcopy(elem) - appended.tail = None - self.get_open_verse().append(appended) - - -def replace_by_verse(tree): - """ Find stanzas and create new verses in place of a '/' character """ - - stanzas = tree.findall('.//' + WLNS('strofa')) - for stanza in stanzas: - Stanza(stanza).versify() - - -def add_to_manifest(manifest, partno): - """ Adds a node to the manifest section in content.opf file """ - - partstr = 'part%d' % partno - e = manifest.makeelement(OPFNS('item'), attrib={ - 'id': partstr, - 'href': partstr + '.html', - 'media-type': 'application/xhtml+xml', - }) - manifest.append(e) - - -def add_to_spine(spine, partno): - """ Adds a node to the spine section in content.opf file """ - - e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno}); - spine.append(e) - - -class TOC(object): - def __init__(self, name=None, part_href=None): - self.children = [] - self.name = name - self.part_href = part_href - self.sub_number = None - - def add(self, name, part_href, level=0, is_part=True, index=None): - assert level == 0 or index is None - if level > 0 and self.children: - return self.children[-1].add(name, part_href, level-1, is_part) - else: - t = TOC(name) - t.part_href = part_href - if index is not None: - self.children.insert(index, t) - else: - self.children.append(t) - if not is_part: - t.sub_number = len(self.children) + 1 - return t.sub_number - - def append(self, toc): - self.children.append(toc) - - def extend(self, toc): - self.children.extend(toc.children) - - def depth(self): - if self.children: - return max((c.depth() for c in self.children)) + 1 - else: - return 0 - - def href(self): - src = self.part_href - if self.sub_number is not None: - src += '#sub%d' % self.sub_number - return src - - def write_to_xml(self, nav_map, counter=1): - for child in self.children: - nav_point = nav_map.makeelement(NCXNS('navPoint')) - nav_point.set('id', 'NavPoint-%d' % counter) - nav_point.set('playOrder', str(counter)) - - nav_label = nav_map.makeelement(NCXNS('navLabel')) - text = nav_map.makeelement(NCXNS('text')) - text.text = child.name - nav_label.append(text) - nav_point.append(nav_label) - - content = nav_map.makeelement(NCXNS('content')) - content.set('src', child.href()) - nav_point.append(content) - nav_map.append(nav_point) - counter = child.write_to_xml(nav_point, counter + 1) - return counter - - def html_part(self, depth=0): - texts = [] - for child in self.children: - texts.append( - "
%s
" % - (depth, child.href(), child.name)) - texts.append(child.html_part(depth+1)) - return "\n".join(texts) - - def html(self): - with open(get_resource('epub/toc.html')) as f: - t = unicode(f.read(), 'utf-8') - return t % self.html_part() - - -def used_chars(element): - """ Lists characters used in an ETree Element """ - chars = set((element.text or '') + (element.tail or '')) - for child in element: - chars = chars.union(used_chars(child)) - return chars - - -def chop(main_text): - """ divide main content of the XML file into chunks """ - - # prepare a container for each chunk - part_xml = etree.Element('utwor') - etree.SubElement(part_xml, 'master') - main_xml_part = part_xml[0] # master - - last_node_part = False - for one_part in main_text: - name = one_part.tag - if name == 'naglowek_czesc': - yield part_xml - last_node_part = True - main_xml_part[:] = [deepcopy(one_part)] - elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): - yield part_xml - main_xml_part[:] = [deepcopy(one_part)] - else: - main_xml_part.append(deepcopy(one_part)) - last_node_part = False - yield part_xml - - -def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]): - """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """ - - toc = TOC() - for element in chunk_xml[0]: - if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"): - toc.add(node_name(element), "part%d.html" % chunk_no) - elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): - subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False) - element.set('sub', str(subnumber)) - if empty: - if not _empty_html_static: - _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read()) - chars = set() - output_html = _empty_html_static[0] - else: - find_annotations(annotations, chunk_xml, chunk_no) - replace_by_verse(chunk_xml) - html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl')) - chars = used_chars(html_tree.getroot()) - output_html = etree.tostring(html_tree, method="html", pretty_print=True) - return output_html, toc, chars - - -def transform(wldoc, verbose=False, - style=None, html_toc=False, - sample=None, cover=None, flags=None): - """ produces a EPUB file - - sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover factory or True for default - flags: less-advertising, without-fonts, working-copy - """ - - def transform_file(wldoc, chunk_counter=1, first=True, sample=None): - """ processes one input file and proceeds to its children """ - - replace_characters(wldoc.edoc.getroot()) - - # every input file will have a TOC entry, - # pointing to starting chunk - toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter) - chars = set() - if first: - # write book title page - html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl')) - chars = used_chars(html_tree.getroot()) - zip.writestr('OPS/title.html', - etree.tostring(html_tree, method="html", pretty_print=True)) - # add a title page TOC entry - toc.add(u"Strona tytułowa", "title.html") - elif wldoc.book_info.parts: - # write title page for every parent - if sample is not None and sample <= 0: - chars = set() - html_string = open(get_resource('epub/emptyChunk.html')).read() - else: - html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl')) - chars = used_chars(html_tree.getroot()) - html_string = etree.tostring(html_tree, method="html", pretty_print=True) - zip.writestr('OPS/part%d.html' % chunk_counter, html_string) - add_to_manifest(manifest, chunk_counter) - add_to_spine(spine, chunk_counter) - chunk_counter += 1 - - if len(wldoc.edoc.getroot()) > 1: - # rdf before style master - main_text = wldoc.edoc.getroot()[1] - else: - # rdf in style master - main_text = wldoc.edoc.getroot()[0] - if main_text.tag == RDFNS('RDF'): - main_text = None - - if main_text is not None: - for chunk_xml in chop(main_text): - empty = False - if sample is not None: - if sample <= 0: - empty = True - else: - sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog')) - chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty) - - toc.extend(chunk_toc) - chars = chars.union(chunk_chars) - zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html) - add_to_manifest(manifest, chunk_counter) - add_to_spine(spine, chunk_counter) - chunk_counter += 1 - - for child in wldoc.parts(): - child_toc, chunk_counter, chunk_chars, sample = transform_file( - child, chunk_counter, first=False, sample=sample) - toc.append(child_toc) - chars = chars.union(chunk_chars) - - return toc, chunk_counter, chars, sample - - - document = deepcopy(wldoc) - del wldoc - - if flags: - for flag in flags: - document.edoc.getroot().set(flag, 'yes') - - # add editors info - document.edoc.getroot().set('editors', u', '.join(sorted( - editor.readable() for editor in document.editors()))) - - opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) - manifest = opf.find('.//' + OPFNS('manifest')) - guide = opf.find('.//' + OPFNS('guide')) - spine = opf.find('.//' + OPFNS('spine')) - - output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False) - zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) - - # write static elements - mime = zipfile.ZipInfo() - mime.filename = 'mimetype' - mime.compress_type = zipfile.ZIP_STORED - mime.extra = '' - zip.writestr(mime, 'application/epub+zip') - zip.writestr('META-INF/container.xml', '' \ - '' \ - '') - zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png')) - zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png')) - if not style: - style = get_resource('epub/style.css') - zip.write(style, os.path.join('OPS', 'style.css')) - - if cover: - if cover is True: - cover = WLCover - - cover_file = StringIO() - bound_cover = cover(document.book_info) - bound_cover.save(cover_file) - cover_name = 'cover.%s' % bound_cover.ext() - zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue()) - del cover_file - - cover_tree = etree.parse(get_resource('epub/cover.html')) - cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name) - zip.writestr('OPS/cover.html', etree.tostring( - cover_tree, method="html", pretty_print=True)) - - if bound_cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) - - manifest.append(etree.fromstring( - '')) - manifest.append(etree.fromstring( - '' % (cover_name, bound_cover.mime_type()))) - spine.insert(0, etree.fromstring('')) - opf.getroot()[0].append(etree.fromstring('')) - guide.append(etree.fromstring('')) - - - annotations = etree.Element('annotations') - - toc_file = etree.fromstring('' \ - '' \ - '') - nav_map = toc_file[-1] - - if html_toc: - manifest.append(etree.fromstring( - '')) - spine.append(etree.fromstring( - '')) - guide.append(etree.fromstring('')) - - toc, chunk_counter, chars, sample = transform_file(document, sample=sample) - - if len(toc.children) < 2: - toc.add(u"Początek utworu", "part1.html") - - # Last modifications in container files and EPUB creation - if len(annotations) > 0: - toc.add("Przypisy", "annotations.html") - manifest.append(etree.fromstring( - '')) - spine.append(etree.fromstring( - '')) - replace_by_verse(annotations) - html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl')) - chars = chars.union(used_chars(html_tree.getroot())) - zip.writestr('OPS/annotations.html', etree.tostring( - html_tree, method="html", pretty_print=True)) - - toc.add("Strona redakcyjna", "last.html") - manifest.append(etree.fromstring( - '')) - spine.append(etree.fromstring( - '')) - html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl')) - chars.update(used_chars(html_tree.getroot())) - zip.writestr('OPS/last.html', etree.tostring( - html_tree, method="html", pretty_print=True)) - - if not flags or not 'without-fonts' in flags: - # strip fonts - tmpdir = mkdtemp('-librarian-epub') - try: - cwd = os.getcwd() - except OSError: - cwd = None - - os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) - for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': - optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), - get_resource('fonts/' + fname), os.path.join(tmpdir, fname)] - if verbose: - print "Running font-optimizer" - subprocess.check_call(optimizer_call) - else: - subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) - manifest.append(etree.fromstring( - '' % (fname, fname))) - rmtree(tmpdir) - if cwd is not None: - os.chdir(cwd) - - zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) - title = document.book_info.title - attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber" - for st in attributes: - meta = toc_file.makeelement(NCXNS('meta')) - meta.set('name', st) - meta.set('content', '0') - toc_file[0].append(meta) - toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl'))) - toc_file[0][1].set('content', str(toc.depth())) - set_inner_xml(toc_file[1], ''.join(('', title, ''))) - - # write TOC - if html_toc: - toc.add(u"Spis treści", "toc.html", index=1) - zip.writestr('OPS/toc.html', toc.html().encode('utf-8')) - toc.write_to_xml(nav_map) - zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True)) - zip.close() - - return OutputFile.from_filename(output_file.name) diff --git a/librarian/fb2.py b/librarian/fb2.py deleted file mode 100644 index d979566..0000000 --- a/librarian/fb2.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import os.path -from copy import deepcopy -from lxml import etree - -from librarian import functions, OutputFile -from .epub import replace_by_verse - - -functions.reg_substitute_entities() -functions.reg_person_name() - - -def sectionify(tree): - """Finds section headers and adds a tree of _section tags.""" - sections = ['naglowek_czesc', - 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', - 'naglowek_podrozdzial'] - section_level = dict((v,k) for (k,v) in enumerate(sections)) - - # We can assume there are just subelements an no text at section level. - for level, section_name in reversed(list(enumerate(sections))): - for header in tree.findall('//' + section_name): - section = header.makeelement("_section") - header.addprevious(section) - section.append(header) - sibling = section.getnext() - while (sibling is not None and - section_level.get(sibling.tag, 1000) > level): - section.append(sibling) - sibling = section.getnext() - - -def transform(wldoc, verbose=False, - cover=None, flags=None): - """ produces a FB2 file - - cover: a cover.Cover object or True for default - flags: less-advertising, working-copy - """ - - document = deepcopy(wldoc) - del wldoc - - if flags: - for flag in flags: - document.edoc.getroot().set(flag, 'yes') - - style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt') - style = etree.parse(style_filename) - - replace_by_verse(document.edoc) - sectionify(document.edoc) - - result = document.transform(style) - - return OutputFile.from_string(unicode(result).encode('utf-8')) - -# vim:et diff --git a/librarian/formats/__init__.py b/librarian/formats/__init__.py index cfe4fc2..8f8556f 100644 --- a/librarian/formats/__init__.py +++ b/librarian/formats/__init__.py @@ -3,6 +3,8 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # + + class Format(object): """ Generic format class. """ def __init__(self, doc): diff --git a/librarian/formats/cover/__init__.py b/librarian/formats/cover/__init__.py index b9b515a..d410058 100644 --- a/librarian/formats/cover/__init__.py +++ b/librarian/formats/cover/__init__.py @@ -4,9 +4,9 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import re -from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance +from PIL import Image, ImageFont, ImageDraw, ImageFilter from StringIO import StringIO -from librarian import DCNS, URLOpener +from librarian import DCNS from librarian.output import OutputFile from librarian.utils import get_resource from librarian.formats import Format @@ -143,6 +143,7 @@ class Cover(Format): } def __init__(self, doc, format=None, width=None, height=None): + super(Cover, self).__init__(doc) self.author = ", ".join(auth for auth in doc.meta.get(DCNS('creator'))) self.title = doc.meta.title() if format is not None: @@ -185,7 +186,8 @@ class Cover(Format): author_font = ImageFont.truetype( self.author_font_ttf, metr.author_font_size) - tbox.text(self.pretty_author(), self.author_color, author_font, + tbox.text( + self.pretty_author(), self.author_color, author_font, metr.author_lineskip, self.author_shadow) text_img = tbox.image() img.paste(text_img, (metr.author_margin_left, top), text_img) @@ -197,15 +199,16 @@ class Cover(Format): ) title_font = ImageFont.truetype( self.title_font_ttf, metr.title_font_size) - tbox.text(self.pretty_title(), self.title_color, title_font, + tbox.text( + self.pretty_title(), self.title_color, title_font, metr.title_lineskip, self.title_shadow) text_img = tbox.image() img.paste(text_img, (metr.title_margin_left, top), text_img) return img - imgstr = StringIO() - img.save(imgstr, format=self.format, quality=95) - OutputFile.from_string(imgstr.getvalue()) + # imgstr = StringIO() + # img.save(imgstr, format=self.format, quality=95) + # OutputFile.from_stringing(imgstr.getvalue()) def mime_type(self): return self.mime_types[self.format] diff --git a/librarian/formats/cover/evens/__init__.py b/librarian/formats/cover/evens/__init__.py index 4207d46..e470001 100644 --- a/librarian/formats/cover/evens/__init__.py +++ b/librarian/formats/cover/evens/__init__.py @@ -18,10 +18,6 @@ class EvensCover(Cover): title_top = 30 logo_bottom = 100 - def __init__(self, doc, format=None, width=None, height=None): - super(EvensCover, self).__init__(doc, format=format, width=width, height=height) - self.doc = doc - def set_images(self, ctx): cover_url = self.doc.meta.get(DCNS('relation.coverimage.url'))[0] if cover_url.startswith('file://'): diff --git a/librarian/formats/cover/wolnelektury/__init__.py b/librarian/formats/cover/wolnelektury/__init__.py index 4218770..0824d51 100644 --- a/librarian/formats/cover/wolnelektury/__init__.py +++ b/librarian/formats/cover/wolnelektury/__init__.py @@ -4,6 +4,8 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # from PIL import Image, ImageFont, ImageDraw +from PIL import ImageEnhance + from librarian.utils import get_resource from .. import Cover, Metric, TextBox @@ -60,15 +62,15 @@ class WLCover(Cover): self.epoch = doc.meta.get_one('epoch') self.with_logo = with_logo # TODO - if doc.meta.get('cover_url'): - url = doc.meta.get('cover_url')[0] - bg_src = None - if bg_src is None: - bg_src = URLOpener().open(url) - self.background_img = StringIO(bg_src.read()) - bg_src.close() - else: - self.background_img = self.default_background + # if doc.meta.get('cover_url'): + # url = doc.meta.get('cover_url')[0] + # bg_src = None + # if bg_src is None: + # bg_src = URLOpener().open(url) + # self.background_img = StringIO(bg_src.read()) + # bg_src.close() + # else: + self.background_img = self.default_background def pretty_author(self): return self.author.upper() @@ -110,26 +112,29 @@ class WLCover(Cover): box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y) author_font = ImageFont.truetype( self.author_font_ttf, metr.author_font_size) - box.text(self.pretty_author(), - font=author_font, - line_height=metr.author_lineskip, - color=self.author_color, - shadow_color=self.author_shadow, - ) + box.text( + self.pretty_author(), + font=author_font, + line_height=metr.author_lineskip, + color=self.author_color, + shadow_color=self.author_shadow, + ) box.skip(metr.box_above_line) - box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height), - fill=self.author_color, width=metr.box_line_width) + box.draw.line( + (metr.box_line_left, box.height, metr.box_line_right, box.height), + fill=self.author_color, width=metr.box_line_width) box.skip(metr.box_below_line) title_font = ImageFont.truetype( self.title_font_ttf, metr.title_font_size) - box.text(self.pretty_title(), - line_height=metr.title_lineskip, - font=title_font, - color=epoch_color, - shadow_color=self.title_shadow, - ) + box.text( + self.pretty_title(), + line_height=metr.title_lineskip, + font=title_font, + color=epoch_color, + shadow_color=self.title_shadow, + ) if self.with_logo: logo = Image.open(get_resource('res/wl-logo-mono.png')) @@ -151,15 +156,13 @@ class WLCover(Cover): # center box_top = (metr.height - box_img.size[1]) / 2 - box_left = metr.bar_width + (metr.width - metr.bar_width - - box_img.size[0]) / 2 - draw.rectangle((box_left, box_top, - box_left + box_img.size[0], box_top + box_img.size[1]), - fill='#fff') + box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2 + draw.rectangle((box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff') img.paste(box_img, (box_left, box_top), box_img) if self.with_logo: - img.paste(logo, + img.paste( + logo, (box_left + (box_img.size[0] - logo.size[0]) / 2, box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo) diff --git a/librarian/formats/html/__init__.py b/librarian/formats/html/__init__.py index 2cf2601..ae6470a 100644 --- a/librarian/formats/html/__init__.py +++ b/librarian/formats/html/__init__.py @@ -40,7 +40,7 @@ class HtmlFormat(Format): t.find('.//div[@id="content"]').extend( self.render(self.doc.edoc.getroot(), ctx)) - #t.find('.//div[@id="toc"]').append(ctx.toc.render()) + # t.find('.//div[@id="toc"]').append(ctx.toc.render()) t.find('.//div[@id="footnotes"]').extend(ctx.footnotes.output) return OutputFile.from_string(etree.tostring( @@ -81,7 +81,8 @@ class Footnotes(object): def append(self, item): self.counter += 1 - e = etree.Element("a", + e = etree.Element( + "a", href="#footnote-anchor-%d" % self.counter, id="footnote-%d" % self.counter, style="float:left;margin-right:1em") @@ -89,7 +90,8 @@ class Footnotes(object): e.tail = " " self.output.append(e) self.output.extend(item) - anchor = etree.Element("a", + anchor = etree.Element( + "a", id="footnote-anchor-%d" % self.counter, href="#footnote-%d" % self.counter) anchor.text = "[%d]" % self.counter @@ -131,6 +133,7 @@ class TOC(object): HtmlFormat.renderers.register(core.Aside, None, NaturalText('aside')) HtmlFormat.renderers.register(core.Aside, 'comment', Silent()) + class AsideFootnote(NaturalText): def render(self, element, ctx): output = super(AsideFootnote, self).render(element, ctx) @@ -150,21 +153,23 @@ class Header(NaturalText): else: root[0].tag = 'h2' if root[0].text: - d = etree.SubElement(root[0], 'a', {'id': root[0].text, 'style': 'pointer: hand; color:#ddd; font-size:.8em'}) - #d.text = "per" + d = etree.SubElement( + root[0], 'a', {'id': root[0].text, 'style': 'pointer: hand; color:#ddd; font-size:.8em'}) + # d.text = "per" return root - + HtmlFormat.renderers.register(core.Header, None, Header('h1')) HtmlFormat.renderers.register(core.Div, None, NaturalText('div')) + class DivDefined(NaturalText): def render(self, element, ctx): output = super(DivDefined, self).render(element, ctx) output[0].text = (output[0].text or '') + ':' - output[0].attrib['id'] = output[0].text # not so cool? + output[0].attrib['id'] = output[0].text # not so cool? return output HtmlFormat.renderers.register(core.Div, 'defined', DivDefined('dt', {'style': 'display: inline-block'})) @@ -186,11 +191,12 @@ HtmlFormat.renderers.register(core.Div, 'item', NaturalText('li')) HtmlFormat.renderers.register(core.Div, 'list', NaturalText('ul')) HtmlFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol')) + class DivListDefinitions(NaturalText): def render(self, element, ctx): output = super(DivListDefinitions, self).render(element, ctx) - #if ctx.toc_level > 2: - # output[0].attrib['style'] = 'float: right' + # if ctx.toc_level > 2: + # output[0].attrib['style'] = 'float: right' return output HtmlFormat.renderers.register(core.Div, 'list.definitions', DivListDefinitions('ul')) @@ -215,6 +221,7 @@ HtmlFormat.renderers.register(core.Span, 'cite.code', LiteralText('code')) HtmlFormat.renderers.register(core.Span, 'emph', NaturalText('em')) HtmlFormat.renderers.register(core.Span, 'emp', NaturalText('strong')) + class SpanUri(LiteralText): def render(self, element, ctx): root = super(SpanUri, self).render(element, ctx) @@ -222,6 +229,7 @@ class SpanUri(LiteralText): return root HtmlFormat.renderers.register(core.Span, 'uri', SpanUri('a')) + class SpanLink(LiteralText): def render(self, element, ctx): root = super(SpanLink, self).render(element, ctx) @@ -231,4 +239,3 @@ class SpanLink(LiteralText): root[0].attrib['href'] = src return root HtmlFormat.renderers.register(core.Span, 'link', SpanLink('a')) - diff --git a/librarian/formats/pdf/__init__.py b/librarian/formats/pdf/__init__.py index 298db09..e8e936b 100644 --- a/librarian/formats/pdf/__init__.py +++ b/librarian/formats/pdf/__init__.py @@ -4,7 +4,6 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import os -import re import shutil from subprocess import call, PIPE from tempfile import NamedTemporaryFile, mkdtemp @@ -16,7 +15,7 @@ from librarian import DCNS, XMLNamespace from librarian.formats import Format from librarian.output import OutputFile from librarian.renderers import Register, TreeRenderer -from librarian.utils import Context, get_resource, extend_element +from librarian.utils import Context, get_resource from librarian import core from PIL import Image from ..html import Silent @@ -80,7 +79,8 @@ class PdfFormat(Format): call(['convert', save_as + '_.' + ext, save_as]) else: # JPEGs with bad density will break LaTeX with 'Dimension too large'. - r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300', save_as + '_2.' + ext]) + r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300', + save_as + '_2.' + ext]) if r: shutil.move(save_as + '_.' + ext, save_as) else: @@ -124,7 +124,7 @@ class PdfFormat(Format): img = Image.open(self.get_file(build_ctx, 'cover.png')) size = img.size - if (size[1] > size[0]): + if size[1] > size[0]: img = img.crop((0, 0, size[0], size[0])) img.save(self.get_file(build_ctx, 'cover.png'), format=img.format, quality=90) size = img.size @@ -146,14 +146,14 @@ class PdfFormat(Format): p[0].append(texml_cmd("noindent")) p[0].append(texml_cmd("nohyphens", author)) p[0].append(texml_cmd("vspace", "1em")) - #p[0][-1].tail = author + # p[0][-1].tail = author if title: p = texml_cmd("par", "") grp.append(p) p[0].append(texml_cmd("Huge")) p[0].append(texml_cmd("noindent")) p[0].append(texml_cmd("nohyphens", title)) - #p[0][-1].tail = title + # p[0][-1].tail = title doc.append(texml_cmd("vfill")) doc.append(texml_cmd("vfill")) @@ -161,7 +161,7 @@ class PdfFormat(Format): cover_logo_url = getattr(build_ctx, 'cover_logo', None) # TEST # TODO: convert - #cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png' + # cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png' if cover_logo_url: self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True) size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size @@ -183,11 +183,10 @@ class PdfFormat(Format): doc.append(texml_cmd("vspace", "1em")) for m, f in ( - ('Publisher: ', DCNS('publisher')), - ('Rights: ', DCNS('rights')), - ('Intended audience: ', DCNS('audience')), - ('', DCNS('description')), - ): + ('Publisher: ', DCNS('publisher')), + ('Rights: ', DCNS('rights')), + ('Intended audience: ', DCNS('audience')), + ('', DCNS('description'))): v = self.doc.meta.get_one(f) if v: e = texml_cmd("par", "") @@ -196,7 +195,6 @@ class PdfFormat(Format): doc.append(e) doc.append(texml_cmd("vspace", "1em")) - e = texml_cmd("par", "") e[0].append(texml_cmd("noindent")) e[0][0].tail = "Resource prepared using " @@ -205,7 +203,7 @@ class PdfFormat(Format): doc.append(e) source_url = getattr(build_ctx, 'source_url', None) - #source_url = 'http://milpeer.mdrn.pl/documents/27/' + # source_url = 'http://milpeer.mdrn.pl/documents/27/' if source_url: e = texml_cmd("par", "") doc.append(e) @@ -220,16 +218,14 @@ class PdfFormat(Format): texml = self.get_texml(ctx) tex_path = os.path.join(ctx.workdir, 'doc.tex') with open(tex_path, 'w') as fout: - #print etree.tostring(texml) + # print etree.tostring(texml) process(StringIO(etree.tostring(texml)), fout, 'utf-8') - #~ if self.save_tex: - #~ shutil.copy(tex_path, self.save_tex) - + # if self.save_tex: + # shutil.copy(tex_path, self.save_tex) - - #for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']: - # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp) + # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']: + # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp) return ctx.workdir def build(self, ctx=None, verbose=False): @@ -247,9 +243,9 @@ class PdfFormat(Format): else: for i in range(self.tex_passes): p = call(['xelatex', '-interaction=batchmode', tex_path], - stdout=PIPE, stderr=PIPE) + stdout=PIPE, stderr=PIPE) if p: - #raise ParseError("Error parsing .tex file: %s" % tex_path) + # raise ParseError("Error parsing .tex file: %s" % tex_path) raise RuntimeError("Error parsing .tex file: %s" % tex_path) if cwd is not None: @@ -266,23 +262,24 @@ class PdfFormat(Format): return self.renderers.get_for(element).render(element, ctx) - - class CmdRenderer(TreeRenderer): def parms(self): return [] + def container(self): root = etree.Element(self.root_name) root.append(texml_cmd(self.tag_name, *(self.parms() + [""]))) inner = root[0][-1] return root, inner + class EnvRenderer(TreeRenderer): def container(self): root = etree.Element(self.root_name) inner = etree.SubElement(root, 'env', name=self.tag_name) return root, inner + class GroupRenderer(CmdRenderer): def container(self): root = etree.Element(self.root_name) @@ -311,6 +308,7 @@ PdfFormat.renderers.register(core.Header, None, CmdRenderer('section*')) PdfFormat.renderers.register(core.Div, None, CmdRenderer('par')) + class ImgRenderer(CmdRenderer): def parms(self): return ["", ""] @@ -324,8 +322,8 @@ class ImgRenderer(CmdRenderer): root[0][0].text = 'f%d.png' % nr try: size = Image.open(ctx.format.get_file(ctx, 'f%d.png' % nr)).size - except IOError: # not an image - del root[0]; + except IOError: # not an image + del root[0] return root root[0][1].text = '15cm' root[0][2].text = '%fcm' % (15.0 * size[1] / size[0]) @@ -340,21 +338,22 @@ PdfFormat.renderers.register(core.Div, 'list', EnvRenderer('itemize')) PdfFormat.renderers.register(core.Div, 'list.enum', EnvRenderer('enumerate')) - PdfFormat.renderers.register(core.Span, None, TreeRenderer()) PdfFormat.renderers.register(core.Span, 'cite', CmdRenderer('emph')) PdfFormat.renderers.register(core.Span, 'cite.code', CmdRenderer('texttt')) PdfFormat.renderers.register(core.Span, 'emp', CmdRenderer('textbf')) PdfFormat.renderers.register(core.Span, 'emph', CmdRenderer('emph')) + class SpanUri(CmdRenderer): def parms(self): return [""] + def render(self, element, ctx): root = super(SpanUri, self).render(element, ctx) src = element.text if src.startswith('file://'): - src = ctx.files_path + src[7:] + src = ctx.files_path + src[7:] root[0][0].text = src return root PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href')) @@ -363,19 +362,17 @@ PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href')) class SpanLink(CmdRenderer): def parms(self): return [""] + def render(self, element, ctx): root = super(SpanLink, self).render(element, ctx) src = element.attrib.get('href', '') if src.startswith('file://'): - src = ctx.files_path + src[7:] + src = ctx.files_path + src[7:] root[0][0].text = src return root PdfFormat.renderers.register(core.Span, 'link', SpanLink('href')) - - PdfFormat.renderers.register(core.Aside, None, TreeRenderer()) PdfFormat.renderers.register(core.Aside, 'editorial', CmdRenderer('editorialpage')) PdfFormat.renderers.register(core.Aside, 'comment', Silent()) - diff --git a/librarian/meta.py b/librarian/meta.py index 5b50d92..a16b73e 100755 --- a/librarian/meta.py +++ b/librarian/meta.py @@ -53,7 +53,6 @@ class Metadata(etree.ElementBase): return values[0] else: return None - # Specials. diff --git a/librarian/mobi.py b/librarian/mobi.py deleted file mode 100644 index d98b838..0000000 --- a/librarian/mobi.py +++ /dev/null @@ -1,60 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from copy import deepcopy -import os -import subprocess -from tempfile import NamedTemporaryFile - -from librarian import OutputFile -from librarian.cover import WLCover -from librarian import get_resource - - -def transform(wldoc, verbose=False, - sample=None, cover=None, flags=None): - """ produces a MOBI file - - wldoc: a WLDocument - sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover factory overriding default - flags: less-advertising, - """ - - document = deepcopy(wldoc) - del wldoc - book_info = document.book_info - - # provide a cover by default - if not cover: - cover = WLCover - cover_file = NamedTemporaryFile(suffix='.png', delete=False) - bound_cover = cover(book_info) - bound_cover.save(cover_file) - - if bound_cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) - - if not flags: - flags = [] - flags = list(flags) + ['without-fonts'] - epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True, - flags=flags, style=get_resource('mobi/style.css')) - - if verbose: - kwargs = {} - else: - devnull = open("/dev/null", 'w') - kwargs = {"stdout": devnull, "stderr": devnull} - - output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False) - output_file.close() - subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name, - '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs) - os.unlink(cover_file.name) - return OutputFile.from_filename(output_file.name) \ No newline at end of file diff --git a/librarian/packagers.py b/librarian/packagers.py deleted file mode 100644 index ddfd7c8..0000000 --- a/librarian/packagers.py +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import os -from copy import deepcopy -from lxml import etree -from librarian import pdf, epub, DirDocProvider, ParseError, cover -from librarian.parser import WLDocument - - -class Packager(object): - cover = None - flags = None - - @classmethod - def prepare_file(cls, main_input, output_dir, verbose=False): - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - slug, ext = os.path.splitext(fname) - - if output_dir != '': - try: - os.makedirs(output_dir) - except: - pass - outfile = os.path.join(output_dir, slug + '.' + cls.ext) - - doc = WLDocument.from_file(main_input, provider=provider) - output_file = cls.converter.transform(doc, - cover=cls.cover, flags=cls.flags) - doc.save_output_file(output_file, output_path=outfile) - - - @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False): - try: - for main_input in input_filenames: - if verbose: - print main_input - cls.prepare_file(main_input, output_dir, verbose) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e.message - } - - -class EpubPackager(Packager): - converter = epub - ext = 'epub' - -class PdfPackager(Packager): - converter = pdf - ext = 'pdf' - - -class GandalfEpubPackager(EpubPackager): - cover = cover.GandalfCover - -class GandalfPdfPackager(PdfPackager): - cover = cover.GandalfCover - -class BookotekaEpubPackager(EpubPackager): - cover = cover.BookotekaCover - -class PrestigioEpubPackager(EpubPackager): - cover = cover.PrestigioCover - flags = ('less-advertising',) - -class PrestigioPdfPackager(PdfPackager): - cover = cover.PrestigioCover - flags = ('less-advertising',) - - -class VirtualoPackager(Packager): - @staticmethod - def utf_trunc(text, limit): - """ truncates text to at most `limit' bytes in utf-8 """ - if text is None: - return text - if len(text.encode('utf-8')) > limit: - newlimit = limit - 3 - while len(text.encode('utf-8')) > newlimit: - text = text[:(newlimit - len(text.encode('utf-8'))) / 4] - text += '...' - return text - - @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False): - xml = etree.fromstring(""" - """) - product = etree.fromstring(""" - - - - - - - Jan - Kowalski - - - 0.0 - PL - """) - - try: - for main_input in input_filenames: - if verbose: - print main_input - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - slug, ext = os.path.splitext(fname) - - outfile_dir = os.path.join(output_dir, slug) - os.makedirs(os.path.join(output_dir, slug)) - - doc = WLDocument.from_file(main_input, provider=provider) - info = doc.book_info - - product_elem = deepcopy(product) - product_elem[0].text = cls.utf_trunc(slug, 100) - product_elem[1].text = cls.utf_trunc(info.title, 255) - product_elem[2].text = cls.utf_trunc(info.description, 255) - product_elem[3].text = cls.utf_trunc(info.source_name, 3000) - product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100) - product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) - xml.append(product_elem) - - cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) - outfile = os.path.join(outfile_dir, '1.epub') - outfile_sample = os.path.join(outfile_dir, '1.sample.epub') - doc.save_output_file(doc.as_epub(), - output_path=outfile) - doc.save_output_file(doc.as_epub(doc, sample=25), - output_path=outfile_sample) - outfile = os.path.join(outfile_dir, '1.mobi') - outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') - doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), - output_path=outfile) - doc.save_output_file( - doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), - output_path=outfile_sample) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e.message - } - - xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w') - xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8')) - xml_file.close() diff --git a/librarian/parser.py b/librarian/parser.py index a0b8a7f..7b48624 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -11,7 +11,7 @@ from . import core, meta class SSTParser(etree.XMLParser): """ XML parser using relevant element classes. """ def __init__(self): - super(SSTParser, self).__init__(remove_blank_text=False) + super(SSTParser, self).__init__() lookup = etree.ElementNamespaceClassLookup() self.set_element_class_lookup(lookup) diff --git a/librarian/pdf.py b/librarian/pdf.py deleted file mode 100644 index 9fb92b1..0000000 --- a/librarian/pdf.py +++ /dev/null @@ -1,321 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -"""PDF creation library. - -Creates one big XML from the book and its children, converts it to LaTeX -with TeXML, then runs it by XeLaTeX. - -""" -from __future__ import with_statement -import os -import os.path -import shutil -from StringIO import StringIO -from tempfile import mkdtemp, NamedTemporaryFile -import re -from copy import deepcopy -from subprocess import call, PIPE - -from Texml.processor import process -from lxml import etree -from lxml.etree import XMLSyntaxError, XSLTApplyError - -from librarian.dcparser import Person -from librarian.parser import WLDocument -from librarian import ParseError, DCNS, get_resource, OutputFile -from librarian import functions -from librarian.cover import WLCover - - -functions.reg_substitute_entities() -functions.reg_strip() -functions.reg_starts_white() -functions.reg_ends_white() -functions.reg_texcommand() - -STYLESHEETS = { - 'wl2tex': 'pdf/wl2tex.xslt', -} - -#CUSTOMIZATIONS = [ -# 'nofootnotes', -# 'nothemes', -# 'defaultleading', -# 'onehalfleading', -# 'doubleleading', -# 'nowlfont', -# ] - -def insert_tags(doc, split_re, tagname, exclude=None): - """ inserts for every occurence of `split_re' in text nodes in the `doc' tree - - >>> t = etree.fromstring('A-B-CX-Y-Z'); - >>> insert_tags(t, re.compile('-'), 'd'); - >>> print etree.tostring(t) - ABCXYZ - """ - - for elem in doc.iter(tag=etree.Element): - if exclude and elem.tag in exclude: - continue - if elem.text: - chunks = split_re.split(elem.text) - while len(chunks) > 1: - ins = etree.Element(tagname) - ins.tail = chunks.pop() - elem.insert(0, ins) - elem.text = chunks.pop(0) - if elem.tail: - chunks = split_re.split(elem.tail) - parent = elem.getparent() - ins_index = parent.index(elem) + 1 - while len(chunks) > 1: - ins = etree.Element(tagname) - ins.tail = chunks.pop() - parent.insert(ins_index, ins) - elem.tail = chunks.pop(0) - - -def substitute_hyphens(doc): - insert_tags(doc, - re.compile("(?<=[^-\s])-(?=[^-\s])"), - "dywiz", - exclude=[DCNS("identifier.url"), DCNS("rights.license")] - ) - - -def fix_hanging(doc): - insert_tags(doc, - re.compile("(?<=\s\w)\s+"), - "nbsp", - exclude=[DCNS("identifier.url"), DCNS("rights.license")] - ) - - -def move_motifs_inside(doc): - """ moves motifs to be into block elements """ - for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'): - for motif in master.xpath('motyw'): - for sib in motif.itersiblings(): - if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'): - # motif shouldn't have a tail - it would be untagged text - motif.tail = None - motif.getparent().remove(motif) - sib.insert(0, motif) - break - - -def hack_motifs(doc): - """ dirty hack for the marginpar-creates-orphans LaTeX problem - see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304 - - moves motifs in stanzas from first verse to second - and from next to last to last, then inserts negative vspace before them - """ - for motif in doc.findall('//strofa//motyw'): - # find relevant verse-level tag - verse, stanza = motif, motif.getparent() - while stanza is not None and stanza.tag != 'strofa': - verse, stanza = stanza, stanza.getparent() - breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True)) - breaks_after = sum(1 for i in verse.itersiblings('br')) - if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1: - move_by = 1 - if breaks_after == 2: - move_by += 1 - moved_motif = deepcopy(motif) - motif.tag = 'span' - motif.text = None - moved_motif.tail = None - moved_motif.set('moved', str(move_by)) - - for br in verse.itersiblings('br'): - if move_by > 1: - move_by -= 1 - continue - br.addnext(moved_motif) - break - - -def parse_creator(doc): - """Generates readable versions of creator and translator tags. - - Finds all dc:creator and dc.contributor.translator tags - and adds *_parsed versions with forenames first. - """ - for person in doc.xpath("|".join('//dc:'+(tag) for tag in ( - 'creator', 'contributor.translator')), - namespaces = {'dc': str(DCNS)})[::-1]: - if not person.text: - continue - p = Person.from_text(person.text) - person_parsed = deepcopy(person) - person_parsed.tag = person.tag + '_parsed' - person_parsed.set('sortkey', person.text) - person_parsed.text = p.readable() - person.getparent().insert(0, person_parsed) - - -def get_stylesheet(name): - return get_resource(STYLESHEETS[name]) - - -def package_available(package, args='', verbose=False): - """ check if a verion of a latex package accepting given args is available """ - tempdir = mkdtemp('-wl2pdf-test') - fpath = os.path.join(tempdir, 'test.tex') - f = open(fpath, 'w') - f.write(r""" - \documentclass{wl} - \usepackage[%s]{%s} - \begin{document} - \end{document} - """ % (args, package)) - f.close() - if verbose: - p = call(['xelatex', '-output-directory', tempdir, fpath]) - else: - p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE) - shutil.rmtree(tempdir) - return p == 0 - - -def transform(wldoc, verbose=False, save_tex=None, morefloats=None, - cover=None, flags=None, customizations=None): - """ produces a PDF file with XeLaTeX - - wldoc: a WLDocument - verbose: prints all output from LaTeX - save_tex: path to save the intermediary LaTeX file to - morefloats (old/new/none): force specific morefloats - cover: a cover.Cover factory or True for default - flags: less-advertising, - customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class) - """ - - # Parse XSLT - try: - book_info = wldoc.book_info - document = load_including_children(wldoc) - root = document.edoc.getroot() - - if cover: - if cover is True: - cover = WLCover - bound_cover = cover(book_info) - root.set('data-cover-width', str(bound_cover.width)) - root.set('data-cover-height', str(bound_cover.height)) - if bound_cover.uses_dc_cover: - if book_info.cover_by: - root.set('data-cover-by', book_info.cover_by) - if book_info.cover_source: - root.set('data-cover-source', - book_info.cover_source) - if flags: - for flag in flags: - root.set('flag-' + flag, 'yes') - - # check for LaTeX packages - if morefloats: - root.set('morefloats', morefloats.lower()) - elif package_available('morefloats', 'maxfloats=19'): - root.set('morefloats', 'new') - - # add customizations - if customizations is not None: - root.set('customizations', u','.join(customizations)) - - # add editors info - root.set('editors', u', '.join(sorted( - editor.readable() for editor in document.editors()))) - - # hack the tree - move_motifs_inside(document.edoc) - hack_motifs(document.edoc) - parse_creator(document.edoc) - substitute_hyphens(document.edoc) - fix_hanging(document.edoc) - - # wl -> TeXML - style_filename = get_stylesheet("wl2tex") - style = etree.parse(style_filename) - - texml = document.transform(style) - - # TeXML -> LaTeX - temp = mkdtemp('-wl2pdf') - - if cover: - with open(os.path.join(temp, 'cover.png'), 'w') as f: - bound_cover.save(f) - - del document # no longer needed large object :) - - tex_path = os.path.join(temp, 'doc.tex') - fout = open(tex_path, 'w') - process(StringIO(texml), fout, 'utf-8') - fout.close() - del texml - - if save_tex: - shutil.copy(tex_path, save_tex) - - # LaTeX -> PDF - shutil.copy(get_resource('pdf/wl.cls'), temp) - shutil.copy(get_resource('res/wl-logo.png'), temp) - - try: - cwd = os.getcwd() - except OSError: - cwd = None - os.chdir(temp) - - if verbose: - p = call(['xelatex', tex_path]) - else: - p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE) - if p: - raise ParseError("Error parsing .tex file") - - if cwd is not None: - os.chdir(cwd) - - output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False) - pdf_path = os.path.join(temp, 'doc.pdf') - shutil.move(pdf_path, output_file.name) - shutil.rmtree(temp) - return OutputFile.from_filename(output_file.name) - - except (XMLSyntaxError, XSLTApplyError), e: - raise ParseError(e) - - -def load_including_children(wldoc=None, provider=None, uri=None): - """ Makes one big xml file with children inserted at end. - - Either wldoc or provider and URI must be provided. - """ - - if uri and provider: - f = provider.by_uri(uri) - text = f.read().decode('utf-8') - f.close() - elif wldoc is not None: - text = etree.tostring(wldoc.edoc, encoding=unicode) - provider = wldoc.provider - else: - raise ValueError('Neither a WLDocument, nor provider and URI were provided.') - - text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) - - document = WLDocument.from_string(text, - parse_dublincore=True, provider=provider) - document.swap_endlines() - - for child_uri in document.book_info.parts: - child = load_including_children(provider=provider, uri=child_uri) - document.edoc.getroot().append(child.edoc.getroot()) - return document diff --git a/librarian/renderers.py b/librarian/renderers.py index 59ed8a4..fd4ec16 100755 --- a/librarian/renderers.py +++ b/librarian/renderers.py @@ -71,7 +71,6 @@ class TreeRenderer(Renderer): return root - class Register(object): """ Class-renderer register. diff --git a/librarian/text.py b/librarian/text.py deleted file mode 100644 index d99e7cf..0000000 --- a/librarian/text.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import copy -from librarian import functions, OutputFile -from lxml import etree -import os - - -functions.reg_substitute_entities() -functions.reg_wrap_words() -functions.reg_strip() -functions.reg_person_name() - -TEMPLATE = u"""\ -%(text)s - - ------ -Ta lektura, podobnie jak tysiące innych, dostępna jest na stronie wolnelektury.pl. -Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %(url)s. - -Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska. - -%(license_description)s.%(source)s - -%(description)s%(contributors)s -""" - -def transform(wldoc, flags=None, **options): - """ - Transforms input_file in XML to output_file in TXT. - possible flags: raw-text, - """ - # Parse XSLT - style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt') - style = etree.parse(style_filename) - - document = copy.deepcopy(wldoc) - del wldoc - document.swap_endlines() - - if flags: - for flag in flags: - document.edoc.getroot().set(flag, 'yes') - - result = document.transform(style, **options) - - if not flags or 'raw-text' not in flags: - if document.book_info: - parsed_dc = document.book_info - description = parsed_dc.description - url = document.book_info.url - - license_description = parsed_dc.license_description - license = parsed_dc.license - if license: - license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license) - else: - license_description = u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)" - - source = parsed_dc.source_name - if source: - source = "\n\nTekst opracowany na podstawie: " + source - else: - source = '' - - contributors = ', '.join(person.readable() for person in - sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p))) - if contributors: - contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors - else: - description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).' - url = '*' * 10 - license = "" - license_description = "" - source = "" - contributors = "" - return OutputFile.from_string((TEMPLATE % { - 'description': description, - 'url': url, - 'license_description': license_description, - 'text': unicode(result), - 'source': source, - 'contributors': contributors, - }).encode('utf-8')) - else: - return OutputFile.from_string(unicode(result).encode('utf-8')) - diff --git a/librarian/utils.py b/librarian/utils.py index a2e3522..04b6d69 100755 --- a/librarian/utils.py +++ b/librarian/utils.py @@ -26,7 +26,7 @@ class Context(object): elif self._upctx is not None: return getattr(self._upctx, name) else: - raise AttributeError, "'%s' object has no attribute '%s'" % (type(self), name) + raise AttributeError("'%s' object has no attribute '%s'" % (type(self), name)) def __setattr__(self, name, value): try: @@ -44,7 +44,7 @@ class Context(object): class XMLNamespace(object): - '''A handy structure to repsent names in an XML namespace.''' + """A handy structure to repsent names in an XML namespace.""" def __init__(self, uri): self.uri = uri diff --git a/scripts/book2partner b/scripts/book2partner deleted file mode 100755 index 4b84c2f..0000000 --- a/scripts/book2partner +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import optparse - -from librarian import packagers - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Prepare SOURCE files for a partner.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='', - help='specifies the directory for output') - parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False, - help='prepare files for Bookoteka') - parser.add_option('--gandalf', action='store_true', dest='gandalf', default=False, - help='prepare EPUB files for Gandalf') - parser.add_option('--gandalf-pdf', action='store_true', dest='gandalf_pdf', default=False, - help='prepare PDF files for Gandalf') - parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False, - help='prepare files for Virtualo API') - parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False, - help='prepare files for Prestigio') - parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False, - help='prepare PDF files for Prestigio') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - if options.bookoteka: - packagers.BookotekaEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.gandalf: - packagers.GandalfEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.gandalf_pdf: - packagers.GandalfPdfPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.virtualo: - packagers.VirtualoPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.prestigio: - packagers.PrestigioEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.prestigio_pdf: - packagers.PrestigioPdfPackager.prepare(input_filenames, options.output_dir, options.verbose) diff --git a/setup.py b/setup.py index a0e4e53..8c4240c 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,6 @@ setup( 'scripts/book2mobi', 'scripts/book2pdf', 'scripts/book2fb2', - 'scripts/book2partner', 'scripts/book2cover', 'scripts/bookfragments', 'scripts/genslugs'], diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py deleted file mode 100644 index ee29bc9..0000000 --- a/tests/test_dcparser.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from librarian import dcparser -from lxml import etree -from nose.tools import * -from os.path import splitext -from tests.utils import get_all_fixtures -import codecs - - -def check_dcparser(xml_file, result_file): - xml = file(xml_file).read() - result = codecs.open(result_file, encoding='utf-8').read() - info = dcparser.BookInfo.from_string(xml).to_dict() - should_be = eval(result) - for key in should_be: - assert_equals(info[key], should_be[key]) - - -def test_dcparser(): - for fixture in get_all_fixtures('dcparser', '*.xml'): - base_name = splitext(fixture)[0] - yield check_dcparser, fixture, base_name + '.out' - - -def check_serialize(xml_file): - xml = file(xml_file).read() - info = dcparser.BookInfo.from_string(xml) - - # serialize - serialized = etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8') - # then parse again - info_bis = dcparser.BookInfo.from_string(serialized) - - # check if they are the same - for key in vars(info): - assert_equals(getattr(info, key), getattr(info_bis, key)) - for key in vars(info_bis): - assert_equals(getattr(info, key), getattr(info_bis, key)) - - -def test_serialize(): - for fixture in get_all_fixtures('dcparser', '*.xml'): - yield check_serialize, fixture - diff --git a/tests/test_epub.py b/tests/test_epub.py deleted file mode 100644 index faa76e7..0000000 --- a/tests/test_epub.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from zipfile import ZipFile -from lxml import html -from nose.tools import * -from librarian import DirDocProvider -from librarian.parser import WLDocument -from tests.utils import get_fixture - - -def test_transform(): - epub = WLDocument.from_file( - get_fixture('text', 'asnyk_zbior.xml'), - provider=DirDocProvider(get_fixture('text', '')) - ).as_epub(flags=['without_fonts']).get_file() - zipf = ZipFile(epub) - - # Check contributor list. - last = zipf.open('OPS/last.html') - tree = html.parse(last) - editors_attribution = False - for par in tree.findall("//p"): - if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'): - editors_attribution = True - assert_equal(par.text.rstrip(), - u'Opracowanie redakcyjne i przypisy: ' - u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.') - assert_true(editors_attribution) diff --git a/tests/test_html.py b/tests/test_html.py deleted file mode 100644 index 51d6acd..0000000 --- a/tests/test_html.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from librarian import NoDublinCore -from librarian.parser import WLDocument -from nose.tools import * -from utils import get_fixture - - -def test_transform(): - expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html') - - html = WLDocument.from_file( - get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') - ).as_html().get_string() - - assert_equal(html, file(expected_output_file_path).read()) - - -@raises(NoDublinCore) -def test_no_dublincore(): - WLDocument.from_file( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml') - ).as_html() - - -def test_passing_parse_dublincore_to_transform(): - """Passing parse_dublincore=False to transform omits DublinCore parsing.""" - WLDocument.from_file( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), - parse_dublincore=False, - ).as_html() - -def test_empty(): - assert not WLDocument.from_string( - '', - parse_dublincore=False, - ).as_html() diff --git a/tests/test_pdf.py b/tests/test_pdf.py deleted file mode 100644 index 75b73bc..0000000 --- a/tests/test_pdf.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import re -from tempfile import NamedTemporaryFile -from nose.tools import * -from librarian import DirDocProvider -from librarian.parser import WLDocument -from utils import get_fixture - - -def test_transform(): - temp = NamedTemporaryFile(delete=False) - temp.close() - WLDocument.from_file( - get_fixture('text', 'asnyk_zbior.xml'), - provider=DirDocProvider(get_fixture('text', '')) - ).as_pdf(save_tex=temp.name) - tex = open(temp.name).read().decode('utf-8') - print tex - - # Check contributor list. - editors = re.search(ur'\\def\\editors\{' - ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex) - assert_equal(editors.group(1), - u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska") diff --git a/tests/test_picture.py b/tests/test_picture.py deleted file mode 100644 index f64f624..0000000 --- a/tests/test_picture.py +++ /dev/null @@ -1,60 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from librarian import picture, dcparser -from lxml import etree -from nose.tools import * -from os.path import splitext -from tests.utils import get_all_fixtures, get_fixture -import codecs -from os import path - -def test_wlpictureuri(): - uri = picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus') - -def check_load(xml_file): - pi = dcparser.parse(xml_file, picture.PictureInfo) - assert pi is not None - assert isinstance(pi, picture.PictureInfo) - - -def test_load(): - for fixture in get_all_fixtures('picture', '*.xml'): - yield check_load, fixture - - -def test_wlpicture(): - wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml'))) - pi = wlp.picture_info - - # from nose.tools import set_trace; set_trace() - assert pi.type[0] == u"Image" - assert pi.mime_type == u'image/jpeg' == wlp.mime_type - assert wlp.slug == 'angelus-novus' - - assert path.exists(wlp.image_path) - - f = wlp.image_file('r') - f.close() - -def test_picture_parts(): - wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml'))) - parts = list(wlp.partiter()) - assert len(parts) == 5, "there should be %d parts of the picture" % 5 - motifs = set() - names = set() - - print parts - for p in parts: - for m in p['themes']: - motifs.add(m) - for p in parts: - if p['object']: - names.add(p['object']) - - assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs - assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names - - diff --git a/tests/test_text.py b/tests/test_text.py deleted file mode 100644 index 70dfb60..0000000 --- a/tests/test_text.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -from librarian import NoDublinCore -from librarian.parser import WLDocument -from nose.tools import * -from utils import get_fixture - - -def test_transform(): - expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt') - - text = WLDocument.from_file( - get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') - ).as_text().get_string() - - assert_equal(text, file(expected_output_file_path).read()) - - -@raises(NoDublinCore) -def test_no_dublincore(): - WLDocument.from_file( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml') - ).as_text() - - -def test_passing_parse_dublincore_to_transform(): - """Passing parse_dublincore=False to the constructor omits DublinCore parsing.""" - WLDocument.from_file( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), - parse_dublincore=False, - ).as_text() diff --git a/tests/utils.py b/tests/utils.py index 3b1f4f5..fc87532 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,7 +6,7 @@ from __future__ import with_statement from os.path import realpath, join, dirname import glob -import os + def get_fixture_dir(dir_name): """Returns path to fixtures directory dir_name.""" -- 2.20.1