From: Radek Czajka Date: Mon, 24 Sep 2012 12:43:39 +0000 (+0200) Subject: Merge remote-tracking branch 'mgorny/fb2' X-Git-Tag: 1.7~144 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/cba76114f031d47c93e1af947a350230cbef0a1f?hp=c51d51f2b1fd8d2eaa63224bb0aeac6b85ac3737 Merge remote-tracking branch 'mgorny/fb2' --- diff --git a/librarian/__init__.py b/librarian/__init__.py index 8a69d00..c46d5d1 100644 --- a/librarian/__init__.py +++ b/librarian/__init__.py @@ -8,15 +8,22 @@ from __future__ import with_statement import os import re import shutil +import urllib + class UnicodeException(Exception): def __str__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message + return unicode(self).encode('utf-8') def __unicode__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message + args = self.args[0] if len(self.args) == 1 else self.args + try: + message = unicode(args) + except UnicodeDecodeError: + message = unicode(args, encoding='utf-8', errors='ignore') + return message class ParseError(UnicodeException): pass @@ -267,3 +274,8 @@ class OutputFile(object): if not os.path.isdir(dirname): os.makedirs(dirname) shutil.copy(self.get_filename(), path) + + +class URLOpener(urllib.FancyURLopener): + version = 'FNP Librarian (http://github.com/fnp/librarian)' +urllib._urlopener = URLOpener() diff --git a/librarian/book2anything.py b/librarian/book2anything.py index 7660ec7..b8b8d27 100755 --- a/librarian/book2anything.py +++ b/librarian/book2anything.py @@ -99,7 +99,9 @@ class Book2Anything(object): # Add cover support, if any. if cls.uses_cover: if options.image_cache: - transform_args['cover'] = lambda x: WLCover(x, image_cache = options.image_cache) + def cover_class(*args, **kwargs): + return WLCover(image_cache=options.image_cache, *args, **kwargs) + transform_args['cover'] = cover_class elif not cls.cover_optional or options.with_cover: transform_args['cover'] = WLCover diff --git a/librarian/cover.py b/librarian/cover.py index 02d76f9..be34e26 100644 --- a/librarian/cover.py +++ b/librarian/cover.py @@ -5,7 +5,8 @@ # import re import Image, ImageFont, ImageDraw, ImageFilter -from librarian import get_resource +from StringIO import StringIO +from librarian import get_resource, OutputFile, URLOpener class TextBox(object): @@ -119,9 +120,11 @@ class Cover(object): 'PNG': 'image/png', } - def __init__(self, book_info): + def __init__(self, book_info, format=None): self.author = ", ".join(auth.readable() for auth in book_info.authors) self.title = book_info.title + if format is not None: + self.format = format def pretty_author(self): """Allows for decorating author's name.""" @@ -180,6 +183,11 @@ class Cover(object): def save(self, *args, **kwargs): return self.image().save(format=self.format, *args, **kwargs) + def output_file(self, *args, **kwargs): + imgstr = StringIO() + self.save(imgstr, *args, **kwargs) + return OutputFile.from_string(imgstr.getvalue()) + class WLCover(Cover): """Default Wolne Lektury cover generator.""" @@ -212,24 +220,21 @@ class WLCover(Cover): u'Współczesność': '#06393d', } - def __init__(self, book_info, image_cache=None): - super(WLCover, self).__init__(book_info) + def __init__(self, book_info, format=None, image_cache=None): + super(WLCover, self).__init__(book_info, format=format) self.kind = book_info.kind self.epoch = book_info.epoch if book_info.cover_url: - from urllib2 import urlopen - from StringIO import StringIO - url = book_info.cover_url bg_src = None if image_cache: from urllib import quote try: - bg_src = urlopen(image_cache + quote(url, safe="")) + bg_src = URLOpener().open(image_cache + quote(url, safe="")) except: pass if bg_src is None: - bg_src = urlopen(url) + bg_src = URLOpener().open(url) self.background_img = StringIO(bg_src.read()) bg_src.close() else: diff --git a/librarian/dcparser.py b/librarian/dcparser.py index 5a571ec..eddd8e5 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -115,10 +115,21 @@ class Field(object): except ValueError, e: raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message)) - def validate(self, fdict, strict=False): + def validate(self, fdict, fallbacks=None, strict=False): + if fallbacks is None: + fallbacks = {} if not fdict.has_key(self.uri): if not self.required: - f = self.default + # Accept single value for single fields and saliases. + if self.name in fallbacks: + if self.multiple: + f = fallbacks[self.name] + else: + f = [fallbacks[self.name]] + elif self.salias and self.salias in fallbacks: + f = [fallbacks[self.salias]] + else: + f = self.default else: raise ValidationError("Required field %s not found" % self.uri) else: @@ -224,7 +235,7 @@ class WorkInfo(object): return cls(desc.attrib, field_dict, *args, **kwargs) - def __init__(self, rdf_attrs, dc_fields, strict=False): + def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False): """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description. dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the given field. """ @@ -233,7 +244,8 @@ class WorkInfo(object): self.fmap = {} for field in self.FIELDS: - value = field.validate(dc_fields, strict=strict) + value = field.validate(dc_fields, fallbacks=fallbacks, + strict=strict) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field if field.salias: self.fmap[field.salias] = field diff --git a/librarian/epub.py b/librarian/epub.py index 469ff40..bbeb3d7 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -368,6 +368,10 @@ def transform(wldoc, verbose=False, for flag in flags: document.edoc.getroot().set(flag, 'yes') + # add editors info + document.edoc.getroot().set('editors', u', '.join(sorted( + editor.readable() for editor in document.editors()))) + opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) guide = opf.find('.//' + OPFNS('guide')) diff --git a/librarian/epub/xsltLast.xsl b/librarian/epub/xsltLast.xsl index 751f97a..5288443 100644 --- a/librarian/epub/xsltLast.xsl +++ b/librarian/epub/xsltLast.xsl @@ -103,22 +103,13 @@ - +

Opracowanie redakcyjne i przypisy: - - - , - - . -

+ .

- -
-
- diff --git a/librarian/parser.py b/librarian/parser.py index 6343d21..a9e8c65 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -5,6 +5,7 @@ # from librarian import ValidationError, NoDublinCore, ParseError, NoProvider from librarian import RDFNS +from librarian.cover import WLCover from librarian import dcparser from xml.parsers.expat import ExpatError @@ -19,7 +20,8 @@ class WLDocument(object): LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) provider = None - def __init__(self, edoc, parse_dublincore=True, provider=None, strict=False): + def __init__(self, edoc, parse_dublincore=True, provider=None, + strict=False, meta_fallbacks=None): self.edoc = edoc self.provider = provider @@ -37,7 +39,7 @@ class WLDocument(object): raise NoDublinCore('Document has no DublinCore - which is required.') self.book_info = dcparser.BookInfo.from_element( - self.rdf_elem, strict=strict) + self.rdf_elem, fallbacks=meta_fallbacks, strict=strict) else: self.book_info = None @@ -46,7 +48,7 @@ class WLDocument(object): return cls.from_file(StringIO(xml), *args, **kwargs) @classmethod - def from_file(cls, xmlfile, parse_dublincore=True, provider=None): + def from_file(cls, xmlfile, *args, **kwargs): # first, prepare for parsing if isinstance(xmlfile, basestring): @@ -67,7 +69,7 @@ class WLDocument(object): parser = etree.XMLParser(remove_blank_text=False) tree = etree.parse(StringIO(data.encode('utf-8')), parser) - return cls(tree, parse_dublincore=parse_dublincore, provider=provider) + return cls(tree, *args, **kwargs) except (ExpatError, XMLSyntaxError, XSLTApplyError), e: raise ParseError(e) @@ -147,7 +149,7 @@ class WLDocument(object): xpath = self.path_to_xpath(key) node = self.edoc.xpath(xpath)[0] repl = etree.fromstring(u"<%s>%s" %(node.tag, data, node.tag) ) - node.getparent().replace(node, repl); + node.getparent().replace(node, repl) except Exception, e: unmerged.append( repr( (key, xpath, e) ) ) @@ -163,6 +165,21 @@ class WLDocument(object): node.tag = 'span' node.tail = tail + def editors(self): + """Returns a set of all editors for book and its children. + + :returns: set of dcparser.Person objects + """ + if self.book_info is None: + raise NoDublinCore('No Dublin Core in document.') + persons = set(self.book_info.editors + + self.book_info.technical_editors) + for child in self.parts(): + persons.update(child.editors()) + if None in persons: + persons.remove(None) + return persons + # Converters def as_html(self, *args, **kwargs): @@ -189,6 +206,11 @@ class WLDocument(object): from librarian import fb2 return fb2.transform(self, *args, **kwargs) + def as_cover(self, cover_class=None, *args, **kwargs): + if cover_class is None: + cover_class = WLCover + return cover_class(self.book_info, *args, **kwargs).output_file() + def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None): if output_dir_path: diff --git a/librarian/pdf.py b/librarian/pdf.py index 3c83cad..9fb92b1 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -3,6 +3,12 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +"""PDF creation library. + +Creates one big XML from the book and its children, converts it to LaTeX +with TeXML, then runs it by XeLaTeX. + +""" from __future__ import with_statement import os import os.path @@ -135,9 +141,13 @@ def hack_motifs(doc): def parse_creator(doc): - """ find all dc:creator and dc.contributor tags and add *_parsed versions with forenames first """ + """Generates readable versions of creator and translator tags. + + Finds all dc:creator and dc.contributor.translator tags + and adds *_parsed versions with forenames first. + """ for person in doc.xpath("|".join('//dc:'+(tag) for tag in ( - 'creator', 'contributor.translator', 'contributor.editor', 'contributor.technical_editor')), + 'creator', 'contributor.translator')), namespaces = {'dc': str(DCNS)})[::-1]: if not person.text: continue @@ -188,32 +198,39 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, # Parse XSLT try: + book_info = wldoc.book_info document = load_including_children(wldoc) + root = document.edoc.getroot() if cover: if cover is True: cover = WLCover - bound_cover = cover(document.book_info) - document.edoc.getroot().set('data-cover-width', str(bound_cover.width)) - document.edoc.getroot().set('data-cover-height', str(bound_cover.height)) + bound_cover = cover(book_info) + root.set('data-cover-width', str(bound_cover.width)) + root.set('data-cover-height', str(bound_cover.height)) if bound_cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + if book_info.cover_by: + root.set('data-cover-by', book_info.cover_by) + if book_info.cover_source: + root.set('data-cover-source', + book_info.cover_source) if flags: for flag in flags: - document.edoc.getroot().set('flag-' + flag, 'yes') + root.set('flag-' + flag, 'yes') # check for LaTeX packages if morefloats: - document.edoc.getroot().set('morefloats', morefloats.lower()) + root.set('morefloats', morefloats.lower()) elif package_available('morefloats', 'maxfloats=19'): - document.edoc.getroot().set('morefloats', 'new') + root.set('morefloats', 'new') # add customizations if customizations is not None: - document.edoc.getroot().set('customizations', u','.join(customizations)) + root.set('customizations', u','.join(customizations)) + + # add editors info + root.set('editors', u', '.join(sorted( + editor.readable() for editor in document.editors()))) # hack the tree move_motifs_inside(document.edoc) @@ -294,7 +311,8 @@ def load_including_children(wldoc=None, provider=None, uri=None): text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) - document = WLDocument.from_string(text, parse_dublincore=True) + document = WLDocument.from_string(text, + parse_dublincore=True, provider=provider) document.swap_endlines() for child_uri in document.book_info.parts: diff --git a/librarian/pdf/wl2tex.xslt b/librarian/pdf/wl2tex.xslt index 1a675ba..909cf4b 100644 --- a/librarian/pdf/wl2tex.xslt +++ b/librarian/pdf/wl2tex.xslt @@ -100,9 +100,11 @@ } + \def\editors{} + @@ -163,7 +165,6 @@ \vspace{.6em} } \def\description{} - \def\editors{} @@ -376,13 +377,10 @@ - + Opracowanie redakcyjne i przypisy: - - - , - - . + + . diff --git a/scripts/book2cover b/scripts/book2cover index ae11e60..3cc0ed7 100755 --- a/scripts/book2cover +++ b/scripts/book2cover @@ -17,9 +17,7 @@ class Book2Cover(Book2Anything): @staticmethod def transform(wldoc, cover): - output = StringIO() - cover(wldoc.book_info).save(output) - return OutputFile.from_string(output.getvalue()) + return wldoc.as_cover(cover_class=cover) if __name__ == '__main__': diff --git a/setup.py b/setup.py index b6dbcb4..f88817e 100755 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def whole_tree(prefix, path): setup( name='librarian', - version='1.5', + version='1.5.1', description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', author="Marek Stępniowski", author_email='marek@stepniowski.com', diff --git a/tests/files/picture/angelus-novus.jpeg b/tests/files/picture/angelus-novus.jpeg new file mode 100644 index 0000000..fd0394f Binary files /dev/null and b/tests/files/picture/angelus-novus.jpeg differ diff --git a/tests/files/picture/angelus-novus.png b/tests/files/picture/angelus-novus.png deleted file mode 100644 index 9925dad..0000000 Binary files a/tests/files/picture/angelus-novus.png and /dev/null differ diff --git a/tests/files/picture/angelus-novus.xml b/tests/files/picture/angelus-novus.xml index 0f26730..964faed 100644 --- a/tests/files/picture/angelus-novus.xml +++ b/tests/files/picture/angelus-novus.xml @@ -18,9 +18,9 @@ Domena publiczna - Paul Klee zm. 1940 1940 Image - image/png - 1645 x 2000 px - d9ead48f3442ac4e1add602aacdffa4638ae8e21 + image/jpeg + 329 x 400 px + 5ed8e8d24d92017c6341c0b8cfcc414dec55b8bf 1920 lat @@ -29,14 +29,14 @@
-
+
-
+
-
-
+
+
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt index 70c3185..d300b3e 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected.txt +++ b/tests/files/text/asnyk_miedzy_nami_expected.txt @@ -39,4 +39,4 @@ Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. -Opracowanie redakcyjne i przypisy: Aleksandra Sekuła, Olga Sutkowska +Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska diff --git a/tests/files/text/asnyk_zbior.xml b/tests/files/text/asnyk_zbior.xml index c585a8b..6a781f3 100755 --- a/tests/files/text/asnyk_zbior.xml +++ b/tests/files/text/asnyk_zbior.xml @@ -9,9 +9,11 @@ Pozytywizm Liryka Wiersz +Fikcyjny, Adam Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. http://wolnelektury.pl/katalog/lektura/poezye http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo +http://wolnelektury.pl/katalog/lektura/do-mlodych http://www.polona.pl/Content/5164 (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 Domena publiczna - Adam Asnyk zm. 1897 diff --git a/tests/files/text/do-mlodych.xml b/tests/files/text/do-mlodych.xml new file mode 100755 index 0000000..21fa522 --- /dev/null +++ b/tests/files/text/do-mlodych.xml @@ -0,0 +1,70 @@ + + +Asnyk, Adam +Do młodych +Sekuła, Aleksandra +Sutkowska, Olga +Fundacja Nowoczesna Polska +Pozytywizm +Liryka +Wiersz +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. +http://wolnelektury.pl/katalog/lektura/do-mlodych +http://www.polona.pl/Content/8616 +El...y (Adam Asnyk), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa 1898 +Domena publiczna - Adam Asnyk zm. 1897 +1897 +xml +text +text +2009-04-07 +L +pol +http://redakcja.wolnelektury.pl/media/dynamic/cover/image/35.jpg +leboski@Flickr, CC BY 2.0 +http://redakcja.wolnelektury.pl/cover/image/35 + + + +Adam Asnyk + +Do młodych + + + + +Szukajcie prawdy jasnego płomienia,/ +Szukajcie nowych, nieodkrytych dróg!/ +Za każdym krokiem w tajniki stworzenia/ +Coraz się dusza ludzka rozprzestrzenia/ +I większym staje się Bóg! + + +Choć otrząśniecie kwiaty barwnych mitów,/ +Choć rozproszycie legendowy mrok,/ +Choć mgłę urojeń zedrzecie z błękitów, ---/ +Ludziom niebiańskich nie zbraknie zachwytów,/ +Lecz dalej sięgnie ich wzrok. + + +Czas, Kondycja ludzka, PrzemijanieKażda epoka ma swe własne cele/ +I zapomina o wczorajszych snach:/ +Nieście więc wiedzy pochodnię na czele/ +I nowy udział bierzcie w wieków dziele,---/ +Przyszłości podnoście gmach! + + +Ale nie depczcie przeszłości ołtarzy,/ +Choć macie sami doskonalsze wznieść:/ +Na nich się jeszcze święty ogień żarzy,/ +I miłość ludzka stoi tam na straży,/ +I wy winniście im cześć! + + +Ze światem, który w ciemność już zachodzi/ +Wraz z całą tęczą idealnych snów,/ +Prawdziwa mądrość niechaj was pogodzi:/ +I wasze gwiazdy, o zdobywcy młodzi,/ +W ciemnościach pogasną znów! + + \ No newline at end of file diff --git a/tests/files/text/miedzy-nami-nic-nie-bylo.xml b/tests/files/text/miedzy-nami-nic-nie-bylo.xml index 124940e..a94b8f0 100644 --- a/tests/files/text/miedzy-nami-nic-nie-bylo.xml +++ b/tests/files/text/miedzy-nami-nic-nie-bylo.xml @@ -9,6 +9,8 @@ Sekuła, Aleksandra Sutkowska, Olga +Fikcyjny, Adam +Fikcyjny, Adam Fundacja Nowoczesna Polska Pozytywizm Liryka diff --git a/tests/test_epub.py b/tests/test_epub.py index 9fc5637..faa76e7 100644 --- a/tests/test_epub.py +++ b/tests/test_epub.py @@ -3,14 +3,29 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from zipfile import ZipFile +from lxml import html +from nose.tools import * from librarian import DirDocProvider from librarian.parser import WLDocument -from nose.tools import * -from utils import get_fixture +from tests.utils import get_fixture def test_transform(): - WLDocument.from_file( + epub = WLDocument.from_file( get_fixture('text', 'asnyk_zbior.xml'), provider=DirDocProvider(get_fixture('text', '')) - ).as_epub(flags=['without_fonts']) + ).as_epub(flags=['without_fonts']).get_file() + zipf = ZipFile(epub) + + # Check contributor list. + last = zipf.open('OPS/last.html') + tree = html.parse(last) + editors_attribution = False + for par in tree.findall("//p"): + if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'): + editors_attribution = True + assert_equal(par.text.rstrip(), + u'Opracowanie redakcyjne i przypisy: ' + u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.') + assert_true(editors_attribution) diff --git a/tests/test_pdf.py b/tests/test_pdf.py new file mode 100644 index 0000000..75b73bc --- /dev/null +++ b/tests/test_pdf.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import re +from tempfile import NamedTemporaryFile +from nose.tools import * +from librarian import DirDocProvider +from librarian.parser import WLDocument +from utils import get_fixture + + +def test_transform(): + temp = NamedTemporaryFile(delete=False) + temp.close() + WLDocument.from_file( + get_fixture('text', 'asnyk_zbior.xml'), + provider=DirDocProvider(get_fixture('text', '')) + ).as_pdf(save_tex=temp.name) + tex = open(temp.name).read().decode('utf-8') + print tex + + # Check contributor list. + editors = re.search(ur'\\def\\editors\{' + ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex) + assert_equal(editors.group(1), + u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska") diff --git a/tests/test_picture.py b/tests/test_picture.py index 71a77dc..f64f624 100644 --- a/tests/test_picture.py +++ b/tests/test_picture.py @@ -31,7 +31,7 @@ def test_wlpicture(): # from nose.tools import set_trace; set_trace() assert pi.type[0] == u"Image" - assert pi.mime_type == u'image/png' == wlp.mime_type + assert pi.mime_type == u'image/jpeg' == wlp.mime_type assert wlp.slug == 'angelus-novus' assert path.exists(wlp.image_path)