From: Radek Czajka Date: Thu, 20 Feb 2014 14:10:10 +0000 (+0100) Subject: Reorganizing partner code a little. X-Git-Tag: 1.7~107 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/ec47c921b719f986fd221c5302c4190f18fdab75 Reorganizing partner code a little. --- diff --git a/librarian/packagers.py b/librarian/packagers.py index ddfd7c8..a32a2a0 100644 --- a/librarian/packagers.py +++ b/librarian/packagers.py @@ -4,9 +4,7 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import os -from copy import deepcopy -from lxml import etree -from librarian import pdf, epub, DirDocProvider, ParseError, cover +from librarian import pdf, epub, mobi, DirDocProvider, ParseError, cover from librarian.parser import WLDocument @@ -15,7 +13,11 @@ class Packager(object): flags = None @classmethod - def prepare_file(cls, main_input, output_dir, verbose=False): + def transform(cls, *args, **kwargs): + return cls.converter.transform(*args, **kwargs) + + @classmethod + def prepare_file(cls, main_input, output_dir, verbose=False, overwrite=False): path, fname = os.path.realpath(main_input).rsplit('/', 1) provider = DirDocProvider(path) slug, ext = os.path.splitext(fname) @@ -26,20 +28,22 @@ class Packager(object): except: pass outfile = os.path.join(output_dir, slug + '.' + cls.ext) + if os.path.exists(outfile) and not overwrite: + return doc = WLDocument.from_file(main_input, provider=provider) - output_file = cls.converter.transform(doc, + output_file = cls.transform(doc, cover=cls.cover, flags=cls.flags) doc.save_output_file(output_file, output_path=outfile) @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False): + def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False): try: for main_input in input_filenames: if verbose: print main_input - cls.prepare_file(main_input, output_dir, verbose) + cls.prepare_file(main_input, output_dir, verbose, overwrite) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, @@ -52,105 +56,14 @@ class EpubPackager(Packager): converter = epub ext = 'epub' +class MobiPackager(Packager): + converter = mobi + ext = 'mobi' + class PdfPackager(Packager): converter = pdf ext = 'pdf' - -class GandalfEpubPackager(EpubPackager): - cover = cover.GandalfCover - -class GandalfPdfPackager(PdfPackager): - cover = cover.GandalfCover - -class BookotekaEpubPackager(EpubPackager): - cover = cover.BookotekaCover - -class PrestigioEpubPackager(EpubPackager): - cover = cover.PrestigioCover - flags = ('less-advertising',) - -class PrestigioPdfPackager(PdfPackager): - cover = cover.PrestigioCover - flags = ('less-advertising',) - - -class VirtualoPackager(Packager): - @staticmethod - def utf_trunc(text, limit): - """ truncates text to at most `limit' bytes in utf-8 """ - if text is None: - return text - if len(text.encode('utf-8')) > limit: - newlimit = limit - 3 - while len(text.encode('utf-8')) > newlimit: - text = text[:(newlimit - len(text.encode('utf-8'))) / 4] - text += '...' - return text - @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False): - xml = etree.fromstring(""" - """) - product = etree.fromstring(""" - - - - - - - Jan - Kowalski - - - 0.0 - PL - """) - - try: - for main_input in input_filenames: - if verbose: - print main_input - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - slug, ext = os.path.splitext(fname) - - outfile_dir = os.path.join(output_dir, slug) - os.makedirs(os.path.join(output_dir, slug)) - - doc = WLDocument.from_file(main_input, provider=provider) - info = doc.book_info - - product_elem = deepcopy(product) - product_elem[0].text = cls.utf_trunc(slug, 100) - product_elem[1].text = cls.utf_trunc(info.title, 255) - product_elem[2].text = cls.utf_trunc(info.description, 255) - product_elem[3].text = cls.utf_trunc(info.source_name, 3000) - product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100) - product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) - xml.append(product_elem) - - cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) - outfile = os.path.join(outfile_dir, '1.epub') - outfile_sample = os.path.join(outfile_dir, '1.sample.epub') - doc.save_output_file(doc.as_epub(), - output_path=outfile) - doc.save_output_file(doc.as_epub(doc, sample=25), - output_path=outfile_sample) - outfile = os.path.join(outfile_dir, '1.mobi') - outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') - doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), - output_path=outfile) - doc.save_output_file( - doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), - output_path=outfile_sample) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e.message - } - - xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w') - xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8')) - xml_file.close() + def transform(cls, *args, **kwargs): + return cls.converter.transform(*args, morefloats='new', **kwargs) diff --git a/librarian/partners.py b/librarian/partners.py new file mode 100644 index 0000000..58bc8c5 --- /dev/null +++ b/librarian/partners.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# + +""" +Classes for packaging ebooks for some old partners. +These should be removed from librarian to separate package, +along with custom cover images etc. + +New partners shouldn't be added here, but in the partners repository. +""" + +from librarian import packagers, cover + +class GandalfEpub(packagers.EpubPackager): + cover = cover.GandalfCover + +class GandalfPdf(packagers.PdfPackager): + cover = cover.GandalfCover + +class BookotekaEpub(packagers.EpubPackager): + cover = cover.BookotekaCover + +class PrestigioEpub(packagers.EpubPackager): + cover = cover.PrestigioCover + flags = ('less-advertising',) + +class PrestigioPdf(packagers.PdfPackager): + cover = cover.PrestigioCover + flags = ('less-advertising',) + + +class Virtualo(packagers.Packager): + @staticmethod + def utf_trunc(text, limit): + """ truncates text to at most `limit' bytes in utf-8 """ + if text is None: + return text + if len(text.encode('utf-8')) > limit: + newlimit = limit - 3 + while len(text.encode('utf-8')) > newlimit: + text = text[:(newlimit - len(text.encode('utf-8'))) / 4] + text += '...' + return text + + @classmethod + def prepare(cls, input_filenames, output_dir='', verbose=False): + from lxml import etree + from librarian import DirDocProvider, ParseError + from librarian.parser import WLDocument + from copy import deepcopy + import os + import os.path + + xml = etree.fromstring(""" + """) + product = etree.fromstring(""" + + + + + + + Jan + Kowalski + + + 0.0 + PL + """) + + try: + for main_input in input_filenames: + if verbose: + print main_input + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + slug, ext = os.path.splitext(fname) + + outfile_dir = os.path.join(output_dir, slug) + os.makedirs(os.path.join(output_dir, slug)) + + doc = WLDocument.from_file(main_input, provider=provider) + info = doc.book_info + + product_elem = deepcopy(product) + product_elem[0].text = cls.utf_trunc(slug, 100) + product_elem[1].text = cls.utf_trunc(info.title, 255) + product_elem[2].text = cls.utf_trunc(info.description, 255) + product_elem[3].text = cls.utf_trunc(info.source_name, 3000) + product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100) + product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) + xml.append(product_elem) + + cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) + outfile = os.path.join(outfile_dir, '1.epub') + outfile_sample = os.path.join(outfile_dir, '1.sample.epub') + doc.save_output_file(doc.as_epub(), + output_path=outfile) + doc.save_output_file(doc.as_epub(doc, sample=25), + output_path=outfile_sample) + outfile = os.path.join(outfile_dir, '1.mobi') + outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') + doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), + output_path=outfile) + doc.save_output_file( + doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), + output_path=outfile_sample) + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e.message + } + + xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w') + xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8')) + xml_file.close() diff --git a/scripts/book2partner b/scripts/book2partner index 4b84c2f..3534d2a 100755 --- a/scripts/book2partner +++ b/scripts/book2partner @@ -4,9 +4,18 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +import inspect import optparse from librarian import packagers +try: + from collections import OrderedDict +except ImportError: + try: + from django.utils.datastructures import SortedDict + OrderedDict = SortedDict + except ImportError: + OrderedDict = dict if __name__ == '__main__': @@ -20,34 +29,53 @@ if __name__ == '__main__': help='print status messages to stdout') parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='', help='specifies the directory for output') - parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False, - help='prepare files for Bookoteka') - parser.add_option('--gandalf', action='store_true', dest='gandalf', default=False, - help='prepare EPUB files for Gandalf') - parser.add_option('--gandalf-pdf', action='store_true', dest='gandalf_pdf', default=False, - help='prepare PDF files for Gandalf') - parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False, - help='prepare files for Virtualo API') - parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False, - help='prepare files for Prestigio') - parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False, - help='prepare PDF files for Prestigio') + parser.add_option('-f', '--force-overwrite', + action='store_true', + dest='overwrite', + default=False, + help='rebuild the file even if it already exists') + parser.add_option('-p', '--packages', + dest='packages', + metavar='package,...', + default='', + help='lists packages to build') + parser.add_option('-m', '--packages-modules', + dest='packages_modules', metavar='path.to.module,...', + default='librarian.partners', + help='modules with packages definitions to load') + parser.add_option('-l', '--list-packages', + action='store_true', + dest='list_packages', + default=False, + help='lists available packages') options, input_filenames = parser.parse_args() + packages = OrderedDict() - if len(input_filenames) < 1: + for module_name in options.packages_modules.split(','): + if not module_name: + continue + module = __import__(module_name, globals(), locals(), ['*']) + for package_name in dir(module): + package = getattr(module, package_name) + if inspect.isclass(package) and issubclass(package, packagers.Packager): + packages[package_name] = package + if not packages: + print 'No packages found!' + + if options.list_packages: + print 'Available packages:' + for package_name, package in packages.items(): + print ' ', package_name + exit(0) + + if len(input_filenames) < 1 or not options.packages: parser.print_help() exit(1) - if options.bookoteka: - packagers.BookotekaEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.gandalf: - packagers.GandalfEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.gandalf_pdf: - packagers.GandalfPdfPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.virtualo: - packagers.VirtualoPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.prestigio: - packagers.PrestigioEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) - if options.prestigio_pdf: - packagers.PrestigioPdfPackager.prepare(input_filenames, options.output_dir, options.verbose) + used_packages = [packages[p] for p in options.packages.split(',')] + for package in used_packages: + if options.verbose: + print 'Package:', package.__name__ + package.prepare(input_filenames, + options.output_dir, options.verbose, options.overwrite)