X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/e868b7e6511dfbb89e5a9dd69bbb7ef13c42620a..59caaf5ca771cd80c74253fd26b2671eb199b8bf:/librarian/packagers.py diff --git a/librarian/packagers.py b/librarian/packagers.py index 054f068..a32a2a0 100644 --- a/librarian/packagers.py +++ b/librarian/packagers.py @@ -4,10 +4,8 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import os -from copy import deepcopy -from lxml import etree -from librarian import epub, pdf, DirDocProvider, ParseError, cover -from librarian.dcparser import BookInfo +from librarian import pdf, epub, mobi, DirDocProvider, ParseError, cover +from librarian.parser import WLDocument class Packager(object): @@ -15,7 +13,11 @@ class Packager(object): flags = None @classmethod - def prepare_file(cls, main_input, output_dir, verbose=False): + def transform(cls, *args, **kwargs): + return cls.converter.transform(*args, **kwargs) + + @classmethod + def prepare_file(cls, main_input, output_dir, verbose=False, overwrite=False): path, fname = os.path.realpath(main_input).rsplit('/', 1) provider = DirDocProvider(path) slug, ext = os.path.splitext(fname) @@ -26,17 +28,22 @@ class Packager(object): except: pass outfile = os.path.join(output_dir, slug + '.' + cls.ext) - cls.converter.transform(provider, file_path=main_input, output_file=outfile, + if os.path.exists(outfile) and not overwrite: + return + + doc = WLDocument.from_file(main_input, provider=provider) + output_file = cls.transform(doc, cover=cls.cover, flags=cls.flags) + doc.save_output_file(output_file, output_path=outfile) @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False): + def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False): try: for main_input in input_filenames: if verbose: print main_input - cls.prepare_file(main_input, output_dir, verbose) + cls.prepare_file(main_input, output_dir, verbose, overwrite) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, @@ -49,99 +56,14 @@ class EpubPackager(Packager): converter = epub ext = 'epub' +class MobiPackager(Packager): + converter = mobi + ext = 'mobi' + class PdfPackager(Packager): converter = pdf ext = 'pdf' - -class GandalfEpubPackager(EpubPackager): - cover = cover.GandalfCover - -class GandalfPdfPackager(PdfPackager): - cover = cover.GandalfCover - -class BookotekaEpubPackager(EpubPackager): - cover = cover.BookotekaCover - -class PrestigioEpubPackager(EpubPackager): - cover = cover.PrestigioCover - flags = ('less-advertising',) - -class PrestigioPdfPackager(PdfPackager): - cover = cover.PrestigioCover - flags = ('less-advertising',) - - -class VirtualoEpubPackager(Packager): - @staticmethod - def utf_trunc(text, limit): - """ truncates text to at most `limit' bytes in utf-8 """ - if text is None: - return text - orig_text = text - if len(text.encode('utf-8')) > limit: - newlimit = limit - 3 - while len(text.encode('utf-8')) > newlimit: - text = text[:(newlimit - len(text.encode('utf-8'))) / 4] - text += '...' - return text - @classmethod - def prepare(cls, input_filenames, output_dir='', verbose=False): - xml = etree.fromstring(""" - """) - product = etree.fromstring(""" - - - - - - - Jan - Kowalski - - - 0.0 - PL - """) - - try: - for main_input in input_filenames: - if verbose: - print main_input - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - slug, ext = os.path.splitext(fname) - - outfile_dir = os.path.join(output_dir, slug) - os.makedirs(os.path.join(output_dir, slug)) - - info = BookInfo.from_file(main_input) - - product_elem = deepcopy(product) - product_elem[0].text = cls.utf_trunc(slug, 100) - product_elem[1].text = cls.utf_trunc(info.title, 255) - product_elem[2].text = cls.utf_trunc(info.description, 255) - product_elem[3].text = cls.utf_trunc(info.source_name, 3000) - product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100) - product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) - xml.append(product_elem) - - cover.VirtualoCover( - u' '.join(info.author.first_names + (info.author.last_name,)), - info.title - ).save(os.path.join(outfile_dir, slug+'.jpg')) - outfile = os.path.join(outfile_dir, '1.epub') - outfile_sample = os.path.join(outfile_dir, '1.sample.epub') - epub.transform(provider, file_path=main_input, output_file=outfile) - epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e.message - } - - xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w') - xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8')) - xml_file.close() + def transform(cls, *args, **kwargs): + return cls.converter.transform(*args, morefloats='new', **kwargs)