librarian/packagers.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 import os
   7 from copy import deepcopy
   8 from lxml import etree
   9 from librarian import pdf, epub, DirDocProvider, ParseError, cover
  10 from librarian.parser import WLDocument
  11 from librarian.styles.wolnelektury.partners import cover
  12
  13
  14 class Packager(object):
  15     cover = None
  16     flags = None
  17     converter = NotImplemented
  18     ext = NotImplemented
  19
  20     @classmethod
  21     def prepare_file(cls, main_input, output_dir):
  22         path, fname = os.path.realpath(main_input).rsplit('/', 1)
  23         provider = DirDocProvider(path)
  24         slug, ext = os.path.splitext(fname)
  25
  26         if output_dir != '':
  27             if not os.path.isdir(output_dir):
  28                 os.makedirs(output_dir)
  29         outfile = os.path.join(output_dir, slug + '.' + cls.ext)
  30
  31         doc = WLDocument.from_file(main_input, provider=provider)
  32         output_file = cls.converter.transform(doc, cover=cls.cover, flags=cls.flags)
  33         doc.save_output_file(output_file, output_path=outfile)
  34
  35     @classmethod
  36     def prepare(cls, input_filenames, output_dir='', verbose=False):
  37         main_input = None
  38         try:
  39             for main_input in input_filenames:
  40                 if verbose:
  41                     print main_input
  42                 cls.prepare_file(main_input, output_dir)
  43         except ParseError, e:
  44             print '%(file)s:%(name)s:%(message)s' % {
  45                 'file': main_input,
  46                 'name': e.__class__.__name__,
  47                 'message': e.message
  48             }
  49
  50
  51 class EpubPackager(Packager):
  52     converter = epub
  53     ext = 'epub'
  54
  55
  56 class PdfPackager(Packager):
  57     converter = pdf
  58     ext = 'pdf'
  59
  60
  61 class GandalfEpubPackager(EpubPackager):
  62     cover = cover.GandalfCover
  63
  64
  65 class GandalfPdfPackager(PdfPackager):
  66     cover = cover.GandalfCover
  67
  68
  69 class BookotekaEpubPackager(EpubPackager):
  70     cover = cover.BookotekaCover
  71
  72
  73 class PrestigioEpubPackager(EpubPackager):
  74     cover = cover.PrestigioCover
  75     flags = ('less-advertising',)
  76
  77
  78 class PrestigioPdfPackager(PdfPackager):
  79     cover = cover.PrestigioCover
  80     flags = ('less-advertising',)
  81
  82
  83 class VirtualoPackager(Packager):
  84     @staticmethod
  85     def utf_trunc(text, limit):
  86         """ truncates text to at most `limit' bytes in utf-8 """
  87         if text is None:
  88             return text
  89         if len(text.encode('utf-8')) > limit:
  90             newlimit = limit - 3
  91             while len(text.encode('utf-8')) > newlimit:
  92                 text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
  93             text += '...'
  94         return text
  95
  96     @classmethod
  97     def prepare(cls, input_filenames, output_dir='', verbose=False):
  98         xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
  99             <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
 100         product = etree.fromstring("""<product>
 101                 <publisherProductId></publisherProductId>
 102                 <title></title>
 103                 <info></info>
 104                 <description></description>
 105                 <authors>
 106                     <author>
 107                         <names>Jan</names>
 108                         <lastName>Kowalski</lastName>
 109                     </author>
 110                 </authors>
 111                 <price>0.0</price>
 112                 <language>PL</language>
 113             </product>""")
 114
 115         main_input = None
 116         try:
 117             for main_input in input_filenames:
 118                 if verbose:
 119                     print main_input
 120                 path, fname = os.path.realpath(main_input).rsplit('/', 1)
 121                 provider = DirDocProvider(path)
 122                 slug, ext = os.path.splitext(fname)
 123
 124                 outfile_dir = os.path.join(output_dir, slug)
 125                 os.makedirs(os.path.join(output_dir, slug))
 126
 127                 doc = WLDocument.from_file(main_input, provider=provider)
 128                 info = doc.book_info
 129
 130                 product_elem = deepcopy(product)
 131                 product_elem[0].text = cls.utf_trunc(slug, 100)
 132                 product_elem[1].text = cls.utf_trunc(info.title, 255)
 133                 product_elem[2].text = cls.utf_trunc(info.description, 255)
 134                 product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
 135                 product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
 136                 product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
 137                 xml.append(product_elem)
 138
 139                 cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
 140                 outfile = os.path.join(outfile_dir, '1.epub')
 141                 outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
 142                 doc.save_output_file(doc.as_epub(), output_path=outfile)
 143                 doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample)
 144                 outfile = os.path.join(outfile_dir, '1.mobi')
 145                 outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
 146                 doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile)
 147                 doc.save_output_file(
 148                     doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), output_path=outfile_sample)
 149         except ParseError, e:
 150             print '%(file)s:%(name)s:%(message)s' % {
 151                 'file': main_input,
 152                 'name': e.__class__.__name__,
 153                 'message': e.message
 154             }
 155
 156         xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
 157         xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
 158         xml_file.close()