librarian/packagers.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 import os
   7 from copy import deepcopy
   8 from lxml import etree
   9 from librarian import epub, pdf, DirDocProvider, ParseError, cover
  10 from librarian.dcparser import BookInfo
  11
  12
  13 class Packager(object):
  14     cover = None
  15     flags = None
  16
  17     @classmethod
  18     def prepare_file(cls, main_input, output_dir, verbose=False):
  19         path, fname = os.path.realpath(main_input).rsplit('/', 1)
  20         provider = DirDocProvider(path)
  21         slug, ext = os.path.splitext(fname)
  22
  23         if output_dir != '':
  24             try:
  25                 os.makedirs(output_dir)
  26             except:
  27                 pass
  28         outfile = os.path.join(output_dir, slug + '.' + cls.ext)
  29         cls.converter.transform(provider, file_path=main_input, output_file=outfile,
  30                 cover=cls.cover, flags=cls.flags)
  31
  32
  33     @classmethod
  34     def prepare(cls, input_filenames, output_dir='', verbose=False):
  35         try:
  36             for main_input in input_filenames:
  37                 if verbose:
  38                     print main_input
  39                 cls.prepare_file(main_input, output_dir, verbose)
  40         except ParseError, e:
  41             print '%(file)s:%(name)s:%(message)s' % {
  42                 'file': main_input,
  43                 'name': e.__class__.__name__,
  44                 'message': e.message
  45             }
  46
  47
  48 class EpubPackager(Packager):
  49     converter = epub
  50     ext = 'epub'
  51
  52 class PdfPackager(Packager):
  53     converter = pdf
  54     ext = 'pdf'
  55
  56
  57 class GandalfEpubPackager(EpubPackager):
  58     cover = cover.GandalfCover
  59
  60 class GandalfPdfPackager(PdfPackager):
  61     cover = cover.GandalfCover
  62
  63 class ArtaTechEpubPackager(EpubPackager):
  64     cover = cover.ArtaTechCover
  65
  66 class ArtaTechPdfPackager(PdfPackager):
  67     cover = cover.ArtaTechCover
  68
  69 class BookotekaEpubPackager(EpubPackager):
  70     cover = cover.BookotekaCover
  71
  72 class PrestigioEpubPackager(EpubPackager):
  73     cover = cover.PrestigioCover
  74     flags = ('less-advertising',)
  75
  76 class PrestigioPdfPackager(PdfPackager):
  77     cover = cover.PrestigioCover
  78     flags = ('less-advertising',)
  79
  80
  81 class VirtualoEpubPackager(Packager):
  82     @staticmethod
  83     def utf_trunc(text, limit):
  84         """ truncates text to at most `limit' bytes in utf-8 """
  85         if text is None:
  86             return text
  87         orig_text = text
  88         if len(text.encode('utf-8')) > limit:
  89             newlimit = limit - 3
  90             while len(text.encode('utf-8')) > newlimit:
  91                 text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
  92             text += '...'
  93         return text
  94
  95     @classmethod
  96     def prepare(cls, input_filenames, output_dir='', verbose=False):
  97         xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
  98             <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
  99         product = etree.fromstring("""<product>
 100                 <publisherProductId></publisherProductId>
 101                 <title></title>
 102                 <info></info>
 103                 <description></description>
 104                 <authors>
 105                     <author>
 106                         <names>Jan</names>
 107                         <lastName>Kowalski</lastName>
 108                     </author>
 109                 </authors>
 110                 <price>0.0</price>
 111                 <language>PL</language>
 112             </product>""")
 113
 114         try:
 115             for main_input in input_filenames:
 116                 if verbose:
 117                     print main_input
 118                 path, fname = os.path.realpath(main_input).rsplit('/', 1)
 119                 provider = DirDocProvider(path)
 120                 slug, ext = os.path.splitext(fname)
 121
 122                 outfile_dir = os.path.join(output_dir, slug)
 123                 os.makedirs(os.path.join(output_dir, slug))
 124
 125                 info = BookInfo.from_file(main_input)
 126
 127                 product_elem = deepcopy(product)
 128                 product_elem[0].text = cls.utf_trunc(slug, 100)
 129                 product_elem[1].text = cls.utf_trunc(info.title, 255)
 130                 product_elem[2].text = cls.utf_trunc(info.description, 255)
 131                 product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
 132                 product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
 133                 product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
 134                 xml.append(product_elem)
 135
 136                 cover.VirtualoCover(
 137                     u' '.join(info.author.first_names + (info.author.last_name,)),
 138                     info.title
 139                     ).save(os.path.join(outfile_dir, slug+'.jpg'))
 140                 outfile = os.path.join(outfile_dir, '1.epub')
 141                 outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
 142                 epub.transform(provider, file_path=main_input, output_file=outfile)
 143                 epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
 144         except ParseError, e:
 145             print '%(file)s:%(name)s:%(message)s' % {
 146                 'file': main_input,
 147                 'name': e.__class__.__name__,
 148                 'message': e.message
 149             }
 150
 151         xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
 152         xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
 153         xml_file.close()