epub/mobi fixes
[librarian.git] / librarian / packagers.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import os
7 from copy import deepcopy
8 from lxml import etree
9 from librarian import epub, pdf, DirDocProvider, ParseError, cover
10 from librarian.dcparser import BookInfo
11
12
class Packager(object):
    """Base class for batch converters of book source files.

    Concrete subclasses are expected to provide two class attributes that
    are not defined here:

    * ``converter`` -- an object exposing ``transform()``,
    * ``ext`` -- the output file extension, without the leading dot.
    """
    # Passed unchanged as ``cover=`` to ``converter.transform()``.
    cover = None
    # Passed unchanged as ``flags=`` to ``converter.transform()``.
    flags = None

    @classmethod
    def prepare_file(cls, main_input, output_dir, verbose=False):
        """Convert a single input file into ``output_dir``.

        The output file is named after the input file, with its extension
        replaced by ``cls.ext``.

        :param main_input: path of the source file to convert
        :param output_dir: target directory; created if missing, unless it
            is the empty string
        :param verbose: accepted for interface symmetry with ``prepare()``;
            not used by this method
        :raises OSError: if ``output_dir`` cannot be created and does not
            already exist
        """
        # os.path.split() instead of rsplit('/') keeps this portable and
        # yields a usable directory for files sitting directly in the root.
        path, fname = os.path.split(os.path.realpath(main_input))
        provider = DirDocProvider(path)
        slug = os.path.splitext(fname)[0]

        if output_dir != '':
            try:
                os.makedirs(output_dir)
            except OSError:
                # An already existing directory is fine; any other failure
                # (permissions, a file in the way, ...) must not be hidden.
                if not os.path.isdir(output_dir):
                    raise
        outfile = os.path.join(output_dir, slug + '.' + cls.ext)
        cls.converter.transform(provider, file_path=main_input, output_file=outfile,
                cover=cls.cover, flags=cls.flags)

    @classmethod
    def prepare(cls, input_filenames, output_dir='', verbose=False):
        """Convert every file in ``input_filenames`` into ``output_dir``.

        A ``ParseError`` raised for any file is reported on standard output
        as ``file:ErrorClass:message`` and ends the run: the loop sits
        inside the ``try``, so the remaining files are not processed.

        :param input_filenames: iterable of source file paths
        :param output_dir: target directory, forwarded to ``prepare_file()``
        :param verbose: when true, print each input path before converting
        """
        try:
            for main_input in input_filenames:
                if verbose:
                    print(main_input)
                cls.prepare_file(main_input, output_dir, verbose)
        except ParseError as e:
            print('%(file)s:%(name)s:%(message)s' % {
                'file': main_input,
                'name': e.__class__.__name__,
                'message': e.message
            })
46
47
class EpubPackager(Packager):
    """Packager that converts with the ``epub`` module and writes ``.epub`` files."""
    # Extension appended to the output file name (without the dot).
    ext = 'epub'
    # Object whose ``transform()`` performs the conversion.
    converter = epub
51
class PdfPackager(Packager):
    """Packager that converts with the ``pdf`` module and writes ``.pdf`` files."""
    # Extension appended to the output file name (without the dot).
    ext = 'pdf'
    # Object whose ``transform()`` performs the conversion.
    converter = pdf
55
56
class GandalfEpubPackager(EpubPackager):
    """EPUB packager that hands ``cover.GandalfCover`` to the converter."""
    # Forwarded as ``cover=`` to the converter's ``transform()``.
    cover = cover.GandalfCover
59
class GandalfPdfPackager(PdfPackager):
    """PDF packager that hands ``cover.GandalfCover`` to the converter."""
    # Forwarded as ``cover=`` to the converter's ``transform()``.
    cover = cover.GandalfCover
62
class BookotekaEpubPackager(EpubPackager):
    """EPUB packager that hands ``cover.BookotekaCover`` to the converter."""
    # Forwarded as ``cover=`` to the converter's ``transform()``.
    cover = cover.BookotekaCover
65
class PrestigioEpubPackager(EpubPackager):
    """EPUB packager using ``cover.PrestigioCover`` and the 'less-advertising' flag."""
    # Forwarded as ``flags=`` to the converter's ``transform()``; what the
    # flag changes is decided by the converter and is not visible here.
    flags = ('less-advertising',)
    # Forwarded as ``cover=`` to the converter's ``transform()``.
    cover = cover.PrestigioCover
69
class PrestigioPdfPackager(PdfPackager):
    """PDF packager using ``cover.PrestigioCover`` and the 'less-advertising' flag."""
    # Forwarded as ``flags=`` to the converter's ``transform()``; what the
    # flag changes is decided by the converter and is not visible here.
    flags = ('less-advertising',)
    # Forwarded as ``cover=`` to the converter's ``transform()``.
    cover = cover.PrestigioCover
73
74
class VirtualoEpubPackager(Packager):
    """Packager producing a per-book directory layout plus a product list.

    For every input file ``prepare()`` writes, under ``output_dir/<slug>/``,
    a cover image (``<slug>.jpg``), the full book (``1.epub``) and a sample
    (``1.sample.epub``).  A single ``import_products.xml`` describing all
    processed books is written directly into ``output_dir``.
    """

    @staticmethod
    def utf_trunc(text, limit):
        """Truncate ``text`` to at most ``limit`` bytes in UTF-8.

        Text that already fits is returned unchanged, and ``None`` is passed
        through.  Otherwise the longest prefix that leaves room for a
        trailing ``'...'`` is kept and the ellipsis is appended.  When
        ``limit`` is too small to hold the ellipsis itself (``limit < 3``),
        the text is cut to ``limit`` bytes with no ellipsis instead of
        looping forever.

        :param text: text to shorten, or ``None``
        :param limit: maximum size of the result in UTF-8 bytes
        :returns: the possibly shortened text, or ``None``
        """
        if text is None:
            return text
        if len(text.encode('utf-8')) <= limit:
            return text

        ellipsis = '...'
        budget = limit - len(ellipsis)
        if budget < 0:
            # No room for the ellipsis: plain hard truncation.
            ellipsis = ''
            budget = max(limit, 0)

        while True:
            excess = len(text.encode('utf-8')) - budget
            if excess <= 0:
                break
            # One character is at most 4 bytes in UTF-8, so at least
            # ceil(excess / 4) characters have to go; dropping exactly that
            # many per round never removes more than necessary.
            text = text[:-((excess + 3) // 4)]
        return text + ellipsis

    @classmethod
    def prepare(cls, input_filenames, output_dir='', verbose=False):
        """Build the per-book output and ``import_products.xml``.

        A ``ParseError`` raised for any file is reported on standard output
        as ``file:ErrorClass:message`` and stops the processing of the
        remaining files (the loop sits inside the ``try``).  The XML file is
        still written afterwards, with the products collected so far.

        :param input_filenames: iterable of source file paths
        :param output_dir: directory receiving the per-book subdirectories
            and ``import_products.xml``
        :param verbose: when true, print each input path before processing
        :raises OSError: if a per-book directory cannot be created and does
            not already exist
        """
        xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
            <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
        # Template copied for every book; the author names below are
        # placeholders that get overwritten for each product.
        product = etree.fromstring("""<product>
                <publisherProductId></publisherProductId>
                <title></title>
                <info></info>
                <description></description>
                <authors>
                    <author>
                        <names>Jan</names>
                        <lastName>Kowalski</lastName>
                    </author>
                </authors>
                <price>0.0</price>
                <language>PL</language>
            </product>""")

        try:
            for main_input in input_filenames:
                if verbose:
                    print(main_input)
                path, fname = os.path.split(os.path.realpath(main_input))
                provider = DirDocProvider(path)
                slug = os.path.splitext(fname)[0]

                outfile_dir = os.path.join(output_dir, slug)
                try:
                    os.makedirs(outfile_dir)
                except OSError:
                    # Re-running over an existing output tree is fine; any
                    # other failure must surface.
                    if not os.path.isdir(outfile_dir):
                        raise

                info = BookInfo.from_file(main_input)

                # Children are addressed by position in the template above:
                # 0 publisherProductId, 1 title, 2 info, 3 description,
                # 4 authors (-> author -> names / lastName).
                product_elem = deepcopy(product)
                product_elem[0].text = cls.utf_trunc(slug, 100)
                product_elem[1].text = cls.utf_trunc(info.title, 255)
                # NOTE(review): <info> receives info.description and
                # <description> receives info.source_name -- confirm this
                # mapping is intended.
                product_elem[2].text = cls.utf_trunc(info.description, 255)
                product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
                product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
                product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
                xml.append(product_elem)

                cover.VirtualoCover(
                    u' '.join(info.author.first_names + (info.author.last_name,)),
                    info.title
                    ).save(os.path.join(outfile_dir, slug+'.jpg'))
                outfile = os.path.join(outfile_dir, '1.epub')
                outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
                epub.transform(provider, file_path=main_input, output_file=outfile)
                epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
        except ParseError as e:
            print('%(file)s:%(name)s:%(message)s' % {
                'file': main_input,
                'name': e.__class__.__name__,
                'message': e.message
            })

        # The context manager closes the file even if serialisation or the
        # write itself raises.
        xml_path = os.path.join(output_dir, 'import_products.xml')
        with open(xml_path, 'w') as xml_file:
            xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))