1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
7 from copy import deepcopy
9 from librarian import pdf, epub, DirDocProvider, ParseError, cover
10 from librarian.parser import WLDocument
13 class Packager(object):
18 def prepare_file(cls, main_input, output_dir, verbose=False):
19 path, fname = os.path.realpath(main_input).rsplit('/', 1)
20 provider = DirDocProvider(path)
21 slug, ext = os.path.splitext(fname)
25 os.makedirs(output_dir)
28 outfile = os.path.join(output_dir, slug + '.' + cls.ext)
30 doc = WLDocument.from_file(main_input, provider=provider)
31 output_file = cls.converter.transform(doc,
32 cover=cls.cover, flags=cls.flags)
33 doc.save_output_file(output_file, output_path=outfile)
37 def prepare(cls, input_filenames, output_dir='', verbose=False):
39 for main_input in input_filenames:
42 cls.prepare_file(main_input, output_dir, verbose)
44 print '%(file)s:%(name)s:%(message)s' % {
46 'name': e.__class__.__name__,
51 class EpubPackager(Packager):
55 class PdfPackager(Packager):
class GandalfEpubPackager(EpubPackager):
    """EpubPackager subclass whose ``cover`` is ``cover.GandalfCover``."""

    # On the right-hand side ``cover`` still refers to the name imported
    # from ``librarian``; the class attribute is bound only afterwards.
    cover = cover.GandalfCover
class GandalfPdfPackager(PdfPackager):
    """PdfPackager subclass whose ``cover`` is ``cover.GandalfCover``."""

    # On the right-hand side ``cover`` still refers to the name imported
    # from ``librarian``; the class attribute is bound only afterwards.
    cover = cover.GandalfCover
class BookotekaEpubPackager(EpubPackager):
    """EpubPackager subclass whose ``cover`` is ``cover.BookotekaCover``."""

    # On the right-hand side ``cover`` still refers to the name imported
    # from ``librarian``; the class attribute is bound only afterwards.
    cover = cover.BookotekaCover
class PrestigioEpubPackager(EpubPackager):
    """EpubPackager subclass using ``cover.PrestigioCover`` and one flag.

    Sets ``flags`` to a one-element tuple holding 'less-advertising'.
    """

    # On the right-hand side ``cover`` still refers to the name imported
    # from ``librarian``; the class attribute is bound only afterwards.
    cover = cover.PrestigioCover
    # The trailing comma keeps this a tuple rather than a bare string.
    flags = ('less-advertising',)
class PrestigioPdfPackager(PdfPackager):
    """PdfPackager subclass using ``cover.PrestigioCover`` and one flag.

    Sets ``flags`` to a one-element tuple holding 'less-advertising'.
    """

    # On the right-hand side ``cover`` still refers to the name imported
    # from ``librarian``; the class attribute is bound only afterwards.
    cover = cover.PrestigioCover
    # The trailing comma keeps this a tuple rather than a bare string.
    flags = ('less-advertising',)
78 class VirtualoEpubPackager(Packager):
80 def utf_trunc(text, limit):
81 """ truncates text to at most `limit' bytes in utf-8 """
84 if len(text.encode('utf-8')) > limit:
86 while len(text.encode('utf-8')) > newlimit:
87 text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
92 def prepare(cls, input_filenames, output_dir='', verbose=False):
93 xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
94 <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
95 product = etree.fromstring("""<product>
96 <publisherProductId></publisherProductId>
99 <description></description>
103 <lastName>Kowalski</lastName>
107 <language>PL</language>
111 for main_input in input_filenames:
114 path, fname = os.path.realpath(main_input).rsplit('/', 1)
115 provider = DirDocProvider(path)
116 slug, ext = os.path.splitext(fname)
118 outfile_dir = os.path.join(output_dir, slug)
119 os.makedirs(os.path.join(output_dir, slug))
121 doc = WLDocument.from_file(main_input, provider=provider)
124 product_elem = deepcopy(product)
125 product_elem[0].text = cls.utf_trunc(slug, 100)
126 product_elem[1].text = cls.utf_trunc(info.title, 255)
127 product_elem[2].text = cls.utf_trunc(info.description, 255)
128 product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
129 product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
130 product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
131 xml.append(product_elem)
134 u' '.join(info.author.first_names + (info.author.last_name,)),
136 ).save(os.path.join(outfile_dir, slug+'.jpg'))
137 outfile = os.path.join(outfile_dir, '1.epub')
138 outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
139 doc.save_output_file(epub.transform(doc),
141 doc.save_output_file(epub.transform(doc, sample=25),
142 output_path=outfile_sample)
143 except ParseError, e:
144 print '%(file)s:%(name)s:%(message)s' % {
146 'name': e.__class__.__name__,
150 xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
151 xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))