1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
7 from copy import deepcopy
9 from librarian import pdf, epub, DirDocProvider, ParseError, cover
10 from librarian.parser import WLDocument
# Base packager: converts input documents with the class's `converter` and
# saves each result into an output directory.  Subclasses customise it through
# the class attributes read below (`converter`, `ext`, `cover`, `flags`).
# NOTE(review): parts of this class are not visible in this excerpt (the
# embedded line numbers skip), so the comments describe only the statements
# that are shown; anything about the missing lines is marked as an assumption.
13 class Packager(object):
# Convert one document and save it as <output_dir>/<slug>.<cls.ext>.
#   main_input -- path of the source file; its directory is handed to
#                 DirDocProvider and its base name without extension is the slug
#   output_dir -- directory that receives the output file
#   verbose    -- not used by any statement visible in this method
# Takes `cls` as its first parameter, so presumably decorated as a
# classmethod -- the decorator line is not visible; TODO confirm.
18 def prepare_file(cls, main_input, output_dir, verbose=False):
# Splits on a literal '/', so this only works where realpath() returns
# '/'-separated paths.
19 path, fname = os.path.realpath(main_input).rsplit('/', 1)
20 provider = DirDocProvider(path)
21 slug, ext = os.path.splitext(fname)
# NOTE(review): os.makedirs raises if the directory already exists; the lines
# around this call are not visible -- presumably they guard it; confirm.
25 os.makedirs(output_dir)
28 outfile = os.path.join(output_dir, slug + '.' + cls.ext)
30 doc = WLDocument.from_file(main_input, provider=provider)
# The class-level `cover` and `flags` are forwarded to the converter.
31 output_file = cls.converter.transform(doc,
32 cover=cls.cover, flags=cls.flags)
33 doc.save_output_file(output_file, output_path=outfile)
# Run prepare_file() for every path in input_filenames, passing output_dir and
# verbose through unchanged.
# The print statement (Python 2 syntax) formats "<file>:<name>:<message>" and
# reads `e`, so it presumably sits inside an `except ParseError, e:` handler
# like the one in VirtualoEpubPackager.prepare -- the handler line and the
# remaining dict entries are not visible here; TODO confirm.
37 def prepare(cls, input_filenames, output_dir='', verbose=False):
39 for main_input in input_filenames:
42 cls.prepare_file(main_input, output_dir, verbose)
44 print '%(file)s:%(name)s:%(message)s' % {
46 'name': e.__class__.__name__,
# Packager subclass that the *EpubPackager classes below derive from.
# NOTE(review): the class body is not visible in this excerpt; presumably it
# sets the `converter` and `ext` attributes that Packager.prepare_file reads
# (EPUB, judging by the name) -- confirm.
51 class EpubPackager(Packager):
# Packager subclass that the *PdfPackager classes below derive from.
# NOTE(review): the class body is not visible in this excerpt; presumably it
# sets the `converter` and `ext` attributes that Packager.prepare_file reads
# (PDF, judging by the name) -- confirm.
55 class PdfPackager(Packager):
class GandalfEpubPackager(EpubPackager):
    """EpubPackager variant that uses ``cover.GandalfCover``.

    Only the ``cover`` class attribute is set here; Packager.prepare_file
    forwards it to the converter's ``transform()`` as the ``cover`` argument.
    """

    cover = cover.GandalfCover
class GandalfPdfPackager(PdfPackager):
    """PdfPackager variant that uses ``cover.GandalfCover``.

    Only the ``cover`` class attribute is set here; Packager.prepare_file
    forwards it to the converter's ``transform()`` as the ``cover`` argument.
    """

    cover = cover.GandalfCover
class ArtaTechEpubPackager(EpubPackager):
    """EpubPackager variant that uses ``cover.ArtaTechCover``.

    Only the ``cover`` class attribute is set here; Packager.prepare_file
    forwards it to the converter's ``transform()`` as the ``cover`` argument.
    """

    cover = cover.ArtaTechCover
class ArtaTechPdfPackager(PdfPackager):
    """PdfPackager variant that uses ``cover.ArtaTechCover``.

    Only the ``cover`` class attribute is set here; Packager.prepare_file
    forwards it to the converter's ``transform()`` as the ``cover`` argument.
    """

    cover = cover.ArtaTechCover
class BookotekaEpubPackager(EpubPackager):
    """EpubPackager variant that uses ``cover.BookotekaCover``.

    Only the ``cover`` class attribute is set here; Packager.prepare_file
    forwards it to the converter's ``transform()`` as the ``cover`` argument.
    """

    cover = cover.BookotekaCover
class PrestigioEpubPackager(EpubPackager):
    """EpubPackager variant for Prestigio.

    Sets two class attributes that Packager.prepare_file forwards to the
    converter's ``transform()``: the ``'less-advertising'`` flag (as
    ``flags``) and ``cover.PrestigioCover`` (as ``cover``).
    """

    flags = ('less-advertising',)
    cover = cover.PrestigioCover
class PrestigioPdfPackager(PdfPackager):
    """PdfPackager variant for Prestigio.

    Sets two class attributes that Packager.prepare_file forwards to the
    converter's ``transform()``: the ``'less-advertising'`` flag (as
    ``flags``) and ``cover.PrestigioCover`` (as ``cover``).
    """

    flags = ('less-advertising',)
    cover = cover.PrestigioCover
# Packager that, for each input book, writes a cover image, a full EPUB and a
# sample EPUB into a per-book directory, and collects per-book metadata into a
# single `import_products.xml` written to output_dir.
# NOTE(review): several lines of this class are not visible in this excerpt
# (the embedded line numbers skip); the comments describe only what is shown
# and mark anything about the missing lines as an assumption.
84 class VirtualoEpubPackager(Packager):
# Helper used by prepare() to cap a metadata field at `limit` UTF-8 bytes.
# Takes neither self nor cls, so presumably decorated as a staticmethod -- the
# decorator line is not visible; TODO confirm.
# While the encoded text is longer than `newlimit`, the slice index
# (newlimit - encoded_length) / 4 is negative, so each pass trims characters
# off the end of the text; the divisor 4 presumably reflects the maximum
# UTF-8 bytes per character.
# `newlimit` is assigned on a line that is not visible here, and so is the
# return statement.
# NOTE(review): `/` relies on Python 2 integer division; with true division
# the slice index would be a float.
86 def utf_trunc(text, limit):
87 """ truncates text to at most `limit' bytes in utf-8 """
90 if len(text.encode('utf-8')) > limit:
92 while len(text.encode('utf-8')) > newlimit:
93 text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
# Build the package for every path in input_filenames.
# Visible steps:
#   * parse an empty <products> root and a <product> template with
#     etree.fromstring (the template is only partly visible);
#   * per input file: derive directory and slug from the real path (split on a
#     literal '/'), create <output_dir>/<slug>, load the WLDocument, deep-copy
#     the template and fill its children by position --
#       [0] slug (100 bytes), [1] info.title (255), [2] info.description (255),
#       [3] info.source_name (3000), [4][0][0] author first names joined with
#       a space (100), [4][0][1] author last name (100) --
#     then append the copy to the root;
#   * save cover.VirtualoCover(info) as <slug>.jpg, the EPUB as `1.epub` and a
#     second EPUB built with sample=25 as `1.sample.epub`;
#   * on ParseError, print "<file>:<name>:<message>" (Python 2 print statement);
#   * write the pretty-printed XML, encoded as UTF-8, to
#     <output_dir>/import_products.xml.
# `info` is assigned on a line that is not visible here -- presumably the
# document's metadata; confirm.  No close() of xml_file is visible either.
# `verbose` is not used by any visible statement.
98 def prepare(cls, input_filenames, output_dir='', verbose=False):
99 xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
100 <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
101 product = etree.fromstring("""<product>
102 <publisherProductId></publisherProductId>
105 <description></description>
109 <lastName>Kowalski</lastName>
113 <language>PL</language>
117 for main_input in input_filenames:
120 path, fname = os.path.realpath(main_input).rsplit('/', 1)
121 provider = DirDocProvider(path)
122 slug, ext = os.path.splitext(fname)
124 outfile_dir = os.path.join(output_dir, slug)
125 os.makedirs(os.path.join(output_dir, slug))
127 doc = WLDocument.from_file(main_input, provider=provider)
130 product_elem = deepcopy(product)
131 product_elem[0].text = cls.utf_trunc(slug, 100)
132 product_elem[1].text = cls.utf_trunc(info.title, 255)
133 product_elem[2].text = cls.utf_trunc(info.description, 255)
134 product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
135 product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
136 product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
137 xml.append(product_elem)
139 cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
140 outfile = os.path.join(outfile_dir, '1.epub')
141 outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
142 doc.save_output_file(epub.transform(doc),
144 doc.save_output_file(epub.transform(doc, sample=25),
145 output_path=outfile_sample)
146 except ParseError, e:
147 print '%(file)s:%(name)s:%(message)s' % {
149 'name': e.__class__.__name__,
153 xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
154 xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))