# -*- coding: utf-8 -*-
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
import optparse
import os
from copy import deepcopy

from lxml import etree

from librarian import epub, pdf, DirDocProvider, ParseError, cover
from librarian.dcparser import BookInfo
def utf_trunc(text, limit):
    """Truncate text so that its UTF-8 encoding takes at most `limit` bytes.

    Text that already fits is returned unchanged.  Longer text is cut on a
    character boundary and marked with a trailing '...', which counts
    towards the limit.  When `limit` is too small to hold the ellipsis
    (less than 3), the text is cut without it.

    text -- the string to shorten; None is passed through unchanged
    limit -- maximum size of the result, in UTF-8 bytes

    Returns the (possibly shortened) text, or None if `text` was None.
    """
    if text is None:
        return text

    size = len(text.encode('utf-8'))
    if size <= limit:
        return text

    ellipsis = '...'
    newlimit = limit - len(ellipsis)
    if newlimit < 0:
        # No room for the ellipsis.  A negative byte budget can never be
        # met, so the trimming loop below would spin forever on it.
        ellipsis = ''
        newlimit = max(limit, 0)

    while size > newlimit:
        # A character takes at most 4 bytes in UTF-8, so dropping
        # ceil(excess / 4) characters never removes more than necessary.
        # Floor division of the (negative) excess gives exactly that count
        # as a negative slice index, on Python 2 and 3 alike.
        text = text[:(newlimit - size) // 4]
        size = len(text.encode('utf-8'))
    return text + ellipsis
def virtualo(filenames, output_dir, verbose):
    """Prepare a Virtualo import package for the given source files.

    For every source file a directory named after the file's slug is
    created under `output_dir`.  It receives a cover image (<slug>.jpg),
    the full EPUB (1.epub) and a sample EPUB (1.sample.epub).  A <product>
    entry filled from the book's metadata is collected for each file and
    the whole list is written to import_products.xml in `output_dir`.

    filenames -- paths of the source files to process
    output_dir -- directory in which the package is created
    verbose -- if true, print the name of each file before processing it

    A ParseError is reported on stdout and stops processing of the
    remaining files; the products collected so far are still written out.
    """
    # Bytes literal: lxml refuses text strings that carry an encoding
    # declaration, which is what a plain literal is on Python 3.
    xml = etree.fromstring(b"""<?xml version="1.0" encoding="utf-8"?>
<products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
    # The elements are filled in by position below, so their order matters.
    # NOTE(review): only publisherProductId, description, lastName and
    # language were legible when this block was reviewed; title, info,
    # authors/author/names and price are reconstructed to match the
    # positional indexing -- confirm against the partner's schema.
    product = etree.fromstring("""<product>
            <publisherProductId></publisherProductId>
            <title></title>
            <info></info>
            <description></description>
            <authors>
                <author>
                    <names>Jan</names>
                    <lastName>Kowalski</lastName>
                </author>
            </authors>
            <price>0.0</price>
            <language>PL</language>
        </product>""")

    try:
        for main_input in filenames:
            if verbose:
                print(main_input)
            path, fname = os.path.split(os.path.realpath(main_input))
            provider = DirDocProvider(path)
            slug = os.path.splitext(fname)[0]

            outfile_dir = os.path.join(output_dir, slug)
            # Tolerate re-runs: makedirs raises if the directory exists.
            if not os.path.isdir(outfile_dir):
                os.makedirs(outfile_dir)

            info = BookInfo.from_file(main_input)

            product_elem = deepcopy(product)
            product_elem[0].text = utf_trunc(slug, 100)
            product_elem[1].text = utf_trunc(info.title, 255)
            product_elem[2].text = utf_trunc(info.description, 255)
            product_elem[3].text = utf_trunc(info.source_name, 3000)
            product_elem[4][0][0].text = utf_trunc(
                u' '.join(info.author.first_names), 100)
            product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100)
            xml.append(product_elem)

            # NOTE(review): cover class name and its second argument are
            # reconstructed (the call's opening lines were not legible);
            # confirm that cover.VirtualoCover(author, title) is intended.
            cover.VirtualoCover(
                u' '.join(info.author.first_names + (info.author.last_name,)),
                info.title
            ).save(os.path.join(outfile_dir, slug + '.jpg'))

            outfile = os.path.join(outfile_dir, '1.epub')
            outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
            epub.transform(provider, file_path=main_input, output_file=outfile)
            epub.transform(provider, file_path=main_input,
                           output_file=outfile_sample, sample=25)
    except ParseError as e:
        # NOTE(review): the 'file' and 'message' values are inferred from
        # the keys of the format string -- confirm.
        print('%(file)s:%(name)s:%(message)s' % {
            'file': main_input,
            'name': e.__class__.__name__,
            'message': getattr(e, 'message', e),
        })

    # Serialise straight to UTF-8 bytes (no XML declaration, as before) and
    # let the context manager close the file even if writing fails.
    xml_path = os.path.join(output_dir, 'import_products.xml')
    with open(xml_path, 'wb') as xml_file:
        xml_file.write(etree.tostring(xml, pretty_print=True,
                                      encoding='utf-8', xml_declaration=False))
def prestigio(filenames, output_dir, verbose):
    """Convert source files to EPUBs prepared for Prestigio.

    Each source file is transformed with the Prestigio cover and the
    'less-advertising' flag, and saved as <slug>.epub in `output_dir`,
    where the slug is the source file name without its extension.

    filenames -- paths of the source files to process
    output_dir -- directory for the generated files ('' means the
                  current directory)
    verbose -- if true, print the name of each file before processing it

    A ParseError is reported on stdout and stops processing of the
    remaining files.
    """
    # Create the target directory once, tolerating one that already exists.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    try:
        for main_input in filenames:
            if verbose:
                print(main_input)
            path, fname = os.path.split(os.path.realpath(main_input))
            provider = DirDocProvider(path)
            slug = os.path.splitext(fname)[0]

            outfile = os.path.join(output_dir, slug + '.epub')
            epub.transform(provider, file_path=main_input, output_file=outfile,
                           cover=cover.PrestigioCover,
                           flags=('less-advertising',))
    except ParseError as e:
        # NOTE(review): the 'file' and 'message' values are inferred from
        # the keys of the format string -- confirm.
        print('%(file)s:%(name)s:%(message)s' % {
            'file': main_input,
            'name': e.__class__.__name__,
            'message': getattr(e, 'message', e),
        })
def prestigio_pdf(filenames, output_dir, verbose):
    """Convert source files to PDFs prepared for Prestigio.

    Each source file is transformed with the Prestigio cover and the
    'less-advertising' flag, and saved as <slug>.pdf in `output_dir`,
    where the slug is the source file name without its extension.

    filenames -- paths of the source files to process
    output_dir -- directory for the generated files ('' means the
                  current directory)
    verbose -- if true, print the name of each file before processing it;
               also passed on to the PDF transformation

    A ParseError is reported on stdout and stops processing of the
    remaining files.
    """
    # Create the target directory once, tolerating one that already exists.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    try:
        for main_input in filenames:
            if verbose:
                print(main_input)
            path, fname = os.path.split(os.path.realpath(main_input))
            provider = DirDocProvider(path)
            slug = os.path.splitext(fname)[0]

            outfile = os.path.join(output_dir, slug + '.pdf')
            # Use the `verbose` argument rather than the script's global
            # options, so the function also works when imported.
            pdf.transform(provider, file_path=main_input, output_file=outfile,
                          cover=cover.PrestigioCover, verbose=verbose,
                          flags=('less-advertising',))
    except ParseError as e:
        # NOTE(review): the 'file' and 'message' values are inferred from
        # the keys of the format string -- confirm.
        print('%(file)s:%(name)s:%(message)s' % {
            'file': main_input,
            'name': e.__class__.__name__,
            'message': getattr(e, 'message', e),
        })
def bookoteka(filenames, output_dir, verbose):
    """Convert source files to EPUBs prepared for Bookoteka.

    Each source file is transformed with the Bookoteka cover and saved as
    <slug>.epub in `output_dir`, where the slug is the source file name
    without its extension.

    filenames -- paths of the source files to process
    output_dir -- directory for the generated files ('' means the
                  current directory)
    verbose -- if true, print the name of each file before processing it

    A ParseError is reported on stdout and stops processing of the
    remaining files.
    """
    # Create the target directory once, tolerating one that already exists.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    try:
        for main_input in filenames:
            if verbose:
                print(main_input)
            path, fname = os.path.split(os.path.realpath(main_input))
            provider = DirDocProvider(path)
            slug = os.path.splitext(fname)[0]

            outfile = os.path.join(output_dir, slug + '.epub')
            epub.transform(provider, file_path=main_input, output_file=outfile,
                           cover=cover.BookotekaCover)
    except ParseError as e:
        # NOTE(review): the 'file' and 'message' values are inferred from
        # the keys of the format string -- confirm.
        print('%(file)s:%(name)s:%(message)s' % {
            'file': main_input,
            'name': e.__class__.__name__,
            'message': getattr(e, 'message', e),
        })
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the conversion
    # for every partner selected with a flag.  Several flags may be given;
    # the partners are processed in the fixed order used below.
    usage = """Usage: %prog [options] SOURCE [SOURCE...]
    Prepare SOURCE files for a partner."""

    parser = optparse.OptionParser(usage=usage)

    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
                      help='print status messages to stdout')
    parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
                      help='specifies the directory for output')
    parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False,
                      help='prepare files for Bookoteka')
    parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
                      help='prepare files for Virtualo API')
    parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False,
                      help='prepare files for Prestigio')
    parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False,
                      help='prepare PDF files for Prestigio')

    # These two names are kept as they are: being assigned here makes them
    # module-level globals, and other code in the module may refer to them.
    options, input_filenames = parser.parse_args()

    # Nothing to convert: show the help text and signal failure.
    # NOTE(review): the exit status 1 is assumed -- confirm.
    if not input_filenames:
        parser.print_help()
        raise SystemExit(1)

    if options.bookoteka:
        bookoteka(input_filenames, options.output_dir, options.verbose)
    if options.virtualo:
        virtualo(input_filenames, options.output_dir, options.verbose)
    if options.prestigio:
        prestigio(input_filenames, options.output_dir, options.verbose)
    if options.prestigio_pdf:
        prestigio_pdf(input_filenames, options.output_dir, options.verbose)