scripts/book2partner

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   5 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   6 #
   7 import os.path
   8 import optparse
   9 from copy import deepcopy
  10 from lxml import etree
  11
  12 from librarian import epub, pdf, DirDocProvider, ParseError, cover
  13 from librarian.dcparser import BookInfo
  14
  15
  16 def utf_trunc(text, limit):
  17     """ truncates text to at most `limit' bytes in utf-8 """
  18     if text is None:
  19         return text
  20     orig_text = text
  21     if len(text.encode('utf-8')) > limit:
  22         newlimit = limit - 3
  23         while len(text.encode('utf-8')) > newlimit:
  24             text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
  25         text += '...'
  26     return text
  27
  28
  29 def virtualo(filenames, output_dir, verbose):
  30     xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
  31         <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
  32     product = etree.fromstring("""<product>
  33             <publisherProductId></publisherProductId>
  34             <title></title>
  35             <info></info>
  36             <description></description>
  37             <authors>
  38                 <author>
  39                     <names>Jan</names>
  40                     <lastName>Kowalski</lastName>
  41                 </author>
  42             </authors>
  43             <price>0.0</price>
  44             <language>PL</language>
  45         </product>""")
  46
  47     try:
  48         for main_input in input_filenames:
  49             if options.verbose:
  50                 print main_input
  51             path, fname = os.path.realpath(main_input).rsplit('/', 1)
  52             provider = DirDocProvider(path)
  53             slug, ext = os.path.splitext(fname)
  54
  55             outfile_dir = os.path.join(output_dir, slug)
  56             os.makedirs(os.path.join(output_dir, slug))
  57
  58             info = BookInfo.from_file(main_input)
  59
  60             product_elem = deepcopy(product)
  61             product_elem[0].text = utf_trunc(slug, 100)
  62             product_elem[1].text = utf_trunc(info.title, 255)
  63             product_elem[2].text = utf_trunc(info.description, 255)
  64             product_elem[3].text = utf_trunc(info.source_name, 3000)
  65             product_elem[4][0][0].text = utf_trunc(u' '.join(info.author.first_names), 100)
  66             product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100)
  67             xml.append(product_elem)
  68
  69             cover.VirtualoCover(
  70                 u' '.join(info.author.first_names + (info.author.last_name,)),
  71                 info.title
  72                 ).save(os.path.join(outfile_dir, slug+'.jpg'))
  73             outfile = os.path.join(outfile_dir, '1.epub')
  74             outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
  75             epub.transform(provider, file_path=main_input, output_file=outfile)
  76             epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
  77     except ParseError, e:
  78         print '%(file)s:%(name)s:%(message)s' % {
  79             'file': main_input,
  80             'name': e.__class__.__name__,
  81             'message': e.message
  82         }
  83
  84     xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
  85     xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
  86     xml_file.close()
  87
  88
  89 def prestigio(filenames, output_dir, verbose):
  90     try:
  91         for main_input in input_filenames:
  92             if options.verbose:
  93                 print main_input
  94             path, fname = os.path.realpath(main_input).rsplit('/', 1)
  95             provider = DirDocProvider(path)
  96             slug, ext = os.path.splitext(fname)
  97
  98             if output_dir != '':
  99                 try:
 100                     os.makedirs(output_dir)
 101                 except:
 102                     pass
 103             outfile = os.path.join(output_dir, slug + '.epub')
 104             epub.transform(provider, file_path=main_input, output_file=outfile,
 105                     cover=cover.PrestigioCover, flags=('less-advertising',))
 106     except ParseError, e:
 107         print '%(file)s:%(name)s:%(message)s' % {
 108             'file': main_input,
 109             'name': e.__class__.__name__,
 110             'message': e.message
 111         }
 112
 113
 114 def prestigio_pdf(filenames, output_dir, verbose):
 115     try:
 116         for main_input in input_filenames:
 117             if options.verbose:
 118                 print main_input
 119             path, fname = os.path.realpath(main_input).rsplit('/', 1)
 120             provider = DirDocProvider(path)
 121             slug, ext = os.path.splitext(fname)
 122
 123             if output_dir != '':
 124                 try:
 125                     os.makedirs(output_dir)
 126                 except:
 127                     pass
 128             outfile = os.path.join(output_dir, slug + '.pdf')
 129             pdf.transform(provider, file_path=main_input, output_file=outfile,
 130                     cover=cover.PrestigioCover, verbose=options.verbose, flags=('less-advertising',))
 131     except ParseError, e:
 132         print '%(file)s:%(name)s:%(message)s' % {
 133             'file': main_input,
 134             'name': e.__class__.__name__,
 135             'message': e.message
 136         }
 137
 138
 139 def bookoteka(filenames, output_dir, verbose):
 140     try:
 141         for main_input in input_filenames:
 142             if options.verbose:
 143                 print main_input
 144             path, fname = os.path.realpath(main_input).rsplit('/', 1)
 145             provider = DirDocProvider(path)
 146             slug, ext = os.path.splitext(fname)
 147
 148             if output_dir != '':
 149                 try:
 150                     os.makedirs(output_dir)
 151                 except:
 152                     pass
 153             outfile = os.path.join(output_dir, slug + '.epub')
 154             epub.transform(provider, file_path=main_input, output_file=outfile,
 155                     cover=cover.BookotekaCover)
 156     except ParseError, e:
 157         print '%(file)s:%(name)s:%(message)s' % {
 158             'file': main_input,
 159             'name': e.__class__.__name__,
 160             'message': e.message
 161         }
 162
 163
 164
 165 if __name__ == '__main__':
 166     # Parse commandline arguments
 167     usage = """Usage: %prog [options] SOURCE [SOURCE...]
 168     Prepare SOURCE files for a partner."""
 169
 170     parser = optparse.OptionParser(usage=usage)
 171
 172     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
 173         help='print status messages to stdout')
 174     parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
 175                       help='specifies the directory for output')
 176     parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False,
 177                       help='prepare files for Bookoteka')
 178     parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
 179                       help='prepare files for Virtualo API')
 180     parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False,
 181                       help='prepare files for Prestigio')
 182     parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False,
 183                       help='prepare PDF files for Prestigio')
 184
 185     options, input_filenames = parser.parse_args()
 186
 187     if len(input_filenames) < 1:
 188         parser.print_help()
 189         exit(1)
 190
 191     if options.bookoteka:
 192         bookoteka(input_filenames, options.output_dir, options.verbose)
 193     if options.virtualo:
 194         virtualo(input_filenames, options.output_dir, options.verbose)
 195     if options.prestigio:
 196         prestigio(input_filenames, options.output_dir, options.verbose)
 197     if options.prestigio_pdf:
 198         prestigio_pdf(input_filenames, options.output_dir, options.verbose)