#1032: epubs for virtualo
[librarian.git] / scripts / book2partner
diff --git a/scripts/book2partner b/scripts/book2partner
new file mode 100755 (executable)
index 0000000..5866cc3
--- /dev/null
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.  
+#
+import os.path
+import optparse
+from copy import deepcopy
+from lxml import etree
+
+from librarian import epub, DirDocProvider, ParseError, cover
+from librarian.dcparser import BookInfo
+
+
+def utf_trunc(text, limit):
+    """Return `text` cut to at most `limit` UTF-8 bytes; a cut text ends in '...'
+    (itself shortened when limit < 3). None is returned unchanged."""
+    if text is None or len(text.encode('utf-8')) <= limit:
+        return text
+    suffix = '...'[:max(limit, 0)]
+    budget = max(limit, 0) - len(suffix)  # bytes left for the text itself, never < 0
+    while len(text.encode('utf-8')) > budget:
+        # Floor division of the negative excess drops ceil(excess / 4) chars; a char
+        # is at most 4 bytes in UTF-8, so this never drops more than needed.
+        text = text[:(budget - len(text.encode('utf-8'))) // 4]
+    return text + suffix
+
+
+def virtualo(filenames, output_dir, verbose):
+    """Build a Virtualo import package in `output_dir` from the given source files.
+
+    Each path in `filenames` gets a directory `output_dir/<slug>/` holding a cover
+    (<slug>.jpg), an EPUB (1.epub) and an EPUB made with sample=25 (1.sample.epub);
+    the collected metadata is then written to `output_dir/import_products.xml`.
+    `verbose` prints each input path. A ParseError is reported on stdout and stops
+    the loop, but the XML is still written for the books collected so far.
+    """
+    xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
+        <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
+    # Template copied for every book; its children are addressed by position below.
+    product = etree.fromstring("""<product>
+            <publisherProductId></publisherProductId>
+            <title></title>
+            <info></info>
+            <description></description>
+            <authors>
+                <author>
+                    <names>Jan</names>
+                    <lastName>Kowalski</lastName>
+                </author>
+            </authors>
+            <price>0.0</price>
+            <language>PL</language>
+        </product>""")
+    try:
+        for main_input in filenames:
+            if verbose:
+                print main_input
+            path, fname = os.path.split(os.path.realpath(main_input))
+            provider = DirDocProvider(path)
+            slug, ext = os.path.splitext(fname)
+            outfile_dir = os.path.join(output_dir, slug)
+            if not os.path.isdir(outfile_dir):  # allow re-running into the same output dir
+                os.makedirs(outfile_dir)
+            info = BookInfo.from_file(main_input)
+            product_elem = deepcopy(product)
+            product_elem[0].text = utf_trunc(slug, 100)
+            product_elem[1].text = utf_trunc(info.title, 255)
+            product_elem[2].text = utf_trunc(info.description, 255)
+            product_elem[3].text = utf_trunc(info.source_name, 3000)
+            product_elem[4][0][0].text = utf_trunc(u' '.join(info.author.first_names), 100)
+            product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100)
+            xml.append(product_elem)
+            cover.cover(600, 730,
+                u' '.join(info.author.first_names + (info.author.last_name,)),
+                info.title
+                ).save(os.path.join(outfile_dir, slug+'.jpg'))
+            outfile = os.path.join(outfile_dir, '1.epub')
+            outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
+            epub.transform(provider, file_path=main_input, output_file=outfile)
+            epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
+    except ParseError as e:
+        print '%(file)s:%(name)s:%(message)s' % {
+            'file': main_input, 'name': e.__class__.__name__, 'message': e.message}
+    with open(os.path.join(output_dir, 'import_products.xml'), 'w') as xml_file:
+        xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
+
+
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the options, then run the requested export.
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Prepare SOURCE files for a partner."""
+
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+                      help='print status messages to stdout')
+    parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
+                      help='specifies the directory for output')
+    parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
+                      help='prepare files for Virtualo API')
+
+    options, input_filenames = parser.parse_args()
+
+    if not input_filenames:
+        parser.print_help()
+        # Raise SystemExit directly; the exit() builtin is injected by the site module.
+        raise SystemExit(1)
+
+    if options.virtualo:
+        virtualo(input_filenames, options.output_dir, options.verbose)