Preliminary math and tables support.
[librarian.git] / scripts / book2partner
index 136fffc..f1892bb 100755 (executable)
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-import os.path
+import inspect
 import optparse
-from copy import deepcopy
-from lxml import etree
+import os
+import sys
 
 
-from librarian import epub, pdf, DirDocProvider, ParseError, cover
-from librarian.dcparser import BookInfo
-
-
-def utf_trunc(text, limit):
-    """ truncates text to at most `limit' bytes in utf-8 """
-    if text is None:
-        return text
-    orig_text = text
-    if len(text.encode('utf-8')) > limit:
-        newlimit = limit - 3
-        while len(text.encode('utf-8')) > newlimit:
-            text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
-        text += '...'
-    return text
-
-
-def virtualo(filenames, output_dir, verbose):
-    xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
-        <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
-    product = etree.fromstring("""<product>
-            <publisherProductId></publisherProductId>
-            <title></title>
-            <info></info>
-            <description></description>
-            <authors>
-                <author>
-                    <names>Jan</names>
-                    <lastName>Kowalski</lastName>
-                </author>
-            </authors>
-            <price>0.0</price>
-            <language>PL</language>
-        </product>""")
-
-    try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            outfile_dir = os.path.join(output_dir, slug)
-            os.makedirs(os.path.join(output_dir, slug))
-
-            info = BookInfo.from_file(main_input)
-
-            product_elem = deepcopy(product)
-            product_elem[0].text = utf_trunc(slug, 100)
-            product_elem[1].text = utf_trunc(info.title, 255)
-            product_elem[2].text = utf_trunc(info.description, 255)
-            product_elem[3].text = utf_trunc(info.source_name, 3000)
-            product_elem[4][0][0].text = utf_trunc(u' '.join(info.author.first_names), 100)
-            product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100)
-            xml.append(product_elem)
-
-            cover.VirtualoCover(
-                u' '.join(info.author.first_names + (info.author.last_name,)),
-                info.title
-                ).save(os.path.join(outfile_dir, slug+'.jpg'))
-            outfile = os.path.join(outfile_dir, '1.epub')
-            outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
-            epub.transform(provider, file_path=main_input, output_file=outfile)
-            epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
-
-    xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
-    xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
-    xml_file.close()
-
-
-def prestigio(filenames, output_dir, verbose):
-    try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            if output_dir != '':
-                try:
-                    os.makedirs(output_dir)
-                except:
-                    pass
-            outfile = os.path.join(output_dir, slug + '.epub')
-            epub.transform(provider, file_path=main_input, output_file=outfile,
-                    cover=cover.PrestigioCover, flags=('less-advertising',))
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
-
-
-def prestigio_pdf(filenames, output_dir, verbose):
-    try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            if output_dir != '':
-                try:
-                    os.makedirs(output_dir)
-                except:
-                    pass
-            outfile = os.path.join(output_dir, slug + '.pdf')
-            pdf.transform(provider, file_path=main_input, output_file=outfile,
-                    cover=cover.PrestigioCover, verbose=options.verbose, flags=('less-advertising',))
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
-
-
-def bookoteka(filenames, output_dir, verbose):
+from librarian import packagers
+try:
+    from collections import OrderedDict
+except ImportError:
     try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            if output_dir != '':
-                try:
-                    os.makedirs(output_dir)
-                except:
-                    pass
-            outfile = os.path.join(output_dir, slug + '.epub')
-            epub.transform(provider, file_path=main_input, output_file=outfile,
-                    cover=cover.BookotekaCover)
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
-
+        from django.utils.datastructures import SortedDict
+        OrderedDict = SortedDict
+    except ImportError:
+        OrderedDict = dict
 
 
 if __name__ == '__main__':
@@ -173,26 +31,54 @@ if __name__ == '__main__':
         help='print status messages to stdout')
     parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
                       help='specifies the directory for output')
-    parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False,
-                      help='prepare files for Bookoteka')
-    parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
-                      help='prepare files for Virtualo API')
-    parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False,
-                      help='prepare files for Prestigio')
-    parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False,
-                      help='prepare PDF files for Prestigio')
+    parser.add_option('-f', '--force-overwrite',
+        action='store_true',
+        dest='overwrite',
+        default=False,
+        help='rebuild the file even if it already exists')
+    parser.add_option('-p', '--packages',
+        dest='packages',
+        metavar='package,...',
+        default='',
+        help='lists packages to build')
+    parser.add_option('-m', '--packages-modules',
+        dest='packages_modules', metavar='path.to.module,...',
+        default='librarian.partners',
+        help='modules with packages definitions to load')
+    parser.add_option('-l', '--list-packages',
+        action='store_true',
+        dest='list_packages',
+        default=False,
+        help='lists available packages')
 
     options, input_filenames = parser.parse_args()
-
-    if len(input_filenames) < 1:
+    packages = OrderedDict()
+
+    sys.path.insert(0, os.getcwd())
+    for module_name in options.packages_modules.split(','):
+        if not module_name:
+            continue
+        module = __import__(module_name, globals(), locals(), ['*'])
+        for package_name in dir(module):
+            package = getattr(module, package_name)
+            if inspect.isclass(package) and issubclass(package, packagers.Packager):
+                packages[package_name] = package
+    if not packages:
+        print 'No packages found!'
+
+    if options.list_packages:
+        print 'Available packages:'
+        for package_name, package in packages.items():
+            print ' ', package_name
+        exit(0)
+
+    if len(input_filenames) < 1 or not options.packages:
         parser.print_help()
         exit(1)
 
-    if options.bookoteka:
-        bookoteka(input_filenames, options.output_dir, options.verbose)
-    if options.virtualo:
-        virtualo(input_filenames, options.output_dir, options.verbose)
-    if options.prestigio:
-        prestigio(input_filenames, options.output_dir, options.verbose)
-    if options.prestigio_pdf:
-        prestigio_pdf(input_filenames, options.output_dir, options.verbose)
+    used_packages = [packages[p] for p in options.packages.split(',')]
+    for package in used_packages:
+        if options.verbose:
+            print 'Package:', package.__name__
+        package.prepare(input_filenames,
+            options.output_dir, options.verbose, options.overwrite)