Fixes #2570: Text spilling into fragments from outside.
[librarian.git] / scripts / book2partner
index 6955899..4b84c2f 100755 (executable)
@@ -4,137 +4,9 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-import os.path
 import optparse
-from copy import deepcopy
-from lxml import etree
-
-from librarian import epub, DirDocProvider, ParseError, cover
-from librarian.dcparser import BookInfo
-
-
-def utf_trunc(text, limit):
-    """ truncates text to at most `limit' bytes in utf-8 """
-    if text is None:
-        return text
-    orig_text = text
-    if len(text.encode('utf-8')) > limit:
-        newlimit = limit - 3
-        while len(text.encode('utf-8')) > newlimit:
-            text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
-        text += '...'
-    return text
-
-
-def virtualo(filenames, output_dir, verbose):
-    xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
-        <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
-    product = etree.fromstring("""<product>
-            <publisherProductId></publisherProductId>
-            <title></title>
-            <info></info>
-            <description></description>
-            <authors>
-                <author>
-                    <names>Jan</names>
-                    <lastName>Kowalski</lastName>
-                </author>
-            </authors>
-            <price>0.0</price>
-            <language>PL</language>
-        </product>""")
-
-    try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            outfile_dir = os.path.join(output_dir, slug)
-            os.makedirs(os.path.join(output_dir, slug))
-
-            info = BookInfo.from_file(main_input)
-
-            product_elem = deepcopy(product)
-            product_elem[0].text = utf_trunc(slug, 100)
-            product_elem[1].text = utf_trunc(info.title, 255)
-            product_elem[2].text = utf_trunc(info.description, 255)
-            product_elem[3].text = utf_trunc(info.source_name, 3000)
-            product_elem[4][0][0].text = utf_trunc(u' '.join(info.author.first_names), 100)
-            product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100)
-            xml.append(product_elem)
-
-            cover.VirtualoCover(
-                u' '.join(info.author.first_names + (info.author.last_name,)),
-                info.title
-                ).save(os.path.join(outfile_dir, slug+'.jpg'))
-            outfile = os.path.join(outfile_dir, '1.epub')
-            outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
-            epub.transform(provider, file_path=main_input, output_file=outfile)
-            epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
-
-    xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
-    xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
-    xml_file.close()
-
-
-def prestigio(filenames, output_dir, verbose):
-    try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            if output_dir != '':
-                try:
-                    os.makedirs(output_dir)
-                except:
-                    pass
-            outfile = os.path.join(output_dir, slug + '.epub')
-            epub.transform(provider, file_path=main_input, output_file=outfile,
-                    cover=cover.PrestigioCover, flags=('less-advertising',))
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
-
-
-def bookoteka(filenames, output_dir, verbose):
-    try:
-        for main_input in input_filenames:
-            if options.verbose:
-                print main_input
-            path, fname = os.path.realpath(main_input).rsplit('/', 1)
-            provider = DirDocProvider(path)
-            slug, ext = os.path.splitext(fname)
-
-            if output_dir != '':
-                try:
-                    os.makedirs(output_dir)
-                except:
-                    pass
-            outfile = os.path.join(output_dir, slug + '.epub')
-            epub.transform(provider, file_path=main_input, output_file=outfile,
-                    cover=cover.BookotekaCover)
-    except ParseError, e:
-        print '%(file)s:%(name)s:%(message)s' % {
-            'file': main_input,
-            'name': e.__class__.__name__,
-            'message': e.message
-        }
 
+from librarian import packagers
 
 
 if __name__ == '__main__':
@@ -150,10 +22,16 @@ if __name__ == '__main__':
                       help='specifies the directory for output')
     parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False,
                       help='prepare files for Bookoteka')
+    parser.add_option('--gandalf', action='store_true', dest='gandalf', default=False,
+                      help='prepare EPUB files for Gandalf')
+    parser.add_option('--gandalf-pdf', action='store_true', dest='gandalf_pdf', default=False,
+                      help='prepare PDF files for Gandalf')
     parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
                       help='prepare files for Virtualo API')
     parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False,
                       help='prepare files for Prestigio')
+    parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False,
+                      help='prepare PDF files for Prestigio')
 
     options, input_filenames = parser.parse_args()
 
@@ -162,8 +40,14 @@ if __name__ == '__main__':
         exit(1)
 
     if options.bookoteka:
-        bookoteka(input_filenames, options.output_dir, options.verbose)
+        packagers.BookotekaEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
+    if options.gandalf:
+        packagers.GandalfEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
+    if options.gandalf_pdf:
+        packagers.GandalfPdfPackager.prepare(input_filenames, options.output_dir, options.verbose)
     if options.virtualo:
-        virtualo(input_filenames, options.output_dir, options.verbose)
+        packagers.VirtualoPackager.prepare(input_filenames, options.output_dir, options.verbose)
     if options.prestigio:
-        prestigio(input_filenames, options.output_dir, options.verbose)
+        packagers.PrestigioEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
+    if options.prestigio_pdf:
+        packagers.PrestigioPdfPackager.prepare(input_filenames, options.output_dir, options.verbose)