Reorganizing partner code a little.
author Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
Thu, 20 Feb 2014 14:10:10 +0000 (15:10 +0100)
committer Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
Thu, 20 Feb 2014 14:10:10 +0000 (15:10 +0100)
librarian/packagers.py
librarian/partners.py [new file with mode: 0644]
scripts/book2partner

diff --git a/librarian/packagers.py b/librarian/packagers.py
index ddfd7c8..a32a2a0 100644 (file)
@@ -4,9 +4,7 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 import os
-from copy import deepcopy
-from lxml import etree
-from librarian import pdf, epub, DirDocProvider, ParseError, cover
+from librarian import pdf, epub, mobi, DirDocProvider, ParseError, cover
 from librarian.parser import WLDocument
 
 
@@ -15,7 +13,11 @@ class Packager(object):
     flags = None
 
     @classmethod
-    def prepare_file(cls, main_input, output_dir, verbose=False):
+    def transform(cls, *args, **kwargs):
+        return cls.converter.transform(*args, **kwargs)
+
+    @classmethod
+    def prepare_file(cls, main_input, output_dir, verbose=False, overwrite=False):
         path, fname = os.path.realpath(main_input).rsplit('/', 1)
         provider = DirDocProvider(path)
         slug, ext = os.path.splitext(fname)
@@ -26,20 +28,22 @@ class Packager(object):
             except:
                 pass
         outfile = os.path.join(output_dir, slug + '.' + cls.ext)
+        if os.path.exists(outfile) and not overwrite:
+            return
 
         doc = WLDocument.from_file(main_input, provider=provider)
-        output_file = cls.converter.transform(doc,
+        output_file = cls.transform(doc,
                 cover=cls.cover, flags=cls.flags)
         doc.save_output_file(output_file, output_path=outfile)
 
 
     @classmethod
-    def prepare(cls, input_filenames, output_dir='', verbose=False):
+    def prepare(cls, input_filenames, output_dir='', verbose=False, overwrite=False):
         try:
             for main_input in input_filenames:
                 if verbose:
                     print main_input
-                cls.prepare_file(main_input, output_dir, verbose)
+                cls.prepare_file(main_input, output_dir, verbose, overwrite)
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': main_input,
@@ -52,105 +56,14 @@ class EpubPackager(Packager):
     converter = epub
     ext = 'epub'
 
+class MobiPackager(Packager):
+    converter = mobi
+    ext = 'mobi'
+
 class PdfPackager(Packager):
     converter = pdf
     ext = 'pdf'
 
-
-class GandalfEpubPackager(EpubPackager):
-    cover = cover.GandalfCover
-
-class GandalfPdfPackager(PdfPackager):
-    cover = cover.GandalfCover
-
-class BookotekaEpubPackager(EpubPackager):
-    cover = cover.BookotekaCover
-
-class PrestigioEpubPackager(EpubPackager):
-    cover = cover.PrestigioCover
-    flags = ('less-advertising',)
-
-class PrestigioPdfPackager(PdfPackager):
-    cover = cover.PrestigioCover
-    flags = ('less-advertising',)
-
-
-class VirtualoPackager(Packager):
-    @staticmethod
-    def utf_trunc(text, limit):
-        """ truncates text to at most `limit' bytes in utf-8 """
-        if text is None:
-            return text
-        if len(text.encode('utf-8')) > limit:
-            newlimit = limit - 3
-            while len(text.encode('utf-8')) > newlimit:
-                text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
-            text += '...'
-        return text
-
     @classmethod
-    def prepare(cls, input_filenames, output_dir='', verbose=False):
-        xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
-            <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
-        product = etree.fromstring("""<product>
-                <publisherProductId></publisherProductId>
-                <title></title>
-                <info></info>
-                <description></description>
-                <authors>
-                    <author>
-                        <names>Jan</names>
-                        <lastName>Kowalski</lastName>
-                    </author>
-                </authors>
-                <price>0.0</price>
-                <language>PL</language>
-            </product>""")
-
-        try:
-            for main_input in input_filenames:
-                if verbose:
-                    print main_input
-                path, fname = os.path.realpath(main_input).rsplit('/', 1)
-                provider = DirDocProvider(path)
-                slug, ext = os.path.splitext(fname)
-
-                outfile_dir = os.path.join(output_dir, slug)
-                os.makedirs(os.path.join(output_dir, slug))
-
-                doc = WLDocument.from_file(main_input, provider=provider)
-                info = doc.book_info
-
-                product_elem = deepcopy(product)
-                product_elem[0].text = cls.utf_trunc(slug, 100)
-                product_elem[1].text = cls.utf_trunc(info.title, 255)
-                product_elem[2].text = cls.utf_trunc(info.description, 255)
-                product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
-                product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
-                product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
-                xml.append(product_elem)
-
-                cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
-                outfile = os.path.join(outfile_dir, '1.epub')
-                outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
-                doc.save_output_file(doc.as_epub(),
-                        output_path=outfile)
-                doc.save_output_file(doc.as_epub(doc, sample=25), 
-                        output_path=outfile_sample)
-                outfile = os.path.join(outfile_dir, '1.mobi')
-                outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
-                doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
-                        output_path=outfile)
-                doc.save_output_file(
-                        doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), 
-                        output_path=outfile_sample)
-        except ParseError, e:
-            print '%(file)s:%(name)s:%(message)s' % {
-                'file': main_input,
-                'name': e.__class__.__name__,
-                'message': e.message
-            }
-
-        xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
-        xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
-        xml_file.close()
+    def transform(cls, *args, **kwargs):
+        return cls.converter.transform(*args, morefloats='new', **kwargs)
diff --git a/librarian/partners.py b/librarian/partners.py
new file mode 100644 (file)
index 0000000..58bc8c5
--- /dev/null
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+
+"""
+Classes for packaging ebooks for some old partners.
+These should be removed from librarian to separate package,
+along with custom cover images etc.
+
+New partners shouldn't be added here, but in the partners repository.
+"""
+
+from librarian import packagers, cover
+
+class GandalfEpub(packagers.EpubPackager):
+    cover = cover.GandalfCover
+
+class GandalfPdf(packagers.PdfPackager):
+    cover = cover.GandalfCover
+
+class BookotekaEpub(packagers.EpubPackager):
+    cover = cover.BookotekaCover
+
+class PrestigioEpub(packagers.EpubPackager):
+    cover = cover.PrestigioCover
+    flags = ('less-advertising',)
+
+class PrestigioPdf(packagers.PdfPackager):
+    cover = cover.PrestigioCover
+    flags = ('less-advertising',)
+
+
+class Virtualo(packagers.Packager):
+    @staticmethod
+    def utf_trunc(text, limit):
+        """ truncates text to at most `limit' bytes in utf-8 """
+        if text is None:
+            return text
+        if len(text.encode('utf-8')) > limit:
+            newlimit = limit - 3
+            while len(text.encode('utf-8')) > newlimit:
+                text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
+            text += '...'
+        return text
+
+    @classmethod
+    def prepare(cls, input_filenames, output_dir='', verbose=False):
+        from lxml import etree
+        from librarian import DirDocProvider, ParseError
+        from librarian.parser import WLDocument
+        from copy import deepcopy
+        import os
+        import os.path
+
+        xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
+            <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
+        product = etree.fromstring("""<product>
+                <publisherProductId></publisherProductId>
+                <title></title>
+                <info></info>
+                <description></description>
+                <authors>
+                    <author>
+                        <names>Jan</names>
+                        <lastName>Kowalski</lastName>
+                    </author>
+                </authors>
+                <price>0.0</price>
+                <language>PL</language>
+            </product>""")
+
+        try:
+            for main_input in input_filenames:
+                if verbose:
+                    print main_input
+                path, fname = os.path.realpath(main_input).rsplit('/', 1)
+                provider = DirDocProvider(path)
+                slug, ext = os.path.splitext(fname)
+
+                outfile_dir = os.path.join(output_dir, slug)
+                os.makedirs(os.path.join(output_dir, slug))
+
+                doc = WLDocument.from_file(main_input, provider=provider)
+                info = doc.book_info
+
+                product_elem = deepcopy(product)
+                product_elem[0].text = cls.utf_trunc(slug, 100)
+                product_elem[1].text = cls.utf_trunc(info.title, 255)
+                product_elem[2].text = cls.utf_trunc(info.description, 255)
+                product_elem[3].text = cls.utf_trunc(info.source_name, 3000)
+                product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100)
+                product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
+                xml.append(product_elem)
+
+                cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
+                outfile = os.path.join(outfile_dir, '1.epub')
+                outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
+                doc.save_output_file(doc.as_epub(),
+                        output_path=outfile)
+                doc.save_output_file(doc.as_epub(doc, sample=25),
+                        output_path=outfile_sample)
+                outfile = os.path.join(outfile_dir, '1.mobi')
+                outfile_sample = os.path.join(outfile_dir, '1.sample.mobi')
+                doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover),
+                        output_path=outfile)
+                doc.save_output_file(
+                        doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25),
+                        output_path=outfile_sample)
+        except ParseError, e:
+            print '%(file)s:%(name)s:%(message)s' % {
+                'file': main_input,
+                'name': e.__class__.__name__,
+                'message': e.message
+            }
+
+        xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
+        xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
+        xml_file.close()
diff --git a/scripts/book2partner b/scripts/book2partner
index 4b84c2f..3534d2a 100755 (executable)
@@ -4,9 +4,18 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
+import inspect
 import optparse
 
 from librarian import packagers
+try:
+    from collections import OrderedDict
+except ImportError:
+    try:
+        from django.utils.datastructures import SortedDict
+        OrderedDict = SortedDict
+    except ImportError:
+        OrderedDict = dict
 
 
 if __name__ == '__main__':
@@ -20,34 +29,53 @@ if __name__ == '__main__':
         help='print status messages to stdout')
     parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
                       help='specifies the directory for output')
-    parser.add_option('--bookoteka', action='store_true', dest='bookoteka', default=False,
-                      help='prepare files for Bookoteka')
-    parser.add_option('--gandalf', action='store_true', dest='gandalf', default=False,
-                      help='prepare EPUB files for Gandalf')
-    parser.add_option('--gandalf-pdf', action='store_true', dest='gandalf_pdf', default=False,
-                      help='prepare PDF files for Gandalf')
-    parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
-                      help='prepare files for Virtualo API')
-    parser.add_option('--prestigio', action='store_true', dest='prestigio', default=False,
-                      help='prepare files for Prestigio')
-    parser.add_option('--prestigio-pdf', action='store_true', dest='prestigio_pdf', default=False,
-                      help='prepare PDF files for Prestigio')
+    parser.add_option('-f', '--force-overwrite',
+        action='store_true',
+        dest='overwrite',
+        default=False,
+        help='rebuild the file even if it already exists')
+    parser.add_option('-p', '--packages',
+        dest='packages',
+        metavar='package,...',
+        default='',
+        help='lists packages to build')
+    parser.add_option('-m', '--packages-modules',
+        dest='packages_modules', metavar='path.to.module,...',
+        default='librarian.partners',
+        help='modules with packages definitions to load')
+    parser.add_option('-l', '--list-packages',
+        action='store_true',
+        dest='list_packages',
+        default=False,
+        help='lists available packages')
 
     options, input_filenames = parser.parse_args()
+    packages = OrderedDict()
 
-    if len(input_filenames) < 1:
+    for module_name in options.packages_modules.split(','):
+        if not module_name:
+            continue
+        module = __import__(module_name, globals(), locals(), ['*'])
+        for package_name in dir(module):
+            package = getattr(module, package_name)
+            if inspect.isclass(package) and issubclass(package, packagers.Packager):
+                packages[package_name] = package
+    if not packages:
+        print 'No packages found!'
+
+    if options.list_packages:
+        print 'Available packages:'
+        for package_name, package in packages.items():
+            print ' ', package_name
+        exit(0)
+
+    if len(input_filenames) < 1 or not options.packages:
         parser.print_help()
         exit(1)
 
-    if options.bookoteka:
-        packagers.BookotekaEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
-    if options.gandalf:
-        packagers.GandalfEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
-    if options.gandalf_pdf:
-        packagers.GandalfPdfPackager.prepare(input_filenames, options.output_dir, options.verbose)
-    if options.virtualo:
-        packagers.VirtualoPackager.prepare(input_filenames, options.output_dir, options.verbose)
-    if options.prestigio:
-        packagers.PrestigioEpubPackager.prepare(input_filenames, options.output_dir, options.verbose)
-    if options.prestigio_pdf:
-        packagers.PrestigioPdfPackager.prepare(input_filenames, options.output_dir, options.verbose)
+    used_packages = [packages[p] for p in options.packages.split(',')]
+    for package in used_packages:
+        if options.verbose:
+            print 'Package:', package.__name__
+        package.prepare(input_filenames,
+            options.output_dir, options.verbose, options.overwrite)