From: Radek Czajka Date: Tue, 21 Aug 2012 12:35:42 +0000 (+0200) Subject: - inheriting covers after parents, X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/1405761d13e0ba62db4b1f9a9d5fa1472142b6d0?ds=sidebyside;hp=--cc - inheriting covers after parents, - DRY: move build methods to relevant fields, simplify adding formats (in catalogue.constants) - add CATALOGUE_{DONT_BUILD,FORMAT_ZIPS} instead of many NO_BUILD_* and ALL_*_ZIP settings, move to catalogue.app_settings - catalogue.management.importbooks: one option for skipping formats - use override_settings in tests - minor test fixes --- 1405761d13e0ba62db4b1f9a9d5fa1472142b6d0 diff --git a/apps/catalogue/__init__.py b/apps/catalogue/__init__.py index e69de29bb..2d0d89dd1 100644 --- a/apps/catalogue/__init__.py +++ b/apps/catalogue/__init__.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import logging +from django.conf import settings as settings +from catalogue.utils import AppSettings + + +class Settings(AppSettings): + """Default settings for catalogue app.""" + DEFAULT_LANGUAGE = u'pol' + # PDF needs TeXML + XeLaTeX, MOBI needs Calibre. 
+ DONT_BUILD = {'pdf', 'mobi'} + FORMAT_ZIPS = { + 'epub': 'wolnelektury_pl_epub', + 'pdf': 'wolnelektury_pl_pdf', + 'mobi': 'wolnelektury_pl_mobi', + 'fb2': 'wolnelektury_pl_fb2', + } + + def _more_DONT_BUILD(self, value): + for format_ in ['cover', 'pdf', 'epub', 'mobi', 'fb2', 'txt']: + attname = 'NO_BUILD_%s' % format_.upper() + if hasattr(settings, attname): + logging.warn("%s is deprecated, " + "use CATALOGUE_DONT_BUILD instead", attname) + if getattr(settings, attname): + value.add(format_) + else: + value.remove(format_) + return value + + def _more_FORMAT_ZIPS(self, value): + for format_ in ['epub', 'pdf', 'mobi', 'fb2']: + attname = 'ALL_%s_ZIP' % format_.upper() + if hasattr(settings, attname): + logging.warn("%s is deprecated, " + "use CATALOGUE_FORMAT_ZIPS[%s] instead", + attname, format_) + value[format_] = getattr(settings, attname) + return value + + +app_settings = Settings('CATALOGUE') diff --git a/apps/catalogue/constants.py b/apps/catalogue/constants.py index e1c92f8cd..1ad0b1b5e 100644 --- a/apps/catalogue/constants.py +++ b/apps/catalogue/constants.py @@ -10,3 +10,12 @@ LICENSES = { 'description': _('Creative Commons Attribution-ShareAlike 3.0 Unported'), }, } + +# Those will be generated only for books with own HTML. +EBOOK_FORMATS_WITHOUT_CHILDREN = ['txt', 'fb2'] +# Those will be generated for all books. +EBOOK_FORMATS_WITH_CHILDREN = ['pdf', 'epub', 'mobi'] +# Those will be generated when inherited cover changes. +EBOOK_FORMATS_WITH_COVERS = ['mobi'] + +EBOOK_FORMATS = EBOOK_FORMATS_WITHOUT_CHILDREN + EBOOK_FORMATS_WITH_CHILDREN diff --git a/apps/catalogue/fields.py b/apps/catalogue/fields.py index 5ab78eb03..2c4e4d57b 100644 --- a/apps/catalogue/fields.py +++ b/apps/catalogue/fields.py @@ -2,8 +2,182 @@ # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# +from django.conf import settings +from django.core.files import File from django.db import models from django.db.models.fields.files import FieldFile +from catalogue import app_settings +from catalogue.utils import remove_zip, truncate_html_words +from celery import Task +from celery.task import task +from waiter.utils import clear_cache + + +class EbookFieldFile(FieldFile): + """Represents contents of an ebook file field.""" + + def build(self): + """Build the ebook immediately.""" + return self.field.builder(self) + + def build_delay(self): + """Builds the ebook in a delayed task.""" + return self.field.builder.delay(self) + + +class EbookField(models.FileField): + """Represents an ebook file field, attachable to a model.""" + attr_class = EbookFieldFile + + def __init__(self, format_name, *args, **kwargs): + super(EbookField, self).__init__(*args, **kwargs) + self.format_name = format_name + + @property + def builder(self): + """Finds a celery task suitable for the format of the field.""" + return BuildEbook.for_format(self.format_name) + + def contribute_to_class(self, cls, name): + super(EbookField, self).contribute_to_class(cls, name) + + def has(model_instance): + return bool(getattr(model_instance, self.attname, None)) + has.__doc__ = None + has.__name__ = "has_%s" % self.attname + has.short_description = self.name + has.boolean = True + setattr(cls, 'has_%s' % self.attname, has) + + +class BuildEbook(Task): + formats = {} + + @classmethod + def register(cls, format_name): + """A decorator for registering subclasses for particular formats.""" + def wrapper(builder): + cls.formats[format_name] = builder + return builder + return wrapper + + @classmethod + def for_format(cls, format_name): + """Returns a celery task suitable for specified format.""" + return cls.formats.get(format_name, BuildEbookTask) + + @staticmethod + def transform(wldoc, fieldfile): + """Transforms an librarian.WLDocument into an librarian.OutputFile. 
+ + By default, it just calls relevant wldoc.as_??? method. + + """ + return getattr(wldoc, "as_%s" % fieldfile.field.format_name)() + + def run(self, fieldfile): + book = fieldfile.instance + out = self.transform(book.wldocument(), fieldfile) + fieldfile.save(None, File(open(out.get_filename())), save=False) + if book.pk is not None: + type(book).objects.filter(pk=book.pk).update(**{ + fieldfile.field.attname: fieldfile + }) + if fieldfile.field.format_name in app_settings.FORMAT_ZIPS: + remove_zip(app_settings.FORMAT_ZIPS[fieldfile.field.format_name]) +# Don't decorate BuildEbook, because we want to subclass it. +BuildEbookTask = task(BuildEbook, ignore_result=True) + + +@BuildEbook.register('txt') +@task(ignore_result=True) +class BuildTxt(BuildEbook): + @staticmethod + def transform(wldoc, fieldfile): + return wldoc.as_text() + + +@BuildEbook.register('pdf') +@task(ignore_result=True) +class BuildPdf(BuildEbook): + @staticmethod + def transform(wldoc, fieldfile): + return wldoc.as_pdf(morefloats=settings.LIBRARIAN_PDF_MOREFLOATS) + + def run(self, fieldfile): + BuildEbook.run(self, fieldfile) + clear_cache(fieldfile.instance.slug) + + +@BuildEbook.register('html') +@task(ignore_result=True) +class BuildHtml(BuildEbook): + def run(self, fieldfile): + from django.core.files.base import ContentFile + from slughifi import slughifi + from sortify import sortify + from librarian import html + from catalogue.models import Fragment, Tag + + book = fieldfile.instance + + meta_tags = list(book.tags.filter( + category__in=('author', 'epoch', 'genre', 'kind'))) + book_tag = book.book_tag() + + html_output = self.transform( + book.wldocument(parse_dublincore=False), + fieldfile) + if html_output: + fieldfile.save(None, ContentFile(html_output.get_string()), + save=False) + type(book).objects.filter(pk=book.pk).update(**{ + fieldfile.field.attname: fieldfile + }) + + # get ancestor l-tags for adding to new fragments + ancestor_tags = [] + p = book.parent + while p: + 
ancestor_tags.append(p.book_tag()) + p = p.parent + + # Delete old fragments and create them from scratch + book.fragments.all().delete() + # Extract fragments + closed_fragments, open_fragments = html.extract_fragments(fieldfile.path) + for fragment in closed_fragments.values(): + try: + theme_names = [s.strip() for s in fragment.themes.split(',')] + except AttributeError: + continue + themes = [] + for theme_name in theme_names: + if not theme_name: + continue + tag, created = Tag.objects.get_or_create( + slug=slughifi(theme_name), + category='theme') + if created: + tag.name = theme_name + tag.sort_key = sortify(theme_name.lower()) + tag.save() + themes.append(tag) + if not themes: + continue + + text = fragment.to_string() + short_text = truncate_html_words(text, 15) + if text == short_text: + short_text = '' + new_fragment = Fragment.objects.create(anchor=fragment.id, + book=book, text=text, short_text=short_text) + + new_fragment.save() + new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags) + book.html_built.send(sender=book) + return True + return False class OverwritingFieldFile(FieldFile): @@ -28,7 +202,14 @@ class OverwritingFileField(models.FileField): try: # check for south from south.modelsinspector import add_introspection_rules - - add_introspection_rules([], ["^catalogue\.fields\.OverwritingFileField"]) except ImportError: pass +else: + add_introspection_rules([ + ( + [EbookField], + [], + {'format_name': ('format_name', {})} + ) + ], ["^catalogue\.fields\.EbookField"]) + add_introspection_rules([], ["^catalogue\.fields\.OverwritingFileField"]) diff --git a/apps/catalogue/management/commands/importbooks.py b/apps/catalogue/management/commands/importbooks.py index b323edc49..637e2148f 100644 --- a/apps/catalogue/management/commands/importbooks.py +++ b/apps/catalogue/management/commands/importbooks.py @@ -22,14 +22,9 @@ class Command(BaseCommand): help='Verbosity level; 0=minimal output, 1=normal output, 2=all output'), 
make_option('-f', '--force', action='store_true', dest='force', default=False, help='Overwrite works already in the catalogue'), - make_option('-E', '--no-build-epub', action='store_false', dest='build_epub', default=True, - help='Don\'t build EPUB file'), - make_option('-M', '--no-build-mobi', action='store_false', dest='build_mobi', default=True, - help='Don\'t build MOBI file'), - make_option('-T', '--no-build-txt', action='store_false', dest='build_txt', default=True, - help='Don\'t build TXT file'), - make_option('-P', '--no-build-pdf', action='store_false', dest='build_pdf', default=True, - help='Don\'t build PDF file'), + make_option('-D', '--dont-build', dest='dont_build', + metavar="FORMAT,...", + help="Skip building specified formats"), make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True, help='Skip indexing imported works for search'), make_option('-w', '--wait-until', dest='wait_until', metavar='TIME', @@ -42,22 +37,25 @@ class Command(BaseCommand): def import_book(self, file_path, options): verbose = options.get('verbose') + if options.get('dont_build'): + dont_build = options.get('dont_build').lower().split(',') + else: + dont_build = None file_base, ext = os.path.splitext(file_path) book = Book.from_xml_file(file_path, overwrite=options.get('force'), - build_epub=options.get('build_epub'), - build_txt=options.get('build_txt'), - build_pdf=options.get('build_pdf'), - build_mobi=options.get('build_mobi'), - search_index=options.get('search_index'), - search_index_reuse=True, search_index_tags=False) + dont_build=dont_build, + search_index=options.get('search_index'), + search_index_reuse=True, + search_index_tags=False) for ebook_format in Book.ebook_formats: if os.path.isfile(file_base + '.' + ebook_format): getattr(book, '%s_file' % ebook_format).save( '%s.%s' % (book.slug, ebook_format), - File(file(file_base + '.' + ebook_format))) + File(file(file_base + '.' 
+ ebook_format)), + save=False + ) if verbose: print "Importing %s.%s" % (file_base, ebook_format) - book.save() def import_picture(self, file_path, options): diff --git a/apps/catalogue/migrations/0005_auto__chg_field_book_pdf_file__chg_field_book_html_file__chg_field_boo.py b/apps/catalogue/migrations/0005_auto__chg_field_book_pdf_file__chg_field_book_html_file__chg_field_boo.py new file mode 100644 index 000000000..6f6bc810d --- /dev/null +++ b/apps/catalogue/migrations/0005_auto__chg_field_book_pdf_file__chg_field_book_html_file__chg_field_boo.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + + # Changing field 'Book.pdf_file' + db.alter_column('catalogue_book', 'pdf_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='pdf')) + + # Changing field 'Book.html_file' + db.alter_column('catalogue_book', 'html_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='html')) + + # Changing field 'Book.xml_file' + db.alter_column('catalogue_book', 'xml_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='xml')) + + # Changing field 'Book.txt_file' + db.alter_column('catalogue_book', 'txt_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='txt')) + + # Changing field 'Book.fb2_file' + db.alter_column('catalogue_book', 'fb2_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='fb2')) + + # Changing field 'Book.mobi_file' + db.alter_column('catalogue_book', 'mobi_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='mobi')) + + # Changing field 'Book.epub_file' + db.alter_column('catalogue_book', 'epub_file', self.gf('catalogue.fields.EbookField')(max_length=100, format_name='epub')) + + # Changing field 'Book.cover' + db.alter_column('catalogue_book', 'cover', 
self.gf('catalogue.fields.EbookField')(max_length=100, null=True, format_name='cover')) + + def backwards(self, orm): + + # Changing field 'Book.pdf_file' + db.alter_column('catalogue_book', 'pdf_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.html_file' + db.alter_column('catalogue_book', 'html_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.xml_file' + db.alter_column('catalogue_book', 'xml_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.txt_file' + db.alter_column('catalogue_book', 'txt_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.fb2_file' + db.alter_column('catalogue_book', 'fb2_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.mobi_file' + db.alter_column('catalogue_book', 'mobi_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.epub_file' + db.alter_column('catalogue_book', 'epub_file', self.gf('django.db.models.fields.files.FileField')(max_length=100)) + + # Changing field 'Book.cover' + db.alter_column('catalogue_book', 'cover', self.gf('django.db.models.fields.files.FileField')(max_length=100, null=True)) + + models = { + 'auth.group': { + 'Meta': {'object_name': 'Group'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), + 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) + }, + 'auth.permission': { + 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, + 'codename': ('django.db.models.fields.CharField', [], 
{'max_length': '100'}), + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) + }, + 'auth.user': { + 'Meta': {'object_name': 'User'}, + 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), + 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), + 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) + }, + 'catalogue.book': { + 'Meta': {'ordering': "('sort_key',)", 'object_name': 'Book'}, + '_related_info': ('jsonfield.fields.JSONField', [], {'null': 'True', 'blank': 'True'}), + 'changed_at': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}), + 'common_slug': ('django.db.models.fields.SlugField', [], 
{'max_length': '120'}), + 'cover': ('catalogue.fields.EbookField', [], {'max_length': '100', 'null': 'True', 'format_name': "'cover'", 'blank': 'True'}), + 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'db_index': 'True', 'blank': 'True'}), + 'description': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'epub_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 'format_name': "'epub'", 'blank': 'True'}), + 'extra_info': ('jsonfield.fields.JSONField', [], {'default': "'{}'"}), + 'fb2_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 'format_name': "'fb2'", 'blank': 'True'}), + 'gazeta_link': ('django.db.models.fields.CharField', [], {'max_length': '240', 'blank': 'True'}), + 'html_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 'format_name': "'html'", 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'language': ('django.db.models.fields.CharField', [], {'default': "'pol'", 'max_length': '3', 'db_index': 'True'}), + 'mobi_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 'format_name': "'mobi'", 'blank': 'True'}), + 'parent': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'children'", 'null': 'True', 'to': "orm['catalogue.Book']"}), + 'parent_number': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'pdf_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 'format_name': "'pdf'", 'blank': 'True'}), + 'slug': ('django.db.models.fields.SlugField', [], {'unique': 'True', 'max_length': '120'}), + 'sort_key': ('django.db.models.fields.CharField', [], {'max_length': '120', 'db_index': 'True'}), + 'title': ('django.db.models.fields.CharField', [], {'max_length': '120'}), + 'txt_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 
'format_name': "'txt'", 'blank': 'True'}), + 'wiki_link': ('django.db.models.fields.CharField', [], {'max_length': '240', 'blank': 'True'}), + 'xml_file': ('catalogue.fields.EbookField', [], {'default': "''", 'max_length': '100', 'format_name': "'xml'", 'blank': 'True'}) + }, + 'catalogue.bookmedia': { + 'Meta': {'ordering': "('type', 'name')", 'object_name': 'BookMedia'}, + 'book': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'media'", 'to': "orm['catalogue.Book']"}), + 'extra_info': ('jsonfield.fields.JSONField', [], {'default': "'{}'"}), + 'file': ('catalogue.fields.OverwritingFileField', [], {'max_length': '100'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': "'100'"}), + 'source_sha1': ('django.db.models.fields.CharField', [], {'max_length': '40', 'null': 'True', 'blank': 'True'}), + 'type': ('django.db.models.fields.CharField', [], {'max_length': "'100'"}), + 'uploaded_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}) + }, + 'catalogue.collection': { + 'Meta': {'ordering': "('title',)", 'object_name': 'Collection'}, + 'book_slugs': ('django.db.models.fields.TextField', [], {}), + 'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'slug': ('django.db.models.fields.SlugField', [], {'max_length': '120', 'primary_key': 'True'}), + 'title': ('django.db.models.fields.CharField', [], {'max_length': '120', 'db_index': 'True'}) + }, + 'catalogue.fragment': { + 'Meta': {'ordering': "('book', 'anchor')", 'object_name': 'Fragment'}, + 'anchor': ('django.db.models.fields.CharField', [], {'max_length': '120'}), + 'book': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'fragments'", 'to': "orm['catalogue.Book']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'short_text': ('django.db.models.fields.TextField', [], 
{}), + 'text': ('django.db.models.fields.TextField', [], {}) + }, + 'catalogue.tag': { + 'Meta': {'ordering': "('sort_key',)", 'unique_together': "(('slug', 'category'),)", 'object_name': 'Tag'}, + 'book_count': ('django.db.models.fields.IntegerField', [], {'null': 'True', 'blank': 'True'}), + 'category': ('django.db.models.fields.CharField', [], {'max_length': '50', 'db_index': 'True'}), + 'changed_at': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}), + 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'db_index': 'True', 'blank': 'True'}), + 'description': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'gazeta_link': ('django.db.models.fields.CharField', [], {'max_length': '240', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50', 'db_index': 'True'}), + 'slug': ('django.db.models.fields.SlugField', [], {'max_length': '120'}), + 'sort_key': ('django.db.models.fields.CharField', [], {'max_length': '120', 'db_index': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}), + 'wiki_link': ('django.db.models.fields.CharField', [], {'max_length': '240', 'blank': 'True'}) + }, + 'catalogue.tagrelation': { + 'Meta': {'unique_together': "(('tag', 'content_type', 'object_id'),)", 'object_name': 'TagRelation', 'db_table': "'catalogue_tag_relation'"}, + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'object_id': ('django.db.models.fields.PositiveIntegerField', [], {'db_index': 'True'}), + 'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'items'", 'to': "orm['catalogue.Tag']"}) + }, + 'contenttypes.contenttype': { 
+ 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, + 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) + } + } + + complete_apps = ['catalogue'] \ No newline at end of file diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py index 5672853f6..e6f08a91a 100644 --- a/apps/catalogue/models/book.py +++ b/apps/catalogue/models/book.py @@ -3,7 +3,7 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import re -from django.conf import settings +from django.conf import settings as settings from django.core.cache import get_cache from django.db import models from django.db.models import permalink @@ -11,8 +11,11 @@ import django.dispatch from django.utils.datastructures import SortedDict from django.utils.translation import ugettext_lazy as _ import jsonfield +from catalogue import constants +from catalogue.fields import EbookField from catalogue.models import Tag, Fragment, BookMedia -from catalogue.utils import create_zip, split_tags, truncate_html_words, book_upload_path +from catalogue.utils import create_zip, split_tags, book_upload_path +from catalogue import app_settings from catalogue import tasks from newtagging import managers @@ -28,7 +31,7 @@ class Book(models.Model): unique=True) common_slug = models.SlugField(_('slug'), max_length=120, db_index=True) language = models.CharField(_('language code'), max_length=3, db_index=True, - default=settings.CATALOGUE_DEFAULT_LANGUAGE) + default=app_settings.DEFAULT_LANGUAGE) description = models.TextField(_('description'), blank=True) created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True) 
changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True) @@ -38,12 +41,13 @@ class Book(models.Model): wiki_link = models.CharField(blank=True, max_length=240) # files generated during publication - cover = models.FileField(_('cover'), upload_to=book_upload_path('png'), - null=True, blank=True) - ebook_formats = ['pdf', 'epub', 'mobi', 'fb2', 'txt'] + cover = EbookField('cover', _('cover'), + upload_to=book_upload_path('jpg'), null=True, blank=True) + ebook_formats = constants.EBOOK_FORMATS formats = ebook_formats + ['html', 'xml'] - parent = models.ForeignKey('self', blank=True, null=True, related_name='children') + parent = models.ForeignKey('self', blank=True, null=True, + related_name='children') _related_info = jsonfield.JSONField(blank=True, null=True, editable=False) @@ -152,106 +156,33 @@ class Book(models.Model): has_daisy_file.short_description = 'DAISY' has_daisy_file.boolean = True - def wldocument(self, parse_dublincore=True): + def wldocument(self, parse_dublincore=True, inherit=True): from catalogue.import_utils import ORMDocProvider from librarian.parser import WLDocument + if inherit and self.parent: + meta_fallbacks = self.parent.cover_info() + else: + meta_fallbacks = None + return WLDocument.from_file(self.xml_file.path, provider=ORMDocProvider(self), - parse_dublincore=parse_dublincore) - - def build_html(self): - from django.core.files.base import ContentFile - from slughifi import slughifi - from sortify import sortify - from librarian import html - - meta_tags = list(self.tags.filter( - category__in=('author', 'epoch', 'genre', 'kind'))) - book_tag = self.book_tag() - - html_output = self.wldocument(parse_dublincore=False).as_html() - if html_output: - self.html_file.save('%s.html' % self.slug, - ContentFile(html_output.get_string())) - - # get ancestor l-tags for adding to new fragments - ancestor_tags = [] - p = self.parent - while p: - ancestor_tags.append(p.book_tag()) - p = p.parent - - # Delete old fragments 
and create them from scratch - self.fragments.all().delete() - # Extract fragments - closed_fragments, open_fragments = html.extract_fragments(self.html_file.path) - for fragment in closed_fragments.values(): - try: - theme_names = [s.strip() for s in fragment.themes.split(',')] - except AttributeError: - continue - themes = [] - for theme_name in theme_names: - if not theme_name: - continue - tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme') - if created: - tag.name = theme_name - tag.sort_key = sortify(theme_name.lower()) - tag.save() - themes.append(tag) - if not themes: - continue - - text = fragment.to_string() - short_text = truncate_html_words(text, 15) - if text == short_text: - short_text = '' - new_fragment = Fragment.objects.create(anchor=fragment.id, book=self, - text=text, short_text=short_text) - - new_fragment.save() - new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags) - self.save() - self.html_built.send(sender=self) - return True - return False - - # Thin wrappers for builder tasks - def build_cover(self): - """(Re)builds the cover image.""" - return tasks.build_cover.delay(self.pk) - def build_pdf(self, *args, **kwargs): - """(Re)builds PDF.""" - return tasks.build_pdf.delay(self.pk, *args, **kwargs) - def build_epub(self, *args, **kwargs): - """(Re)builds EPUB.""" - return tasks.build_epub.delay(self.pk, *args, **kwargs) - def build_mobi(self, *args, **kwargs): - """(Re)builds MOBI.""" - return tasks.build_mobi.delay(self.pk, *args, **kwargs) - def build_fb2(self, *args, **kwargs): - """(Re)build FB2""" - return tasks.build_fb2.delay(self.pk, *args, **kwargs) - def build_txt(self, *args, **kwargs): - """(Re)builds TXT.""" - return tasks.build_txt.delay(self.pk, *args, **kwargs) + parse_dublincore=parse_dublincore, + meta_fallbacks=meta_fallbacks) @staticmethod def zip_format(format_): def pretty_file_name(book): return "%s/%s.%s" % ( - b.extra_info['author'], - b.slug, + 
book.extra_info['author'], + book.slug, format_) field_name = "%s_file" % format_ books = Book.objects.filter(parent=None).exclude(**{field_name: ""}) paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()] - return create_zip(paths, - getattr(settings, "ALL_%s_ZIP" % format_.upper())) + return create_zip(paths, app_settings.FORMAT_ZIPS[format_]) def zip_audiobooks(self, format_): bm = BookMedia.objects.filter(book=self, type=format_) @@ -291,8 +222,11 @@ class Book(models.Model): @classmethod def from_text_and_meta(cls, raw_file, book_info, overwrite=False, - build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True, - search_index=True, search_index_tags=True, search_index_reuse=False): + dont_build=None, search_index=True, + search_index_tags=True, search_index_reuse=False): + if dont_build is None: + dont_build = set() + dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD)) # check for parts before we do anything children = [] @@ -312,12 +246,17 @@ class Book(models.Model): if created: book_shelves = [] + old_cover = None else: if not overwrite: raise Book.AlreadyExists(_('Book %s already exists') % ( book_slug)) # Save shelves for this book book_shelves = list(book.tags.filter(category='set')) + old_cover = book.cover_info() + + # Save XML file + book.xml_file.save('%s.xml' % book.slug, raw_file, save=False) book.language = book_info.language book.title = book_info.title @@ -332,44 +271,41 @@ class Book(models.Model): book.tags = set(meta_tags + book_shelves) - obsolete_children = set(b for b in book.children.all() if b not in children) + cover_changed = old_cover != book.cover_info() + obsolete_children = set(b for b in book.children.all() + if b not in children) for n, child_book in enumerate(children): child_book.parent = book child_book.parent_number = n child_book.save() + if cover_changed: + child_book.parent_cover_changed() # Disown unfaithful children and let them cope on their own. 
for child in obsolete_children: child.parent = None child.parent_number = 0 child.save() tasks.fix_tree_tags.delay(child) - - # Save XML file - book.xml_file.save('%s.xml' % book.slug, raw_file, save=False) + if old_cover: + child.parent_cover_changed() # delete old fragments when overwriting book.fragments.all().delete() # Build HTML, fix the tree tags, build cover. - has_own_text = bool(book.build_html()) + has_own_text = bool(book.html_file.build()) tasks.fix_tree_tags.delay(book) - book.build_cover(book_info) + if 'cover' not in dont_build: + book.cover.build_delay() # No saves behind this point. if has_own_text: - if not settings.NO_BUILD_TXT and build_txt: - book.build_txt() - if not settings.NO_BUILD_FB2 and build_fb2: - book.build_fb2() - - if not settings.NO_BUILD_EPUB and build_epub: - book.build_epub() - - if not settings.NO_BUILD_PDF and build_pdf: - book.build_pdf() - - if not settings.NO_BUILD_MOBI and build_mobi: - book.build_mobi() + for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN: + if format_ not in dont_build: + getattr(book, '%s_file' % format_).build_delay() + for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN: + if format_ not in dont_build: + getattr(book, '%s_file' % format_).build_delay() if not settings.NO_SEARCH_INDEX and search_index: book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse) @@ -394,7 +330,8 @@ class Book(models.Model): sub_parent_tags = parent_tags + [book.book_tag()] for frag in book.fragments.all(): affected_tags.update(frag.tags) - frag.tags = list(frag.tags.exclude(category='book')) + sub_parent_tags + frag.tags = list(frag.tags.exclude(category='book') + ) + sub_parent_tags for child in book.children.all(): affected_tags.update(fix_subtree(child, sub_parent_tags)) return affected_tags @@ -415,6 +352,36 @@ class Book(models.Model): book.reset_theme_counter() book = book.parent + def cover_info(self, inherit=True): + """Returns a dictionary to serve as fallback for BookInfo. 
+ + For now, the only thing inherited is the cover image. + """ + need = False + info = {} + for field in ('cover_url', 'cover_by', 'cover_source'): + val = self.extra_info.get(field) + if val: + info[field] = val + else: + need = True + if inherit and need and self.parent is not None: + parent_info = self.parent.cover_info() + parent_info.update(info) + info = parent_info + return info + + def parent_cover_changed(self): + """Called when parent book's cover image is changed.""" + if not self.cover_info(inherit=False): + if 'cover' not in app_settings.DONT_BUILD: + self.cover.build_delay() + for format_ in constants.EBOOK_FORMATS_WITH_COVERS: + if format_ not in app_settings.DONT_BUILD: + getattr(self, '%s_file' % format_).build_delay() + for child in self.children.all(): + child.parent_cover_changed() + def related_info(self): """Keeps info about related objects (tags, media) in cache field.""" if self._related_info is not None: @@ -607,20 +574,9 @@ class Book(models.Model): return None -def _has_factory(ftype): - has = lambda self: bool(getattr(self, "%s_file" % ftype)) - has.short_description = ftype.upper() - has.__doc__ = None - has.boolean = True - has.__name__ = "has_%s_file" % ftype - return has - - # add the file fields -for t in Book.formats: - field_name = "%s_file" % t - models.FileField(_("%s file" % t.upper()), - upload_to=book_upload_path(t), - blank=True).contribute_to_class(Book, field_name) - - setattr(Book, "has_%s_file" % t, _has_factory(t)) +for format_ in Book.formats: + field_name = "%s_file" % format_ + EbookField(format_, _("%s file" % format_.upper()), + upload_to=book_upload_path(format_), + blank=True, default='').contribute_to_class(Book, field_name) diff --git a/apps/catalogue/tasks.py b/apps/catalogue/tasks.py index f1d8a9005..af00c35bd 100644 --- a/apps/catalogue/tasks.py +++ b/apps/catalogue/tasks.py @@ -34,75 +34,6 @@ def index_book(book_id, book_info=None): raise e -def _build_ebook(book_id, ext, transform): - """Generic ebook 
builder.""" - from django.core.files import File - from catalogue.models import Book - - book = Book.objects.get(pk=book_id) - out = transform(book.wldocument()) - field_name = '%s_file' % ext - # Update instead of saving the model to avoid race condition. - getattr(book, field_name).save('%s.%s' % (book.slug, ext), - File(open(out.get_filename())), - save=False - ) - Book.objects.filter(pk=book_id).update(**{ - field_name: getattr(book, field_name) - }) - - -@task(ignore_result=True) -def build_txt(book_id): - """(Re)builds the TXT file for a book.""" - _build_ebook(book_id, 'txt', lambda doc: doc.as_text()) - - -@task(ignore_result=True, rate_limit=settings.CATALOGUE_PDF_RATE_LIMIT) -def build_pdf(book_id): - """(Re)builds the pdf file for a book.""" - from catalogue.models import Book - from catalogue.utils import remove_zip - from waiter.utils import clear_cache - - _build_ebook(book_id, 'pdf', - lambda doc: doc.as_pdf(morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)) - # Remove cached downloadables - remove_zip(settings.ALL_PDF_ZIP) - book = Book.objects.get(pk=book_id) - clear_cache(book.slug) - - -@task(ignore_result=True, rate_limit=settings.CATALOGUE_EPUB_RATE_LIMIT) -def build_epub(book_id): - """(Re)builds the EPUB file for a book.""" - from catalogue.utils import remove_zip - - _build_ebook(book_id, 'epub', lambda doc: doc.as_epub()) - # remove zip with all epub files - remove_zip(settings.ALL_EPUB_ZIP) - - -@task(ignore_result=True, rate_limit=settings.CATALOGUE_MOBI_RATE_LIMIT) -def build_mobi(book_id): - """(Re)builds the MOBI file for a book.""" - from catalogue.utils import remove_zip - - _build_ebook(book_id, 'mobi', lambda doc: doc.as_mobi()) - # remove zip with all mobi files - remove_zip(settings.ALL_MOBI_ZIP) - - -@task(ignore_result=True, rate_limit=settings.CATALOGUE_FB2_RATE_LIMIT) -def build_fb2(book_id, *args, **kwargs): - """(Re)builds the FB2 file for a book.""" - from catalogue.utils import remove_zip - - _build_ebook(book_id, 'fb2', 
lambda doc: doc.as_fb2()) - # remove zip with all fb2 files - remove_zip(settings.ALL_FB2_ZIP) - - @task(ignore_result=True, rate_limit=settings.CATALOGUE_CUSTOMPDF_RATE_LIMIT) def build_custom_pdf(book_id, customizations, file_name): """Builds a custom PDF file.""" @@ -116,20 +47,3 @@ def build_custom_pdf(book_id, customizations, file_name): customizations=customizations, morefloats=settings.LIBRARIAN_PDF_MOREFLOATS) DefaultStorage().save(file_name, File(open(pdf.get_filename()))) - - -@task(ignore_result=True) -def build_cover(book_id): - """(Re)builds the cover image.""" - from StringIO import StringIO - from django.core.files.base import ContentFile - from librarian.cover import WLCover - from catalogue.models import Book - - book = Book.objects.get(pk=book_id) - book_info = book.wldocument().book_info - cover = WLCover(book_info).image() - imgstr = StringIO() - cover.save(imgstr, 'png') - book.cover.save(None, ContentFile(imgstr.getvalue()), save=False) - Book.objects.filter(pk=book_id).update(cover=book.cover) diff --git a/apps/catalogue/test_utils.py b/apps/catalogue/test_utils.py index f42818f7e..59e51382a 100644 --- a/apps/catalogue/test_utils.py +++ b/apps/catalogue/test_utils.py @@ -1,31 +1,30 @@ -from django.conf import settings +# -*- coding: utf-8 -*- +# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
+# from django.test import TestCase -import shutil +from django.test.utils import override_settings import tempfile from slughifi import slughifi from librarian import WLURI +@override_settings( + MEDIA_ROOT=tempfile.mkdtemp(prefix='djangotest_'), + CATALOGUE_DONT_BUILD={'pdf', 'mobi', 'epub', 'txt', 'fb2', 'cover'}, + NO_SEARCH_INDEX = True, + CELERY_ALWAYS_EAGER = True, + CACHES={ + 'api': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}, + 'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}, + 'permanent': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}, + }, +) class WLTestCase(TestCase): """ Generic base class for tests. Adds settings freeze and clears MEDIA_ROOT. """ longMessage = True - def setUp(self): - self._MEDIA_ROOT, settings.MEDIA_ROOT = settings.MEDIA_ROOT, tempfile.mkdtemp(prefix='djangotest_') - settings.NO_SEARCH_INDEX = settings.NO_BUILD_PDF = settings.NO_BUILD_MOBI = settings.NO_BUILD_EPUB = settings.NO_BUILD_TXT = settings.NO_BUILD_FB2 = True - settings.CELERY_ALWAYS_EAGER = True - self._CACHES, settings.CACHES = settings.CACHES, { - 'default': { - 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', - } - } - - def tearDown(self): - shutil.rmtree(settings.MEDIA_ROOT, True) - settings.MEDIA_ROOT = self._MEDIA_ROOT - settings.CACHES = self._CACHES - class PersonStub(object): diff --git a/apps/catalogue/tests/book_import.py b/apps/catalogue/tests/book_import.py index 09d0e1e79..76061d05a 100644 --- a/apps/catalogue/tests/book_import.py +++ b/apps/catalogue/tests/book_import.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement +from django.conf import settings from django.core.files.base import ContentFile, File from catalogue.test_utils import * @@ -418,7 +418,7 @@ class BookImportGenerateTest(WLTestCase): self.book = models.Book.from_xml_file(xml) def test_gen_pdf(self): - self.book.build_pdf() + self.book.pdf_file.build() book = models.Book.objects.get(pk=self.book.pk) 
self.assertTrue(path.exists(book.pdf_file.path)) @@ -426,7 +426,7 @@ class BookImportGenerateTest(WLTestCase): """This book contains a child.""" xml = path.join(path.dirname(__file__), "files/fraszki.xml") parent = models.Book.from_xml_file(xml) - parent.build_pdf() + parent.pdf_file.build() parent = models.Book.objects.get(pk=parent.pk) self.assertTrue(path.exists(parent.pdf_file.path)) diff --git a/apps/catalogue/tests/bookmedia.py b/apps/catalogue/tests/bookmedia.py index 5d2ba66c3..da427e8b5 100644 --- a/apps/catalogue/tests/bookmedia.py +++ b/apps/catalogue/tests/bookmedia.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from django.conf import settings from os.path import basename, exists, join, dirname from django.core.files.base import ContentFile, File diff --git a/apps/catalogue/utils.py b/apps/catalogue/utils.py index bda7e0475..1ac0ee8d8 100644 --- a/apps/catalogue/utils.py +++ b/apps/catalogue/utils.py @@ -276,3 +276,27 @@ def clear_custom_pdf(book): """ from waiter.utils import clear_cache clear_cache('book/%s' % book.slug) + + +class AppSettings(object): + """Allows specifying custom settings for an app, with default values. + + Just subclass, set some properties and instantiate with a prefix. + Getting a SETTING from an instance will check for prefix_SETTING + in project settings if set, else take the default. The value will then + be filtered through the _more_SETTING method, if there is one. 
+ + """ + def __init__(self, prefix): + self._prefix = prefix + + def __getattribute__(self, name): + if name.startswith('_'): + return object.__getattribute__(self, name) + value = getattr(settings, + "%s_%s" % (self._prefix, name), + object.__getattribute__(self, name)) + more = "_more_%s" % name + if hasattr(self, more): + value = getattr(self, more)(value) + return value diff --git a/apps/picture/tests/picture_import.py b/apps/picture/tests/picture_import.py index 202acdd65..785aa6d28 100644 --- a/apps/picture/tests/picture_import.py +++ b/apps/picture/tests/picture_import.py @@ -2,11 +2,11 @@ from __future__ import with_statement from os import path -from django.test import TestCase from picture.models import Picture +from catalogue.test_utils import WLTestCase -class PictureTest(TestCase): +class PictureTest(WLTestCase): def test_import(self): picture = Picture.from_xml_file(path.join(path.dirname(__file__), "files/kandinsky-composition-viii.xml")) diff --git a/apps/search/tests/index.py b/apps/search/tests/index.py index 738288892..5155a84e4 100644 --- a/apps/search/tests/index.py +++ b/apps/search/tests/index.py @@ -1,33 +1,24 @@ # -*- coding: utf-8 -*- - -from __future__ import with_statement - from django.conf import settings -from search import Index, Search, IndexStore, JVM, SearchResult -from catalogue import models +from django.test.utils import override_settings from catalogue.test_utils import WLTestCase from lucene import PolishAnalyzer, Version -#from nose.tools import raises from os import path +import tempfile +from catalogue import models +from search import Search, SearchResult +@override_settings( + SEARCH_INDEX = tempfile.mkdtemp(prefix='djangotest_search_'), +) class BookSearchTests(WLTestCase): def setUp(self): - JVM.attachCurrentThread() WLTestCase.setUp(self) - settings.NO_SEARCH_INDEX = False - settings.SEARCH_INDEX = path.join(settings.MEDIA_ROOT, 'search') txt = path.join(path.dirname(__file__), 'files/fraszka-do-anusie.xml') - 
self.book = models.Book.from_xml_file(txt) - - index = Index() - index.open() - try: - index.index_book(self.book) - except: - index.close() - + with self.settings(NO_SEARCH_INDEX=False): + self.book = models.Book.from_xml_file(txt) self.search = Search() def test_search_perfect_book_author(self): diff --git a/lib/librarian b/lib/librarian index 17a9ed3b7..cbe81ee35 160000 --- a/lib/librarian +++ b/lib/librarian @@ -1 +1 @@ -Subproject commit 17a9ed3b7ef12e0786ddf46bf8a52b1087224762 +Subproject commit cbe81ee35b07783b4f52c3d3dda83db7aaf82d34 diff --git a/wolnelektury/settings/custom.py b/wolnelektury/settings/custom.py index 6bf5a8806..446c730ee 100644 --- a/wolnelektury/settings/custom.py +++ b/wolnelektury/settings/custom.py @@ -5,29 +5,12 @@ API_WAIT = 10 MAX_TAG_LIST = 6 NO_SEARCH_INDEX = False -NO_BUILD_EPUB = False -NO_BUILD_TXT = False -NO_BUILD_FB2 = False -# You'll need XeLaTeX to generate PDF files. -NO_BUILD_PDF = True NO_CUSTOM_PDF = True -# You'll need Calibre installed to generate MOBI files. -NO_BUILD_MOBI = True - - -ALL_EPUB_ZIP = 'wolnelektury_pl_epub' -ALL_PDF_ZIP = 'wolnelektury_pl_pdf' -ALL_MOBI_ZIP = 'wolnelektury_pl_mobi' -ALL_FB2_ZIP = 'wolnelektury_pl_fb2' CATALOGUE_DEFAULT_LANGUAGE = 'pol' PUBLISH_PLAN_FEED = 'http://redakcja.wolnelektury.pl/documents/track/editor-proofreading/?published=false' # limit rate for ebooks creation -CATALOGUE_PDF_RATE_LIMIT = '1/m' -CATALOGUE_EPUB_RATE_LIMIT = '6/m' -CATALOGUE_FB2_RATE_LIMIT = '5/m' -CATALOGUE_MOBI_RATE_LIMIT = '5/m' CATALOGUE_CUSTOMPDF_RATE_LIMIT = '1/m' # set to 'new' or 'old' to skip time-consuming test