Minor fix in OAI-PMH.
[wolnelektury.git] / apps / catalogue / fields.py
index e19df9d..0ff2ca9 100644 (file)
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-import datetime
-from functools import wraps
-
 from django.conf import settings
 from django.conf import settings
+from django.core.files import File
 from django.db import models
 from django.db.models.fields.files import FieldFile
 from django.db import models
 from django.db.models.fields.files import FieldFile
-from django.db.models import signals
-from django import forms
-from django.forms.widgets import flatatt
-from django.utils.encoding import smart_unicode
-from django.utils import simplejson as json
-from django.utils.html import escape
-from django.utils.safestring import mark_safe
-from django.utils.translation import ugettext_lazy as _
-
-
-class JSONEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, datetime.datetime):
-            return obj.strftime('%Y-%m-%d %H:%M:%S')
-        elif isinstance(obj, datetime.date):
-            return obj.strftime('%Y-%m-%d')
-        elif isinstance(obj, datetime.time):
-            return obj.strftime('%H:%M:%S')
-        return json.JSONEncoder.default(self, obj)
+from catalogue import app_settings
+from catalogue.constants import LANGUAGES_3TO2
+from catalogue.utils import remove_zip, truncate_html_words
+from celery.task import Task, task
+from celery.utils.log import get_task_logger
+from waiter.utils import clear_cache
 
 
+task_logger = get_task_logger(__name__)
 
 
-def dumps(data):
-    return JSONEncoder().encode(data)
 
 
+class EbookFieldFile(FieldFile):
+    """Represents contents of an ebook file field."""
 
 
-def loads(str):
-    return json.loads(str, encoding=settings.DEFAULT_CHARSET)
+    def build(self):
+        """Build the ebook immediately."""
+        return self.field.builder.build(self)
 
 
+    def build_delay(self):
+        """Builds the ebook in a delayed task."""
+        return self.field.builder.delay(self.instance, self.field.attname)
 
 
-class JSONFormField(forms.CharField):
-    widget = forms.Textarea
 
 
-    def clean(self, value):
-        try:
-            loads(value)
-            return value
-        except ValueError, e:
-            raise forms.ValidationError(_('Enter a valid JSON value. Error: %s') % e)
+class EbookField(models.FileField):
+    """Represents an ebook file field, attachable to a model."""
+    attr_class = EbookFieldFile
 
 
+    def __init__(self, format_name, *args, **kwargs):
+        super(EbookField, self).__init__(*args, **kwargs)
+        self.format_name = format_name
 
 
-class JSONField(models.TextField):
-    def formfield(self, **kwargs):
-        defaults = {'form_class': JSONFormField}
-        defaults.update(kwargs)
-        return super(JSONField, self).formfield(**defaults)
+    def deconstruct(self):
+        name, path, args, kwargs = super(EbookField, self).deconstruct()
+        args.insert(0, self.format_name)
+        return name, path, args, kwargs
 
 
-    def db_type(self, connection):
-        return 'text'
-
-    def get_internal_type(self):
-        return 'TextField'
+    @property
+    def builder(self):
+        """Finds a celery task suitable for the format of the field."""
+        return BuildEbook.for_format(self.format_name)
 
     def contribute_to_class(self, cls, name):
 
     def contribute_to_class(self, cls, name):
-        super(JSONField, self).contribute_to_class(cls, name)
-
-        def get_value(model_instance):
-            return loads(getattr(model_instance, self.attname, None))
-        setattr(cls, 'get_%s_value' % self.name, get_value)
-
-        def set_value(model_instance, json):
-            return setattr(model_instance, self.attname, dumps(json))
-        setattr(cls, 'set_%s_value' % self.name, set_value)
-
-
-class JQueryAutoCompleteWidget(forms.TextInput):
-    def __init__(self, options, *args, **kwargs):
-        self.options = dumps(options)
-        super(JQueryAutoCompleteWidget, self).__init__(*args, **kwargs)
-
-    def render_js(self, field_id, options):
-        return u'$(\'#%s\').autocomplete(%s).result(autocomplete_result_handler);' % (field_id, options)
-
-    def render(self, name, value=None, attrs=None):
-        final_attrs = self.build_attrs(attrs, name=name)
-        if value:
-            final_attrs['value'] = smart_unicode(value)
-
-        if not self.attrs.has_key('id'):
-            final_attrs['id'] = 'id_%s' % name
-
-        html = u'''<input type="text" %(attrs)s/>
-            <script type="text/javascript">//<!--
-            %(js)s//--></script>
-            ''' % {
-                'attrs': flatatt(final_attrs),
-                'js' : self.render_js(final_attrs['id'], self.options),
-            }
+        super(EbookField, self).contribute_to_class(cls, name)
+
+        def has(model_instance):
+            return bool(getattr(model_instance, self.attname, None))
+        has.__doc__ = None
+        has.__name__ = str("has_%s" % self.attname)
+        has.short_description = self.name
+        has.boolean = True
+        setattr(cls, 'has_%s' % self.attname, has)
+
+
+class BuildEbook(Task):
+    formats = {}
+
+    @classmethod
+    def register(cls, format_name):
+        """A decorator for registering subclasses for particular formats."""
+        def wrapper(builder):
+            cls.formats[format_name] = builder
+            return builder
+        return wrapper
+
+    @classmethod
+    def for_format(cls, format_name):
+        """Returns a celery task suitable for specified format."""
+        return cls.formats.get(format_name, BuildEbookTask)
+
+    @staticmethod
+    def transform(wldoc, fieldfile):
+        """Transforms a librarian.WLDocument into a librarian.OutputFile.
+
+        By default, it just calls the relevant wldoc.as_??? method.
+
+        """
+        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()
+
+    def run(self, obj, field_name):
+        """Just run `build` on the FieldFile, as it can't be passed directly to Celery."""
+        task_logger.info("%s -> %s" % (obj.slug, field_name))
+        ret = self.build(getattr(obj, field_name))
+        obj.flush_includes()
+        return ret
+
+    def build(self, fieldfile):
+        book = fieldfile.instance
+        out = self.transform(book.wldocument(), fieldfile)
+        fieldfile.save(None, File(open(out.get_filename())), save=False)
+        if book.pk is not None:
+            type(book).objects.filter(pk=book.pk).update(**{
+                fieldfile.field.attname: fieldfile
+            })
+        if fieldfile.field.format_name in app_settings.FORMAT_ZIPS:
+            remove_zip(app_settings.FORMAT_ZIPS[fieldfile.field.format_name])
+# Don't decorate BuildEbook, because we want to subclass it.
+BuildEbookTask = task(BuildEbook, ignore_result=True)
+
+
+@BuildEbook.register('txt')
+@task(ignore_result=True)
+class BuildTxt(BuildEbook):
+    @staticmethod
+    def transform(wldoc, fieldfile):
+        return wldoc.as_text()
+
+
+@BuildEbook.register('pdf')
+@task(ignore_result=True)
+class BuildPdf(BuildEbook):
+    @staticmethod
+    def transform(wldoc, fieldfile):
+        return wldoc.as_pdf(morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
+            cover=True)
+
+    def build(self, fieldfile):
+        BuildEbook.build(self, fieldfile)
+        clear_cache(fieldfile.instance.slug)
+
+
+@BuildEbook.register('epub')
+@task(ignore_result=True)
+class BuildEpub(BuildEbook):
+    @staticmethod
+    def transform(wldoc, fieldfile):
+        return wldoc.as_epub(cover=True)
+
+
+@BuildEbook.register('html')
+@task(ignore_result=True)
+class BuildHtml(BuildEbook):
+    def build(self, fieldfile):
+        from django.core.files.base import ContentFile
+        from fnpdjango.utils.text.slughifi import slughifi
+        from sortify import sortify
+        from librarian import html
+        from catalogue.models import Fragment, Tag
+
+        book = fieldfile.instance
+
+        html_output = self.transform(
+                        book.wldocument(parse_dublincore=False),
+                        fieldfile)
+
+        # Delete old fragments, create from scratch if necessary.
+        book.fragments.all().delete()
+
+        if html_output:
+            meta_tags = list(book.tags.filter(
+                category__in=('author', 'epoch', 'genre', 'kind')))
+
+            lang = book.language
+            lang = LANGUAGES_3TO2.get(lang, lang)
+            if lang not in [ln[0] for ln in settings.LANGUAGES]:
+                lang = None
+
+            fieldfile.save(None, ContentFile(html_output.get_string()),
+                    save=False)
+            type(book).objects.filter(pk=book.pk).update(**{
+                fieldfile.field.attname: fieldfile
+            })
+
+            # Extract fragments
+            closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
+            for fragment in closed_fragments.values():
+                try:
+                    theme_names = [s.strip() for s in fragment.themes.split(',')]
+                except AttributeError:
+                    continue
+                themes = []
+                for theme_name in theme_names:
+                    if not theme_name:
+                        continue
+                    if lang == settings.LANGUAGE_CODE:
+                        # Allow creating themes if book in default language.
+                        tag, created = Tag.objects.get_or_create(
+                                            slug=slughifi(theme_name),
+                                            category='theme')
+                        if created:
+                            tag.name = theme_name
+                            setattr(tag, "name_%s" % lang, theme_name)
+                            tag.sort_key = sortify(theme_name.lower())
+                            tag.save()
+                        themes.append(tag)
+                    elif lang is not None:
+                        # Don't create unknown themes in non-default languages.
+                        try:
+                            tag = Tag.objects.get(category='theme',
+                                    **{"name_%s" % lang: theme_name})
+                        except Tag.DoesNotExist:
+                            pass
+                        else:
+                            themes.append(tag)
+                if not themes:
+                    continue
+
+                text = fragment.to_string()
+                short_text = truncate_html_words(text, 15)
+                if text == short_text:
+                    short_text = ''
+                new_fragment = Fragment.objects.create(anchor=fragment.id,
+                        book=book, text=text, short_text=short_text)
+
+                new_fragment.save()
+                new_fragment.tags = set(meta_tags + themes)
+            book.html_built.send(sender=type(self), instance=book)
+            return True
+        return False
+
+@BuildEbook.register('cover_thumb')
+@task(ignore_result=True)
+class BuildCoverThumb(BuildEbook):
+    @classmethod
+    def transform(cls, wldoc, fieldfile):
+        from librarian.cover import WLCover
+        return WLCover(wldoc.book_info, height=193).output_file()
 
 
-        return mark_safe(html)
-
-
-class JQueryAutoCompleteSearchWidget(JQueryAutoCompleteWidget):
-    def __init__(self, *args, **kwargs):
-        super(JQueryAutoCompleteSearchWidget, self).__init__(*args, **kwargs)
-
-    def render_js(self, field_id, options):
-        return u""
-    
-
-class JQueryAutoCompleteField(forms.CharField):
-    def __init__(self, source, options={}, *args, **kwargs):
-        if 'widget' not in kwargs:
-            options['source'] = source
-            kwargs['widget'] = JQueryAutoCompleteWidget(options)
-
-        super(JQueryAutoCompleteField, self).__init__(*args, **kwargs)
-
-
-class JQueryAutoCompleteSearchField(forms.CharField):
-    def __init__(self, options={}, *args, **kwargs):
-        if 'widget' not in kwargs:
-            kwargs['widget'] = JQueryAutoCompleteSearchWidget(options)
-
-        super(JQueryAutoCompleteSearchField, self).__init__(*args, **kwargs)
 
 
 class OverwritingFieldFile(FieldFile):
 
 
 class OverwritingFieldFile(FieldFile):
@@ -141,13 +241,3 @@ class OverwritingFieldFile(FieldFile):
 
 class OverwritingFileField(models.FileField):
     attr_class = OverwritingFieldFile
 
 class OverwritingFileField(models.FileField):
     attr_class = OverwritingFieldFile
-
-
-try:
-    # check for south
-    from south.modelsinspector import add_introspection_rules
-
-    add_introspection_rules([], ["^catalogue\.fields\.JSONField"])
-    add_introspection_rules([], ["^catalogue\.fields\.OverwritingFileField"])
-except ImportError:
-    pass