move a bunch of things to celery
authorRadek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Fri, 23 Mar 2012 16:27:35 +0000 (17:27 +0100)
committerRadek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Fri, 23 Mar 2012 16:27:35 +0000 (17:27 +0100)
apps/catalogue/models.py
apps/catalogue/tasks.py
apps/catalogue/utils.py
apps/catalogue/views.py
apps/dictionary/models.py
apps/waiter/migrations/0001_initial.py
apps/waiter/models.py
apps/waiter/templates/waiter/wait.html
apps/waiter/utils.py [new file with mode: 0644]
requirements.txt
wolnelektury/settings/custom.py

index f3b7b46..53caa95 100644 (file)
@@ -5,10 +5,9 @@
 from collections import namedtuple
 
 from django.db import models
-from django.db.models import permalink, Q
+from django.db.models import permalink
 import django.dispatch
 from django.core.cache import get_cache
-from django.core.files.storage import DefaultStorage
 from django.utils.translation import ugettext_lazy as _
 from django.contrib.auth.models import User
 from django.template.loader import render_to_string
@@ -16,7 +15,7 @@ from django.utils.datastructures import SortedDict
 from django.utils.safestring import mark_safe
 from django.utils.translation import get_language
 from django.core.urlresolvers import reverse
-from django.db.models.signals import post_save, m2m_changed, pre_delete, post_delete
+from django.db.models.signals import post_save, pre_delete, post_delete
 import jsonfield
 
 from django.conf import settings
@@ -25,12 +24,8 @@ from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 from catalogue.fields import JSONField, OverwritingFileField
 from catalogue.utils import create_zip, split_tags, truncate_html_words
-from catalogue.tasks import touch_tag, index_book
-from shutil import copy
-from glob import glob
+from catalogue import tasks
 import re
-from os import path
-from waiter.settings import WAITER_ROOT
 
 import search
 
@@ -217,30 +212,6 @@ def book_upload_path(ext=None, maxlen=100):
     return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
 
 
-def customizations_hash(customizations):
-    customizations.sort()
-    return hash(tuple(customizations))
-
-
-def get_customized_pdf_path(book, customizations):
-    """
-    Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
-    """
-    h = customizations_hash(customizations)
-    pdf_name = '%s/%s-custom-%s' % (book.slug, book.slug, h)
-    pdf_file = pdf_name + '.pdf'
-
-    return pdf_file
-
-
-def get_existing_customized_pdf(book):
-    """
-    Returns a list of paths to generated customized pdf of a book
-    """
-    pdf_glob = path.join(WAITER_ROOT, book.slug, '*')
-    return glob(pdf_glob)
-
-
 class BookMedia(models.Model):
     FileFormat = namedtuple("FileFormat", "name ext")
     formats = SortedDict([
@@ -503,70 +474,6 @@ class Book(models.Model):
         cover.save(imgstr, 'png')
         self.cover.save(None, ContentFile(imgstr.getvalue()))
 
-    def build_pdf(self, customizations=None, file_name=None):
-        """ (Re)builds the pdf file.
-        customizations - customizations which are passed to LaTeX class file.
-        file_name - save the pdf file under a different name and DO NOT save it in db.
-        """
-        from os import unlink
-        from django.core.files import File
-        from catalogue.utils import remove_zip
-
-        pdf = self.wldocument().as_pdf(customizations=customizations,
-            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)
-
-        if file_name is None:
-            # we'd like to be sure not to overwrite changes happening while
-            # (timely) pdf generation is taking place (async celery scenario)
-            current_self = Book.objects.get(id=self.id)
-            current_self.pdf_file.save('%s.pdf' % self.slug,
-                    File(open(pdf.get_filename())))
-            self.pdf_file = current_self.pdf_file
-
-            # remove cached downloadables
-            remove_zip(settings.ALL_PDF_ZIP)
-
-            for customized_pdf in get_existing_customized_pdf(self):
-                unlink(customized_pdf)
-        else:
-            print "saving %s" % file_name
-            print "to: %s" % DefaultStorage().path(file_name)
-            DefaultStorage().save(file_name, File(open(pdf.get_filename())))
-
-    def build_mobi(self):
-        """ (Re)builds the MOBI file.
-
-        """
-        from django.core.files import File
-        from catalogue.utils import remove_zip
-
-        mobi = self.wldocument().as_mobi()
-
-        self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi.get_filename())))
-
-        # remove zip with all mobi files
-        remove_zip(settings.ALL_MOBI_ZIP)
-
-    def build_epub(self):
-        """(Re)builds the epub file."""
-        from django.core.files import File
-        from catalogue.utils import remove_zip
-
-        epub = self.wldocument().as_epub()
-
-        self.epub_file.save('%s.epub' % self.slug,
-                File(open(epub.get_filename())))
-
-        # remove zip package with all epub files
-        remove_zip(settings.ALL_EPUB_ZIP)
-
-    def build_txt(self):
-        from django.core.files.base import ContentFile
-
-        text = self.wldocument().as_text()
-        self.txt_file.save('%s.txt' % self.slug, ContentFile(text.get_string()))
-
-
     def build_html(self):
         from django.core.files.base import ContentFile
         from slughifi import slughifi
@@ -636,15 +543,13 @@ class Book(models.Model):
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
                     for b in books]
-        result = create_zip.delay(paths,
+        return create_zip(paths,
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
-        return result.wait()
 
     def zip_audiobooks(self, format_):
         bm = BookMedia.objects.filter(book=self, type=format_)
         paths = map(lambda bm: (None, bm.file.path), bm)
-        result = create_zip.delay(paths, "%s_%s" % (self.slug, format_))
-        return result.wait()
+        return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, reuse_index=False, index_tags=True):
         if reuse_index:
@@ -680,8 +585,6 @@ class Book(models.Model):
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
             search_index=True, search_index_tags=True, search_index_reuse=False):
-        import re
-        from sortify import sortify
 
         # check for parts before we do anything
         children = []
@@ -737,18 +640,18 @@ class Book(models.Model):
 
         if book.build_html():
             if not settings.NO_BUILD_TXT and build_txt:
-                book.build_txt()
+                tasks.build_txt.delay(book.pk)
 
         book.build_cover(book_info)
 
         if not settings.NO_BUILD_EPUB and build_epub:
-            book.build_epub()
+            tasks.build_epub.delay(book.pk)
 
         if not settings.NO_BUILD_PDF and build_pdf:
-            book.build_pdf()
+            tasks.build_pdf.delay(book.pk)
 
         if not settings.NO_BUILD_MOBI and build_mobi:
-            book.build_mobi()
+            tasks.build_mobi.delay(book.pk)
 
         if not settings.NO_SEARCH_INDEX and search_index:
             book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
@@ -767,7 +670,7 @@ class Book(models.Model):
             book_descendants += list(child_book.children.all())
 
         for tag in descendants_tags:
-            touch_tag(tag)
+            tasks.touch_tag(tag)
 
         book.save()
 
@@ -1055,7 +958,7 @@ def _tags_updated_handler(sender, affected_tags, **kwargs):
     # reset tag global counter
     # we want Tag.changed_at updated for API to know the tag was touched
     for tag in affected_tags:
-        touch_tag(tag)
+        tasks.touch_tag(tag)
 
     # if book tags changed, reset book tag counter
     if isinstance(sender, Book) and \
index e1ff915..6d19ee1 100755 (executable)
@@ -3,11 +3,12 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from datetime import datetime
-from celery.task import task
-import catalogue.models
 from traceback import print_exc
+from celery.task import task
+from django.conf import settings
 
-@task
+
+# TODO: move to model?
 def touch_tag(tag):
     update_dict = {
         'book_count': tag.get_count(),
@@ -19,9 +20,97 @@ def touch_tag(tag):
 
 @task
 def index_book(book_id, book_info=None):
+    from catalogue.models import Book
     try:
-        return catalogue.models.Book.objects.get(id=book_id).search_index(book_info)
+        return Book.objects.get(id=book_id).search_index(book_info)
     except Exception, e:
         print "Exception during index: %s" % e
         print_exc()
         raise e
+
+
+@task(ignore_result=True)
+def build_txt(book_id):
+    """(Re)builds the TXT file for a book."""
+    from django.core.files.base import ContentFile
+    from catalogue.models import Book
+
+    text = Book.objects.get(pk=book_id).wldocument().as_text()
+
+    # Save the file in new instance. Building TXT takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    book.txt_file.save('%s.txt' % book.slug, ContentFile(text.get_string()))
+
+
+@task(ignore_result=True, rate_limit=settings.CATALOGUE_PDF_RATE_LIMIT)
+def build_pdf(book_id):
+    """(Re)builds the pdf file for a book.
+
+    book_id - primary key of the Book to build the PDF for.
+
+    Saves the result in the book's `pdf_file` field, then removes the
+    cached zip of all PDFs and the book's cached customized PDFs.
+    """
+    from django.core.files import File
+    from catalogue.models import Book
+    from catalogue.utils import remove_zip
+    from waiter.utils import clear_cache
+
+    pdf = Book.objects.get(pk=book_id).wldocument().as_pdf(
+            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)
+
+    # Save the file in new instance. Building PDF takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    book.pdf_file.save('%s.pdf' % book.slug,
+             File(open(pdf.get_filename())))
+
+    # Remove cached downloadables
+    remove_zip(settings.ALL_PDF_ZIP)
+    # Customized PDFs are generated under 'book/<slug>/' (see
+    # catalogue.utils.get_customized_pdf_path), so that is the directory
+    # which has to be purged -- not '<slug>/'.
+    clear_cache('book/%s' % book.slug)
+
+
+@task(ignore_result=True, rate_limit=settings.CATALOGUE_EPUB_RATE_LIMIT)
+def build_epub(book_id):
+    """(Re)builds the EPUB file for a book."""
+    from django.core.files import File
+    from catalogue.models import Book
+    from catalogue.utils import remove_zip
+
+    epub = Book.objects.get(pk=book_id).wldocument().as_epub()
+    # Save the file in new instance. Building EPUB takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    book.epub_file.save('%s.epub' % book.slug,
+             File(open(epub.get_filename())))
+
+    # remove zip with all epub files
+    remove_zip(settings.ALL_EPUB_ZIP)
+
+
+@task(ignore_result=True, rate_limit=settings.CATALOGUE_MOBI_RATE_LIMIT)
+def build_mobi(book_id):
+    """(Re)builds the MOBI file for a book."""
+    from django.core.files import File
+    from catalogue.models import Book
+    from catalogue.utils import remove_zip
+
+    mobi = Book.objects.get(pk=book_id).wldocument().as_mobi()
+    # Save the file in new instance. Building MOBI takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    book.mobi_file.save('%s.mobi' % book.slug,
+             File(open(mobi.get_filename())))
+
+    # remove zip with all mobi files
+    remove_zip(settings.ALL_MOBI_ZIP)
+
+
+@task(rate_limit=settings.CATALOGUE_CUSTOMPDF_RATE_LIMIT)
+def build_custom_pdf(book_id, customizations, file_name):
+    """Builds a custom PDF file."""
+    from django.core.files import File
+    from django.core.files.storage import DefaultStorage
+    from catalogue.models import Book
+
+    print "will gen %s" % DefaultStorage().path(file_name)
+    if not DefaultStorage().exists(file_name):
+        pdf = Book.objects.get(pk=book_id).wldocument().as_pdf(
+                customizations=customizations,
+                morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)
+        DefaultStorage().save(file_name, File(open(pdf.get_filename())))
index 949ac96..9de4eaa 100644 (file)
@@ -9,23 +9,18 @@ import re
 import time
 from base64 import urlsafe_b64encode
 
-from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
+from django.http import HttpResponse
 from django.core.files.uploadedfile import UploadedFile
-from django.core.files.base import File
 from django.core.files.storage import DefaultStorage
 from django.utils.encoding import force_unicode
 from django.utils.hashcompat import sha_constructor
 from django.conf import settings
-from celery.task import task
 from os import mkdir, path, unlink
 from errno import EEXIST, ENOENT
 from fcntl import flock, LOCK_EX
 from zipfile import ZipFile
-from traceback import print_exc
 
 from reporting.utils import read_chunks
-from celery.task import task
-import catalogue.models
 
 # Use the system (hardware-based) random number generator if it exists.
 if hasattr(random, 'SystemRandom'):
@@ -84,7 +79,7 @@ class LockFile(object):
         self.lock.close()
 
 
-@task
+#@task
 def create_zip(paths, zip_slug):
     """
     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
@@ -140,25 +135,6 @@ class AttachmentHttpResponse(HttpResponse):
             for chunk in read_chunks(f):
                 self.write(chunk)
 
-@task(rate_limit=settings.CATALOGUE_CUSTOMPDF_RATE_LIMIT)
-def async_build_pdf(book_id, customizations, file_name):
-    """
-    A celery task to generate pdf files.
-    Accepts the same args as Book.build_pdf, but with book id as first parameter
-    instead of Book instance
-    """
-    try:
-        book = catalogue.models.Book.objects.get(id=book_id)
-        print "will gen %s" % DefaultStorage().path(file_name)
-        if not DefaultStorage().exists(file_name):
-            book.build_pdf(customizations=customizations, file_name=file_name)
-        print "done."
-    except Exception, e:
-        print "Error during pdf creation: %s" % e
-        print_exc
-        raise e
-
-
 class MultiQuerySet(object):
     def __init__(self, *args, **kwargs):
         self.querysets = args
@@ -260,3 +236,24 @@ def truncate_html_words(s, num, end_text='...'):
         out += '</%s>' % tag
     # Return string
     return out
+
+
+def customizations_hash(customizations):
+    """
+    Returns a hash of the given customization options, independent of their order.
+    """
+    # NOTE: this sorts the caller's list in place.
+    customizations.sort()
+    return hash(tuple(customizations))
+
+
+def get_customized_pdf_path(book, customizations):
+    """
+    Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
+    """
+    h = customizations_hash(customizations)
+    return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
+
+
+def clear_custom_pdf(book):
+    """
+    Removes the cached customized pdfs of a book, i.e. the 'book/<slug>' directory of the waiter cache.
+    """
+    from waiter.utils import clear_cache
+    clear_cache('book/%s' % book.slug)
index 5dff961..eadaeca 100644 (file)
@@ -22,14 +22,13 @@ from ajaxable.utils import JSONResponse, AjaxableFormView
 
 from catalogue import models
 from catalogue import forms
-from catalogue.utils import (split_tags,
-    async_build_pdf, MultiQuerySet)
+from catalogue.utils import split_tags, MultiQuerySet, get_customized_pdf_path
+from catalogue.tasks import build_custom_pdf
 from pdcounter import models as pdcounter_models
 from pdcounter import views as pdcounter_views
 from suggest.forms import PublishingSuggestForm
 from picture.models import Picture
 
-from os import path
 from waiter.models import WaitedFile
 
 staff_required = user_passes_test(lambda user: user.is_staff)
@@ -539,10 +538,10 @@ def download_custom_pdf(request, slug, method='GET'):
         form = forms.CustomPDFForm(method == 'GET' and request.GET or request.POST)
         if form.is_valid():
             cust = form.customizations
-            pdf_file = models.get_customized_pdf_path(book, cust)
+            pdf_file = get_customized_pdf_path(book, cust)
 
             url = WaitedFile.order(pdf_file,
-                    lambda p: async_build_pdf.delay(book.id, cust, p),
+                    lambda p: build_custom_pdf.delay(book.id, cust, p),
                     book.pretty_title()
                 )
             return redirect(url)
index 1d2fbba..6238ccb 100644 (file)
@@ -3,7 +3,7 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from django.db import models
-
+from celery.task import task
 from sortify import sortify
 
 from catalogue.models import Book
@@ -19,15 +19,17 @@ class Note(models.Model):
         ordering = ['sort_key']
 
 
-def notes_from_book(sender, **kwargs):
-    from librarian import html
-
-    Note.objects.filter(book=sender).delete()
-    if sender.html_file:
-        for anchor, text_str, html_str in html.extract_annotations(sender.html_file.path):
-            Note.objects.create(book=sender, anchor=anchor,
+@task(ignore_result=True)
+def build_notes(book_id):
+    book = Book.objects.get(pk=book_id)
+    Note.objects.filter(book=book).delete()
+    if book.html_file:
+        from librarian import html
+        for anchor, text_str, html_str in html.extract_annotations(book.html_file.path):
+            Note.objects.create(book=book, anchor=anchor,
                                html=html_str, 
                                sort_key=sortify(text_str).strip()[:128])
-
-# always re-extract notes after making a HTML in a Book
-Book.html_built.connect(notes_from_book)
+    
+def notes_from_book(sender, **kwargs):
+    """Schedules rebuilding of the notes of a book whose HTML was just built.
+
+    sender - the Book instance which sent the `html_built` signal.
+    """
+    # The task looks the book up by primary key, so pass the pk rather than
+    # the instance itself.
+    build_notes.delay(sender.pk)
+
+# always re-extract notes after making a HTML in a Book
+# (connect() is called explicitly: used as a decorator it would rebind the
+# name to its return value and leave only a weak reference to the receiver)
+Book.html_built.connect(notes_from_book)
index 062d6f8..1c27085 100644 (file)
@@ -12,7 +12,7 @@ class Migration(SchemaMigration):
         db.create_table('waiter_waitedfile', (
             ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
             ('path', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255, db_index=True)),
-            ('task', self.gf('django.db.models.fields.CharField')(max_length=64, null=True)),
+            ('task', self.gf('picklefield.fields.PickledObjectField')(null=True)),
             ('description', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
         ))
         db.send_create_signal('waiter', ['WaitedFile'])
@@ -30,7 +30,7 @@ class Migration(SchemaMigration):
             'description': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
             'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
             'path': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
-            'task': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True'})
+            'task': ('picklefield.fields.PickledObjectField', [], {'null': 'True'})
         }
     }
 
index 26a9a6d..59eeea6 100644 (file)
@@ -1,22 +1,17 @@
-from os.path import join, abspath, exists
+from os.path import join, isfile
 from django.core.urlresolvers import reverse
 from django.db import models
-from waiter.settings import WAITER_ROOT, WAITER_URL
 from djcelery.models import TaskMeta
+from waiter.settings import WAITER_URL
+from waiter.utils import check_abspath
+from picklefield import PickledObjectField
 
 
 class WaitedFile(models.Model):
     path = models.CharField(max_length=255, unique=True, db_index=True)
-    task = models.CharField(max_length=64, null=True, editable=False)
+    task = PickledObjectField(null=True, editable=False)
     description = models.CharField(max_length=255, null=True, blank=True)
 
-    @staticmethod
-    def abspath(path):
-        abs_path = abspath(join(WAITER_ROOT, path))
-        if not abs_path.startswith(WAITER_ROOT):
-            raise ValueError('Path not inside WAITER_ROOT.')
-        return abs_path
-
     @classmethod
     def exists(cls, path):
         """Returns opened file or None.
@@ -24,10 +19,10 @@ class WaitedFile(models.Model):
         `path` is relative to WAITER_ROOT.
         Won't open a path leading outside of WAITER_ROOT.
         """
-        abs_path = cls.abspath(path)
+        abs_path = check_abspath(path)
         # Pre-fetch objects for deletion to avoid minor race condition
         relevant = [o.id for o in cls.objects.filter(path=path)]
-        if exists(abs_path):
+        if isfile(abs_path):
             cls.objects.filter(id__in=relevant).delete()
             return True
         else:
@@ -37,13 +32,7 @@ class WaitedFile(models.Model):
         if self.task is None:
             # Race; just let the other task roll. 
             return False
-        try:
-            meta = TaskMeta.objects.get(task_id=self.task)
-            assert meta.status in (u'PENDING', u'STARTED', u'SUCCESS', u'RETRY')
-        except TaskMeta.DoesNotExist:
-            # Might happen it's not yet there.
-            pass
-        except AssertionError:
+        if self.task.status not in (u'PENDING', u'STARTED', u'SUCCESS', u'RETRY'):
             return True
         return False
 
@@ -61,7 +50,7 @@ class WaitedFile(models.Model):
         if not already:
             waited, created = cls.objects.get_or_create(path=path)
             if created or waited.is_stale():
-                waited.task = task_creator(cls.abspath(path))
+                waited.task = task_creator(check_abspath(path))
                 waited.description = description
                 waited.save()
             return reverse("waiter", args=[path])
index a9efecd..e15bd64 100644 (file)
@@ -78,8 +78,8 @@ function wait() {
             else
                 setTimeout(wait, 10*1000);
         },
-        error: function() {
-            setTimeout(wait, 10*1000);
+        error: function(xhr) {
+            location.reload();
         }
     });
 }
diff --git a/apps/waiter/utils.py b/apps/waiter/utils.py
new file mode 100644 (file)
index 0000000..0957e9d
--- /dev/null
@@ -0,0 +1,17 @@
+from os.path import abspath, join, exists
+from shutil import rmtree
+from waiter.settings import WAITER_ROOT
+
+
+def check_abspath(path):
+    """Returns the absolute path of `path` taken relative to WAITER_ROOT.
+
+    Raises ValueError if the resulting path leads outside of WAITER_ROOT.
+    """
+    root = abspath(WAITER_ROOT)
+    abs_path = abspath(join(root, path))
+    # A bare prefix test would also accept siblings of the root, such as
+    # '<WAITER_ROOT>-old/x'. Compare against the root followed by a path
+    # separator instead; join(root, '') appends the separator.
+    if abs_path != root and not abs_path.startswith(join(root, '')):
+        raise ValueError('Path not inside WAITER_ROOT.')
+    return abs_path
+
+
+def clear_cache(path):
+    """Removes the directory tree at `path` (relative to WAITER_ROOT), if it exists.
+
+    Raises ValueError (from check_abspath) if `path` leads outside of WAITER_ROOT.
+    """
+    abs_path = check_abspath(path)
+    if exists(abs_path):
+        rmtree(abs_path)
+    
index 81451db..3c85e31 100644 (file)
@@ -8,6 +8,7 @@ django-rosetta>=0.5.3
 django-maintenancemode>=0.9
 django-piston
 django-jsonfield
+django-picklefield
 django-allauth
 django-honeypot
 
index eed38a2..9abb5ed 100644 (file)
@@ -22,7 +22,10 @@ ALL_MOBI_ZIP = 'wolnelektury_pl_mobi'
 CATALOGUE_DEFAULT_LANGUAGE = 'pol'
 PUBLISH_PLAN_FEED = 'http://redakcja.wolnelektury.pl/documents/track/editor-proofreading/?published=false'
 
-# limit rate for custom PDF creation
+# limit rate for ebooks creation
+CATALOGUE_PDF_RATE_LIMIT = '1/m'
+CATALOGUE_EPUB_RATE_LIMIT = '6/m'
+CATALOGUE_MOBI_RATE_LIMIT = '5/m'
 CATALOGUE_CUSTOMPDF_RATE_LIMIT = '1/m'
 
 # set to 'new' or 'old' to skip time-consuming test