from collections import namedtuple
from django.db import models
-from django.db.models import permalink, Q
+from django.db.models import permalink
import django.dispatch
from django.core.cache import get_cache
-from django.core.files.storage import DefaultStorage
from django.utils.translation import ugettext_lazy as _
from django.contrib.auth.models import User
from django.template.loader import render_to_string
from django.utils.safestring import mark_safe
from django.utils.translation import get_language
from django.core.urlresolvers import reverse
-from django.db.models.signals import post_save, m2m_changed, pre_delete, post_delete
+from django.db.models.signals import post_save, pre_delete, post_delete
import jsonfield
from django.conf import settings
from newtagging import managers
from catalogue.fields import JSONField, OverwritingFileField
from catalogue.utils import create_zip, split_tags, truncate_html_words
-from catalogue.tasks import touch_tag, index_book
-from shutil import copy
-from glob import glob
+from catalogue import tasks
import re
-from os import path
-from waiter.settings import WAITER_ROOT
import search
return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
-def customizations_hash(customizations):
- customizations.sort()
- return hash(tuple(customizations))
-
-
-def get_customized_pdf_path(book, customizations):
- """
- Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
- """
- h = customizations_hash(customizations)
- pdf_name = '%s/%s-custom-%s' % (book.slug, book.slug, h)
- pdf_file = pdf_name + '.pdf'
-
- return pdf_file
-
-
-def get_existing_customized_pdf(book):
- """
- Returns a list of paths to generated customized pdf of a book
- """
- pdf_glob = path.join(WAITER_ROOT, book.slug, '*')
- return glob(pdf_glob)
-
-
class BookMedia(models.Model):
FileFormat = namedtuple("FileFormat", "name ext")
formats = SortedDict([
cover.save(imgstr, 'png')
self.cover.save(None, ContentFile(imgstr.getvalue()))
- def build_pdf(self, customizations=None, file_name=None):
- """ (Re)builds the pdf file.
- customizations - customizations which are passed to LaTeX class file.
- file_name - save the pdf file under a different name and DO NOT save it in db.
- """
- from os import unlink
- from django.core.files import File
- from catalogue.utils import remove_zip
-
- pdf = self.wldocument().as_pdf(customizations=customizations,
- morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)
-
- if file_name is None:
- # we'd like to be sure not to overwrite changes happening while
- # (timely) pdf generation is taking place (async celery scenario)
- current_self = Book.objects.get(id=self.id)
- current_self.pdf_file.save('%s.pdf' % self.slug,
- File(open(pdf.get_filename())))
- self.pdf_file = current_self.pdf_file
-
- # remove cached downloadables
- remove_zip(settings.ALL_PDF_ZIP)
-
- for customized_pdf in get_existing_customized_pdf(self):
- unlink(customized_pdf)
- else:
- print "saving %s" % file_name
- print "to: %s" % DefaultStorage().path(file_name)
- DefaultStorage().save(file_name, File(open(pdf.get_filename())))
-
- def build_mobi(self):
- """ (Re)builds the MOBI file.
-
- """
- from django.core.files import File
- from catalogue.utils import remove_zip
-
- mobi = self.wldocument().as_mobi()
-
- self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi.get_filename())))
-
- # remove zip with all mobi files
- remove_zip(settings.ALL_MOBI_ZIP)
-
- def build_epub(self):
- """(Re)builds the epub file."""
- from django.core.files import File
- from catalogue.utils import remove_zip
-
- epub = self.wldocument().as_epub()
-
- self.epub_file.save('%s.epub' % self.slug,
- File(open(epub.get_filename())))
-
- # remove zip package with all epub files
- remove_zip(settings.ALL_EPUB_ZIP)
-
- def build_txt(self):
- from django.core.files.base import ContentFile
-
- text = self.wldocument().as_text()
- self.txt_file.save('%s.txt' % self.slug, ContentFile(text.get_string()))
-
-
def build_html(self):
from django.core.files.base import ContentFile
from slughifi import slughifi
books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
paths = [(pretty_file_name(b), getattr(b, field_name).path)
for b in books]
- result = create_zip.delay(paths,
+ return create_zip(paths,
getattr(settings, "ALL_%s_ZIP" % format_.upper()))
- return result.wait()
def zip_audiobooks(self, format_):
bm = BookMedia.objects.filter(book=self, type=format_)
paths = map(lambda bm: (None, bm.file.path), bm)
- result = create_zip.delay(paths, "%s_%s" % (self.slug, format_))
- return result.wait()
+ return create_zip(paths, "%s_%s" % (self.slug, format_))
def search_index(self, book_info=None, reuse_index=False, index_tags=True):
if reuse_index:
def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
search_index=True, search_index_tags=True, search_index_reuse=False):
- import re
- from sortify import sortify
# check for parts before we do anything
children = []
if book.build_html():
if not settings.NO_BUILD_TXT and build_txt:
- book.build_txt()
+ tasks.build_txt.delay(book.pk)
book.build_cover(book_info)
if not settings.NO_BUILD_EPUB and build_epub:
- book.build_epub()
+ tasks.build_epub.delay(book.pk)
if not settings.NO_BUILD_PDF and build_pdf:
- book.build_pdf()
+ tasks.build_pdf.delay(book.pk)
if not settings.NO_BUILD_MOBI and build_mobi:
- book.build_mobi()
+ tasks.build_mobi.delay(book.pk)
if not settings.NO_SEARCH_INDEX and search_index:
book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
book_descendants += list(child_book.children.all())
for tag in descendants_tags:
- touch_tag(tag)
+ tasks.touch_tag(tag)
book.save()
# reset tag global counter
# we want Tag.changed_at updated for API to know the tag was touched
for tag in affected_tags:
- touch_tag(tag)
+ tasks.touch_tag(tag)
# if book tags changed, reset book tag counter
if isinstance(sender, Book) and \
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from datetime import datetime
-from celery.task import task
-import catalogue.models
from traceback import print_exc
+from celery.task import task
+from django.conf import settings
-@task
+
+# TODO: move to model?
def touch_tag(tag):
update_dict = {
'book_count': tag.get_count(),
@task
def index_book(book_id, book_info=None):
+ from catalogue.models import Book
try:
- return catalogue.models.Book.objects.get(id=book_id).search_index(book_info)
+ return Book.objects.get(id=book_id).search_index(book_info)
except Exception, e:
print "Exception during index: %s" % e
print_exc()
raise e
+
+
+@task(ignore_result=True)
+def build_txt(book_id):
+    """Regenerates the plain-text file of a book and stores it on the model.
+
+    book_id -- primary key of the Book to process.
+    """
+    from django.core.files.base import ContentFile
+    from catalogue.models import Book
+
+    rendered = Book.objects.get(pk=book_id).wldocument().as_text()
+
+    # Rendering may be slow, so the row is fetched again just before saving;
+    # this avoids overwriting changes made to the book in the meantime.
+    fresh_book = Book.objects.get(id=book_id)
+    txt_name = '%s.txt' % fresh_book.slug
+    fresh_book.txt_file.save(txt_name, ContentFile(rendered.get_string()))
+
+
+@task(ignore_result=True, rate_limit=settings.CATALOGUE_PDF_RATE_LIMIT)
+def build_pdf(book_id):
+    """(Re)builds the pdf file for a book.
+
+    book_id -- primary key of the Book to process.
+
+    After saving the new file, removes the zip of all PDFs and clears the
+    book's cached customized PDFs.
+    """
+    from django.core.files import File
+    from catalogue.models import Book
+    from catalogue.utils import remove_zip, clear_custom_pdf
+
+    pdf = Book.objects.get(pk=book_id).wldocument().as_pdf(
+        morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)
+
+    # Save the file in new instance. Building PDF takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    # Read the PDF in binary mode and close the handle once it is stored.
+    with open(pdf.get_filename(), 'rb') as pdf_fh:
+        book.pdf_file.save('%s.pdf' % book.slug, File(pdf_fh))
+
+    # Remove cached downloadables
+    remove_zip(settings.ALL_PDF_ZIP)
+    # Customized PDFs are kept under 'book/<slug>/' (see
+    # get_customized_pdf_path), so clearing '<slug>' alone would miss them.
+    clear_custom_pdf(book)
+
+
+@task(ignore_result=True, rate_limit=settings.CATALOGUE_EPUB_RATE_LIMIT)
+def build_epub(book_id):
+    """(Re)builds the EPUB file for a book.
+
+    book_id -- primary key of the Book to process.
+
+    After saving the new file, removes the zip of all EPUB files.
+    """
+    from django.core.files import File
+    from catalogue.models import Book
+    from catalogue.utils import remove_zip
+
+    epub = Book.objects.get(pk=book_id).wldocument().as_epub()
+
+    # Save the file in new instance. Building EPUB takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    # Read the EPUB in binary mode and close the handle once it is stored.
+    with open(epub.get_filename(), 'rb') as epub_fh:
+        book.epub_file.save('%s.epub' % book.slug, File(epub_fh))
+
+    # remove zip with all epub files
+    remove_zip(settings.ALL_EPUB_ZIP)
+
+
+@task(ignore_result=True, rate_limit=settings.CATALOGUE_MOBI_RATE_LIMIT)
+def build_mobi(book_id):
+    """(Re)builds the MOBI file for a book.
+
+    book_id -- primary key of the Book to process.
+
+    After saving the new file, removes the zip of all MOBI files.
+    """
+    from django.core.files import File
+    from catalogue.models import Book
+    from catalogue.utils import remove_zip
+
+    mobi = Book.objects.get(pk=book_id).wldocument().as_mobi()
+
+    # Save the file in new instance. Building MOBI takes time and we don't want
+    # to overwrite any interim changes.
+    book = Book.objects.get(id=book_id)
+    # Read the MOBI in binary mode and close the handle once it is stored.
+    with open(mobi.get_filename(), 'rb') as mobi_fh:
+        book.mobi_file.save('%s.mobi' % book.slug, File(mobi_fh))
+
+    # remove zip with all mobi files
+    remove_zip(settings.ALL_MOBI_ZIP)
+
+
+@task(rate_limit=settings.CATALOGUE_CUSTOMPDF_RATE_LIMIT)
+def build_custom_pdf(book_id, customizations, file_name):
+    """Builds a custom PDF file.
+
+    book_id -- primary key of the Book to render.
+    customizations -- options passed on to the PDF builder.
+    file_name -- storage path to save the PDF under; nothing is built when
+        a file already exists there.
+    """
+    from django.core.files import File
+    from django.core.files.storage import DefaultStorage
+    from catalogue.models import Book
+
+    # One storage object serves the path lookup, existence check and save.
+    storage = DefaultStorage()
+    print "will gen %s" % storage.path(file_name)
+    if not storage.exists(file_name):
+        pdf = Book.objects.get(pk=book_id).wldocument().as_pdf(
+            customizations=customizations,
+            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS)
+        # Read the PDF in binary mode and close the handle once it is stored.
+        with open(pdf.get_filename(), 'rb') as pdf_fh:
+            storage.save(file_name, File(pdf_fh))
import time
from base64 import urlsafe_b64encode
-from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
+from django.http import HttpResponse
from django.core.files.uploadedfile import UploadedFile
-from django.core.files.base import File
from django.core.files.storage import DefaultStorage
from django.utils.encoding import force_unicode
from django.utils.hashcompat import sha_constructor
from django.conf import settings
-from celery.task import task
from os import mkdir, path, unlink
from errno import EEXIST, ENOENT
from fcntl import flock, LOCK_EX
from zipfile import ZipFile
-from traceback import print_exc
from reporting.utils import read_chunks
-from celery.task import task
-import catalogue.models
# Use the system (hardware-based) random number generator if it exists.
if hasattr(random, 'SystemRandom'):
self.lock.close()
-@task
+#@task
def create_zip(paths, zip_slug):
"""
Creates a zip in MEDIA_ROOT/zip directory containing files from path.
for chunk in read_chunks(f):
self.write(chunk)
-@task(rate_limit=settings.CATALOGUE_CUSTOMPDF_RATE_LIMIT)
-def async_build_pdf(book_id, customizations, file_name):
- """
- A celery task to generate pdf files.
- Accepts the same args as Book.build_pdf, but with book id as first parameter
- instead of Book instance
- """
- try:
- book = catalogue.models.Book.objects.get(id=book_id)
- print "will gen %s" % DefaultStorage().path(file_name)
- if not DefaultStorage().exists(file_name):
- book.build_pdf(customizations=customizations, file_name=file_name)
- print "done."
- except Exception, e:
- print "Error during pdf creation: %s" % e
- print_exc
- raise e
-
-
class MultiQuerySet(object):
def __init__(self, *args, **kwargs):
self.querysets = args
out += '</%s>' % tag
# Return string
return out
+
+
+def customizations_hash(customizations):
+    """Returns an order-independent hash of the given customization options.
+
+    A sorted copy is hashed, so the caller's sequence is left unmodified.
+    """
+    return hash(tuple(sorted(customizations)))
+
+
+def get_customized_pdf_path(book, customizations):
+    """
+    Builds the relative path under which a customized PDF of `book` is kept.
+    The file name embeds a hash of the customization options.
+    """
+    options_hash = customizations_hash(customizations)
+    slug = book.slug
+    return 'book/%s/%s-custom-%s.pdf' % (slug, slug, options_hash)
+
+
+def clear_custom_pdf(book):
+ """
+ Removes the cached customized PDFs of a book by clearing the
+ 'book/<slug>' entry through waiter's clear_cache. Returns nothing.
+ """
+ from waiter.utils import clear_cache
+ clear_cache('book/%s' % book.slug)
from catalogue import models
from catalogue import forms
-from catalogue.utils import (split_tags,
- async_build_pdf, MultiQuerySet)
+from catalogue.utils import split_tags, MultiQuerySet, get_customized_pdf_path
+from catalogue.tasks import build_custom_pdf
from pdcounter import models as pdcounter_models
from pdcounter import views as pdcounter_views
from suggest.forms import PublishingSuggestForm
from picture.models import Picture
-from os import path
from waiter.models import WaitedFile
staff_required = user_passes_test(lambda user: user.is_staff)
form = forms.CustomPDFForm(method == 'GET' and request.GET or request.POST)
if form.is_valid():
cust = form.customizations
- pdf_file = models.get_customized_pdf_path(book, cust)
+ pdf_file = get_customized_pdf_path(book, cust)
url = WaitedFile.order(pdf_file,
- lambda p: async_build_pdf.delay(book.id, cust, p),
+ lambda p: build_custom_pdf.delay(book.id, cust, p),
book.pretty_title()
)
return redirect(url)
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from django.db import models
-
+from celery.task import task
from sortify import sortify
from catalogue.models import Book
ordering = ['sort_key']
-def notes_from_book(sender, **kwargs):
- from librarian import html
-
- Note.objects.filter(book=sender).delete()
- if sender.html_file:
- for anchor, text_str, html_str in html.extract_annotations(sender.html_file.path):
- Note.objects.create(book=sender, anchor=anchor,
+@task(ignore_result=True)
+def build_notes(book_id):
+ """Rebuilds the Note rows of a book from the annotations in its HTML file.
+
+ book_id -- primary key of the Book to process.
+ """
+ book = Book.objects.get(pk=book_id)
+ # Drop the existing notes first; they are recreated below.
+ Note.objects.filter(book=book).delete()
+ if book.html_file:
+ from librarian import html
+ for anchor, text_str, html_str in html.extract_annotations(book.html_file.path):
+ # sort_key is the sortified, stripped note text cut to 128 characters.
+ Note.objects.create(book=book, anchor=anchor,
html=html_str,
sort_key=sortify(text_str).strip()[:128])
-
-# always re-extract notes after making a HTML in a Book
-Book.html_built.connect(notes_from_book)
+
+def notes_from_book(sender, **kwargs):
+    """Queues re-extraction of notes for the book whose HTML was just built.
+
+    sender -- the Book instance that sent the html_built signal.
+    """
+    # The task loads the book by primary key, so pass the id, not the object.
+    build_notes.delay(sender.id)
+
+
+# always re-extract notes after making a HTML in a Book
+# Connected with a plain call rather than as a decorator: Signal.connect
+# returns None and keeps only a weak reference, so decorating would rebind
+# the name to None and let the receiver be garbage-collected.
+Book.html_built.connect(notes_from_book)
db.create_table('waiter_waitedfile', (
('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
('path', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255, db_index=True)),
- ('task', self.gf('django.db.models.fields.CharField')(max_length=64, null=True)),
+ ('task', self.gf('picklefield.fields.PickledObjectField')(null=True)),
('description', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
))
db.send_create_signal('waiter', ['WaitedFile'])
'description': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'path': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
- 'task': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True'})
+ 'task': ('picklefield.fields.PickledObjectField', [], {'null': 'True'})
}
}
-from os.path import join, abspath, exists
+from os.path import join, isfile
from django.core.urlresolvers import reverse
from django.db import models
-from waiter.settings import WAITER_ROOT, WAITER_URL
from djcelery.models import TaskMeta
+from waiter.settings import WAITER_URL
+from waiter.utils import check_abspath
+from picklefield import PickledObjectField
class WaitedFile(models.Model):
path = models.CharField(max_length=255, unique=True, db_index=True)
- task = models.CharField(max_length=64, null=True, editable=False)
+ task = PickledObjectField(null=True, editable=False)
description = models.CharField(max_length=255, null=True, blank=True)
- @staticmethod
- def abspath(path):
- abs_path = abspath(join(WAITER_ROOT, path))
- if not abs_path.startswith(WAITER_ROOT):
- raise ValueError('Path not inside WAITER_ROOT.')
- return abs_path
-
@classmethod
def exists(cls, path):
"""Returns opened file or None.
`path` is relative to WAITER_ROOT.
Won't open a path leading outside of WAITER_ROOT.
"""
- abs_path = cls.abspath(path)
+ abs_path = check_abspath(path)
# Pre-fetch objects for deletion to avoid minor race condition
relevant = [o.id for o in cls.objects.filter(path=path)]
- if exists(abs_path):
+ if isfile(abs_path):
cls.objects.filter(id__in=relevant).delete()
return True
else:
if self.task is None:
# Race; just let the other task roll.
return False
- try:
- meta = TaskMeta.objects.get(task_id=self.task)
- assert meta.status in (u'PENDING', u'STARTED', u'SUCCESS', u'RETRY')
- except TaskMeta.DoesNotExist:
- # Might happen it's not yet there.
- pass
- except AssertionError:
+ if self.task.status not in (u'PENDING', u'STARTED', u'SUCCESS', u'RETRY'):
return True
return False
if not already:
waited, created = cls.objects.get_or_create(path=path)
if created or waited.is_stale():
- waited.task = task_creator(cls.abspath(path))
+ waited.task = task_creator(check_abspath(path))
waited.description = description
waited.save()
return reverse("waiter", args=[path])
else
setTimeout(wait, 10*1000);
},
- error: function() {
- setTimeout(wait, 10*1000);
+ error: function(xhr) {
+ location.reload();
}
});
}
--- /dev/null
+from os.path import abspath, join, exists
+from shutil import rmtree
+from waiter.settings import WAITER_ROOT
+
+
+def check_abspath(path):
+    """Returns the absolute path of `path` resolved against WAITER_ROOT.
+
+    Raises ValueError when the resolved path lies outside WAITER_ROOT.
+    """
+    root = abspath(WAITER_ROOT)
+    abs_path = abspath(join(root, path))
+    # Compare against the root with a trailing separator: a bare prefix test
+    # would also accept sibling paths such as '<WAITER_ROOT>-other/file'.
+    if abs_path != root and not abs_path.startswith(join(root, '')):
+        raise ValueError('Path not inside WAITER_ROOT.')
+    return abs_path
+
+
+def clear_cache(path):
+    """Removes the directory tree at `path` (relative to WAITER_ROOT), if any.
+
+    The path is validated with check_abspath first.
+    """
+    target = check_abspath(path)
+    if not exists(target):
+        return
+    rmtree(target)
+
django-maintenancemode>=0.9
django-piston
django-jsonfield
+django-picklefield
django-allauth
django-honeypot
CATALOGUE_DEFAULT_LANGUAGE = 'pol'
PUBLISH_PLAN_FEED = 'http://redakcja.wolnelektury.pl/documents/track/editor-proofreading/?published=false'
-# limit rate for custom PDF creation
+# limit rate for ebooks creation
+CATALOGUE_PDF_RATE_LIMIT = '1/m'
+CATALOGUE_EPUB_RATE_LIMIT = '6/m'
+CATALOGUE_MOBI_RATE_LIMIT = '5/m'
CATALOGUE_CUSTOMPDF_RATE_LIMIT = '1/m'
# set to 'new' or 'old' to skip time-consuming test