X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/124258c72bb9e69e6336de195f3a3b3e74509b95..90611a3114c6ee7f0ac81e67fe0425f758d247d6:/apps/catalogue/models.py diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index 0c3513047..29106b184 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -5,10 +5,9 @@ from collections import namedtuple from django.db import models -from django.db.models import permalink, Q +from django.db.models import permalink import django.dispatch -from django.core.cache import cache -from django.core.files.storage import DefaultStorage +from django.core.cache import get_cache from django.utils.translation import ugettext_lazy as _ from django.contrib.auth.models import User from django.template.loader import render_to_string @@ -16,23 +15,19 @@ from django.utils.datastructures import SortedDict from django.utils.safestring import mark_safe from django.utils.translation import get_language from django.core.urlresolvers import reverse -from django.db.models.signals import post_save, m2m_changed, pre_delete +from django.db.models.signals import post_save, pre_delete, post_delete +import jsonfield from django.conf import settings from newtagging.models import TagBase, tags_updated from newtagging import managers -from catalogue.fields import JSONField, OverwritingFileField -from catalogue.utils import create_zip, split_tags -from catalogue.tasks import touch_tag, index_book -from shutil import copy -from glob import glob +from catalogue.fields import OverwritingFileField +from catalogue.utils import create_zip, split_tags, truncate_html_words +from catalogue import tasks import re -from os import path -import search - # Those are hard-coded here so that makemessages sees them. TAG_CATEGORIES = ( ('author', _('author')), @@ -44,8 +39,8 @@ TAG_CATEGORIES = ( ('book', _('book')), ) -# not quite, but Django wants you to set a timeout -CACHE_FOREVER = 2419200 # 28 days + +permanent_cache = get_cache('permanent') class TagSubcategoryManager(models.Manager): @@ -58,6 +53,10 @@ class TagSubcategoryManager(models.Manager): class Tag(TagBase): + """A tag attachable to books and fragments (and possibly anything). + + Used to represent searchable metadata (authors, epochs, genres, kinds), + fragment themes (motifs) and some book hierarchy related kludges.""" name = models.CharField(_('name'), max_length=50, db_index=True) slug = models.SlugField(_('slug'), max_length=120, db_index=True) sort_key = models.CharField(_('sort key'), max_length=120, db_index=True) @@ -119,8 +118,8 @@ class Tag(TagBase): objects = Book.tagged.with_all((self,)).order_by() if self.category != 'set': # eliminate descendants - l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects]) - descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects.iterator()]) + descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()] if descendants_keys: objects = objects.exclude(pk__in=descendants_keys) return objects.count() @@ -216,30 +215,8 @@ def book_upload_path(ext=None, maxlen=100): return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen) -def get_customized_pdf_path(book, customizations): - """ - Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options. - """ - customizations.sort() - h = hash(tuple(customizations)) - - pdf_name = '%s-custom-%s' % (book.slug, h) - pdf_file = get_dynamic_path(None, pdf_name, ext='pdf') - - return pdf_file - - -def get_existing_customized_pdf(book): - """ - Returns a list of paths to generated customized pdf of a book - """ - pdf_glob = '%s-custom-' % (book.slug,) - pdf_glob = get_dynamic_path(None, pdf_glob, ext='pdf') - pdf_glob = re.sub(r"[.]([a-z0-9]+)$", "*.\\1", pdf_glob) - return glob(path.join(settings.MEDIA_ROOT, pdf_glob)) - - class BookMedia(models.Model): + """Represents media attached to a book.""" FileFormat = namedtuple("FileFormat", "name ext") formats = SortedDict([ ('mp3', FileFormat(name='MP3', ext='mp3')), @@ -253,7 +230,7 @@ class BookMedia(models.Model): name = models.CharField(_('name'), max_length="100") file = OverwritingFileField(_('file'), upload_to=book_upload_path()) uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False) - extra_info = JSONField(_('extra information'), default='{}', editable=False) + extra_info = jsonfield.JSONField(_('extra information'), default='{}', editable=False) book = models.ForeignKey('Book', related_name='media') source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False) @@ -271,7 +248,7 @@ class BookMedia(models.Model): try: old = BookMedia.objects.get(pk=self.pk) - except BookMedia.DoesNotExist, e: + except BookMedia.DoesNotExist: old = None else: # if name changed, change the file name, too @@ -285,9 +262,9 @@ class BookMedia(models.Model): remove_zip("%s_%s" % (old.book.slug, old.type)) remove_zip("%s_%s" % (self.book.slug, self.type)) - extra_info = self.get_extra_info_value() + extra_info = self.extra_info extra_info.update(self.read_meta()) - self.set_extra_info_value(extra_info) + self.extra_info = extra_info self.source_sha1 = self.read_source_sha1(self.file.path, self.type) return super(BookMedia, self).save(*args, **kwargs) @@ -350,6 +327,7 @@ class BookMedia(models.Model): class Book(models.Model): + """Represents a book imported from WL-XML.""" title = models.CharField(_('title'), max_length=120) sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False) slug = models.SlugField(_('slug'), max_length=120, db_index=True, @@ -361,7 +339,7 @@ class Book(models.Model): created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True) changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True) parent_number = models.IntegerField(_('parent number'), default=0) - extra_info = JSONField(_('extra information'), default='{}') + extra_info = jsonfield.JSONField(_('extra information'), default='{}') gazeta_link = models.CharField(blank=True, max_length=240) wiki_link = models.CharField(blank=True, max_length=240) # files generated during publication @@ -372,6 +350,9 @@ class Book(models.Model): formats = ebook_formats + ['html', 'xml'] parent = models.ForeignKey('self', blank=True, null=True, related_name='children') + + _related_info = jsonfield.JSONField(blank=True, null=True, editable=False) + objects = models.Manager() tagged = managers.ModelTaggedItemManager(Tag) tags = managers.TagDescriptor(Tag) @@ -422,18 +403,18 @@ class Book(models.Model): book_tag.save() return book_tag - def has_media(self, type): - if type in Book.formats: - return bool(getattr(self, "%s_file" % type)) + def has_media(self, type_): + if type_ in Book.formats: + return bool(getattr(self, "%s_file" % type_)) else: - return self.media.filter(type=type).exists() + return self.media.filter(type=type_).exists() - def get_media(self, type): - if self.has_media(type): - if type in Book.formats: - return getattr(self, "%s_file" % type) + def get_media(self, type_): + if self.has_media(type_): + if type_ in Book.formats: + return getattr(self, "%s_file" % type_) else: - return self.media.filter(type=type) + return self.media.filter(type=type_) else: return None @@ -450,58 +431,11 @@ class Book(models.Model): if self.id is None: return - cache_key = "Book.short_html/%d/%s" - for lang, langname in settings.LANGUAGES: - cache.delete(cache_key % (self.id, lang)) - cache.delete("Book.mini_box/%d" % (self.id, )) + type(self).objects.filter(pk=self.pk).update(_related_info=None) # Fragment.short_html relies on book's tags, so reset it here too - for fragm in self.fragments.all(): + for fragm in self.fragments.all().iterator(): fragm.reset_short_html() - def short_html(self): - if self.id: - cache_key = "Book.short_html/%d/%s" % (self.id, get_language()) - short_html = cache.get(cache_key) - else: - short_html = None - - if short_html is not None: - return mark_safe(short_html) - else: - tags = self.tags.filter(category__in=('author', 'kind', 'genre', 'epoch')) - tags = split_tags(tags) - - formats = {} - # files generated during publication - for ebook_format in self.ebook_formats: - if self.has_media(ebook_format): - formats[ebook_format] = self.get_media(ebook_format) - - - short_html = unicode(render_to_string('catalogue/book_short.html', - {'book': self, 'tags': tags, 'formats': formats})) - - if self.id: - cache.set(cache_key, short_html, CACHE_FOREVER) - return mark_safe(short_html) - - def mini_box(self): - if self.id: - cache_key = "Book.mini_box/%d" % (self.id, ) - short_html = cache.get(cache_key) - else: - short_html = None - - if short_html is None: - authors = self.tags.filter(category='author') - - short_html = unicode(render_to_string('catalogue/book_mini_box.html', - {'book': self, 'authors': authors, 'STATIC_URL': settings.STATIC_URL})) - - if self.id: - cache.set(cache_key, short_html, CACHE_FOREVER) - return mark_safe(short_html) - def has_description(self): return len(self.description) > 0 has_description.short_description = _('description') @@ -545,71 +479,7 @@ class Book(models.Model): cover.save(imgstr, 'png') self.cover.save(None, ContentFile(imgstr.getvalue())) - def build_pdf(self, customizations=None, file_name=None): - """ (Re)builds the pdf file. - customizations - customizations which are passed to LaTeX class file. - file_name - save the pdf file under a different name and DO NOT save it in db. - """ - from os import unlink - from django.core.files import File - from catalogue.utils import remove_zip - - pdf = self.wldocument().as_pdf(customizations=customizations) - - if file_name is None: - # we'd like to be sure not to overwrite changes happening while - # (timely) pdf generation is taking place (async celery scenario) - current_self = Book.objects.get(id=self.id) - current_self.pdf_file.save('%s.pdf' % self.slug, - File(open(pdf.get_filename()))) - self.pdf_file = current_self.pdf_file - - # remove cached downloadables - remove_zip(settings.ALL_PDF_ZIP) - - for customized_pdf in get_existing_customized_pdf(self): - unlink(customized_pdf) - else: - print "saving %s" % file_name - print "to: %s" % DefaultStorage().path(file_name) - DefaultStorage().save(file_name, File(open(pdf.get_filename()))) - - def build_mobi(self): - """ (Re)builds the MOBI file. - - """ - from django.core.files import File - from catalogue.utils import remove_zip - - mobi = self.wldocument().as_mobi() - - self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi.get_filename()))) - - # remove zip with all mobi files - remove_zip(settings.ALL_MOBI_ZIP) - - def build_epub(self): - """(Re)builds the epub file.""" - from django.core.files import File - from catalogue.utils import remove_zip - - epub = self.wldocument().as_epub() - - self.epub_file.save('%s.epub' % self.slug, - File(open(epub.get_filename()))) - - # remove zip package with all epub files - remove_zip(settings.ALL_EPUB_ZIP) - - def build_txt(self): - from django.core.files.base import ContentFile - - text = self.wldocument().as_text() - self.txt_file.save('%s.txt' % self.slug, ContentFile(text.get_string())) - - def build_html(self): - from markupstring import MarkupString from django.core.files.base import ContentFile from slughifi import slughifi from librarian import html @@ -653,9 +523,9 @@ class Book(models.Model): continue text = fragment.to_string() - short_text = '' - if (len(MarkupString(text)) > 240): - short_text = unicode(MarkupString(text)[:160]) + short_text = truncate_html_words(text, 15) + if text == short_text: + short_text = '' new_fragment = Fragment.objects.create(anchor=fragment.id, book=self, text=text, short_text=short_text) @@ -666,30 +536,43 @@ class Book(models.Model): return True return False + # Thin wrappers for builder tasks + def build_pdf(self, *args, **kwargs): + """(Re)builds PDF.""" + return tasks.build_pdf.delay(self.pk, *args, **kwargs) + def build_epub(self, *args, **kwargs): + """(Re)builds EPUB.""" + return tasks.build_epub.delay(self.pk, *args, **kwargs) + def build_mobi(self, *args, **kwargs): + """(Re)builds MOBI.""" + return tasks.build_mobi.delay(self.pk, *args, **kwargs) + def build_txt(self, *args, **kwargs): + """(Re)builds TXT.""" + return tasks.build_txt.delay(self.pk, *args, **kwargs) + @staticmethod def zip_format(format_): def pretty_file_name(book): return "%s/%s.%s" % ( - b.get_extra_info_value()['author'], + b.extra_info['author'], b.slug, format_) field_name = "%s_file" % format_ books = Book.objects.filter(parent=None).exclude(**{field_name: ""}) paths = [(pretty_file_name(b), getattr(b, field_name).path) - for b in books] - result = create_zip.delay(paths, + for b in books.iterator()] + return create_zip(paths, getattr(settings, "ALL_%s_ZIP" % format_.upper())) - return result.wait() def zip_audiobooks(self, format_): bm = BookMedia.objects.filter(book=self, type=format_) paths = map(lambda bm: (None, bm.file.path), bm) - result = create_zip.delay(paths, "%s_%s" % (self.slug, format_)) - return result.wait() + return create_zip(paths, "%s_%s" % (self.slug, format_)) - def search_index(self, book_info=None): - if hasattr(settings, 'CELERY_ALWAYS_EAGER') and settings.CELERY_ALWAYS_EAGER: + def search_index(self, book_info=None, reuse_index=False, index_tags=True): + import search + if reuse_index: idx = search.ReusableIndex() else: idx = search.Index() @@ -697,7 +580,8 @@ class Book(models.Model): idx.open() try: idx.index_book(self, book_info) - idx.index_tags() + if index_tags: + idx.index_tags() finally: idx.close() @@ -720,9 +604,7 @@ class Book(models.Model): @classmethod def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, - search_index=True): - import re - from sortify import sortify + search_index=True, search_index_tags=True, search_index_reuse=False): # check for parts before we do anything children = [] @@ -730,7 +612,7 @@ class Book(models.Model): for part_url in book_info.parts: try: children.append(Book.objects.get(slug=part_url.slug)) - except Book.DoesNotExist, e: + except Book.DoesNotExist: raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug) @@ -756,7 +638,7 @@ class Book(models.Model): book.common_slug = book_info.variant_of.slug else: book.common_slug = book.slug - book.set_extra_info_value(book_info.to_dict()) + book.extra_info = book_info.to_dict() book.save() meta_tags = Tag.tags_from_info(book_info) @@ -792,7 +674,8 @@ class Book(models.Model): book.build_mobi() if not settings.NO_SEARCH_INDEX and search_index: - index_book.delay(book.id, book_info) + book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse) + #index_book.delay(book.id, book_info) book_descendants = list(book.children.all()) descendants_tags = set() @@ -802,12 +685,12 @@ class Book(models.Model): descendants_tags.update(child_book.tags) child_book.tags = list(child_book.tags) + [book_tag] child_book.save() - for fragment in child_book.fragments.all(): + for fragment in child_book.fragments.all().iterator(): fragment.tags = set(list(fragment.tags) + [book_tag]) book_descendants += list(child_book.children.all()) for tag in descendants_tags: - touch_tag.delay(tag) + tasks.touch_tag(tag) book.save() @@ -818,12 +701,49 @@ class Book(models.Model): cls.published.send(sender=book) return book + def related_info(self): + """Keeps info about related objects (tags, media) in cache field.""" + if self._related_info is not None: + return self._related_info + else: + rel = {'tags': {}, 'media': {}} + + tags = self.tags.filter(category__in=( + 'author', 'kind', 'genre', 'epoch')) + tags = split_tags(tags) + for category in tags: + rel['tags'][category] = [ + (t.name, t.slug) for t in tags[category]] + + for media_format in BookMedia.formats: + rel['media'][media_format] = self.has_media(media_format) + + book = self + parents = [] + while book.parent: + parents.append((book.parent.title, book.parent.slug)) + book = book.parent + parents = parents[::-1] + if parents: + rel['parents'] = parents + + if self.pk: + type(self).objects.filter(pk=self.pk).update(_related_info=rel) + return rel + + def related_themes(self): + theme_counter = self.theme_counter + book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys())) + for tag in book_themes: + tag.count = theme_counter[tag.pk] + return book_themes + def reset_tag_counter(self): if self.id is None: return cache_key = "Book.tag_counter/%d" % self.id - cache.delete(cache_key) + permanent_cache.delete(cache_key) if self.parent: self.parent.reset_tag_counter() @@ -831,20 +751,20 @@ class Book(models.Model): def tag_counter(self): if self.id: cache_key = "Book.tag_counter/%d" % self.id - tags = cache.get(cache_key) + tags = permanent_cache.get(cache_key) else: tags = None if tags is None: tags = {} - for child in self.children.all().order_by(): + for child in self.children.all().order_by().iterator(): for tag_pk, value in child.tag_counter.iteritems(): tags[tag_pk] = tags.get(tag_pk, 0) + value - for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by(): + for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator(): tags[tag.pk] = 1 if self.id: - cache.set(cache_key, tags, CACHE_FOREVER) + permanent_cache.set(cache_key, tags) return tags def reset_theme_counter(self): @@ -852,7 +772,7 @@ class Book(models.Model): return cache_key = "Book.theme_counter/%d" % self.id - cache.delete(cache_key) + permanent_cache.delete(cache_key) if self.parent: self.parent.reset_theme_counter() @@ -860,18 +780,18 @@ class Book(models.Model): def theme_counter(self): if self.id: cache_key = "Book.theme_counter/%d" % self.id - tags = cache.get(cache_key) + tags = permanent_cache.get(cache_key) else: tags = None if tags is None: tags = {} - for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by(): - for tag in fragment.tags.filter(category='theme').order_by(): + for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator(): + for tag in fragment.tags.filter(category='theme').order_by().iterator(): tags[tag.pk] = tags.get(tag.pk, 0) + 1 if self.id: - cache.set(cache_key, tags, CACHE_FOREVER) + permanent_cache.set(cache_key, tags) return tags def pretty_title(self, html_links=False): @@ -893,7 +813,7 @@ class Book(models.Model): @classmethod def tagged_top_level(cls, tags): - """ Returns top-level books tagged with `tags'. + """ Returns top-level books tagged with `tags`. It only returns those books which don't have ancestors which are also tagged with those tags. @@ -902,8 +822,9 @@ class Book(models.Model): # get relevant books and their tags objects = cls.tagged.with_all(tags) # eliminate descendants - l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects]) - descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(category='book', + slug__in=[book.book_tag_slug() for book in objects.iterator()]) + descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()] if descendants_keys: objects = objects.exclude(pk__in=descendants_keys) @@ -922,19 +843,20 @@ class Book(models.Model): 'title', 'parent', 'slug') if filter: books = books.filter(filter).distinct() - book_ids = set((book.pk for book in books)) - for book in books: + + book_ids = set(b['pk'] for b in books.values("pk").iterator()) + for book in books.iterator(): parent = book.parent_id if parent not in book_ids: parent = None books_by_parent.setdefault(parent, []).append(book) else: - for book in books: + for book in books.iterator(): books_by_parent.setdefault(book.parent_id, []).append(book) orphans = [] books_by_author = SortedDict() - for tag in Tag.objects.filter(category='author'): + for tag in Tag.objects.filter(category='author').iterator(): books_by_author[tag] = [] for book in books_by_parent.get(None,()): @@ -956,14 +878,25 @@ class Book(models.Model): "LP": (3, u"liceum"), } def audiences_pl(self): - audiences = self.get_extra_info_value().get('audiences', []) + audiences = self.extra_info.get('audiences', []) audiences = sorted(set([self._audiences_pl[a] for a in audiences])) return [a[1] for a in audiences] + def choose_fragment(self): + tag = self.book_tag() + fragments = Fragment.tagged.with_any([tag]) + if fragments.exists(): + return fragments.order_by('?')[0] + elif self.parent: + return self.parent.choose_fragment() + else: + return None + def _has_factory(ftype): has = lambda self: bool(getattr(self, "%s_file" % ftype)) - has.short_description = t.upper() + has.short_description = ftype.upper() + has.__doc__ = None has.boolean = True has.__name__ = "has_%s_file" % ftype return has @@ -980,6 +913,7 @@ for t in Book.formats: class Fragment(models.Model): + """Represents a themed fragment of a book.""" text = models.TextField() short_text = models.TextField(editable=False) anchor = models.CharField(max_length=120) @@ -1003,12 +937,16 @@ class Fragment(models.Model): cache_key = "Fragment.short_html/%d/%s" for lang, langname in settings.LANGUAGES: - cache.delete(cache_key % (self.id, lang)) + permanent_cache.delete(cache_key % (self.id, lang)) + + def get_short_text(self): + """Returns short version of the fragment.""" + return self.short_text if self.short_text else self.text def short_html(self): if self.id: cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language()) - short_html = cache.get(cache_key) + short_html = permanent_cache.get(cache_key) else: short_html = None @@ -1018,10 +956,28 @@ class Fragment(models.Model): short_html = unicode(render_to_string('catalogue/fragment_short.html', {'fragment': self})) if self.id: - cache.set(cache_key, short_html, CACHE_FOREVER) + permanent_cache.set(cache_key, short_html) return mark_safe(short_html) +class Collection(models.Model): + """A collection of books, which might be defined before publishing them.""" + title = models.CharField(_('title'), max_length=120, db_index=True) + slug = models.SlugField(_('slug'), max_length=120, primary_key=True) + description = models.TextField(_('description'), null=True, blank=True) + + models.SlugField(_('slug'), max_length=120, unique=True, db_index=True) + book_slugs = models.TextField(_('book slugs')) + + class Meta: + ordering = ('title',) + verbose_name = _('collection') + verbose_name_plural = _('collections') + + def __unicode__(self): + return self.title + + ########### # # SIGNALS @@ -1033,7 +989,7 @@ def _tags_updated_handler(sender, affected_tags, **kwargs): # reset tag global counter # we want Tag.changed_at updated for API to know the tag was touched for tag in affected_tags: - touch_tag.delay(tag) + tasks.touch_tag(tag) # if book tags changed, reset book tag counter if isinstance(sender, Book) and \ @@ -1054,8 +1010,24 @@ def _pre_delete_handler(sender, instance, **kwargs): instance.book.save() pre_delete.connect(_pre_delete_handler) + def _post_save_handler(sender, instance, **kwargs): """ refresh all the short_html stuff on BookMedia update """ if sender == BookMedia: instance.book.save() post_save.connect(_post_save_handler) + + +if not settings.NO_SEARCH_INDEX: + @django.dispatch.receiver(post_delete, sender=Book) + def _remove_book_from_index_handler(sender, instance, **kwargs): + """ remove the book from search index, when it is deleted.""" + import search + search.JVM.attachCurrentThread() + idx = search.Index() + idx.open(timeout=10000) # 10 seconds timeout. + try: + idx.remove_book(instance) + idx.index_tags() + finally: + idx.close()