X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/0854610e6f1a717ce100c96f7ebf36cbb3478a5c..7c5ccbccb3c83d91abc726298447bef2c322a559:/apps/catalogue/models.py?ds=inline diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index a94411191..3704b1663 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -5,10 +5,9 @@ from collections import namedtuple from django.db import models -from django.db.models import permalink, Q +from django.db.models import permalink import django.dispatch from django.core.cache import get_cache -from django.core.files.storage import DefaultStorage from django.utils.translation import ugettext_lazy as _ from django.contrib.auth.models import User from django.template.loader import render_to_string @@ -16,22 +15,18 @@ from django.utils.datastructures import SortedDict from django.utils.safestring import mark_safe from django.utils.translation import get_language from django.core.urlresolvers import reverse -from django.db.models.signals import post_save, m2m_changed, pre_delete +from django.db.models.signals import post_save, pre_delete, post_delete import jsonfield from django.conf import settings from newtagging.models import TagBase, tags_updated from newtagging import managers -from catalogue.fields import JSONField, OverwritingFileField -from catalogue.utils import create_zip, split_tags -from catalogue.tasks import touch_tag, index_book -from shutil import copy -from glob import glob +from catalogue.fields import OverwritingFileField +from catalogue.utils import create_zip, split_tags, truncate_html_words +from catalogue import tasks import re -from os import path -import search # Those are hard-coded here so that makemessages sees them. TAG_CATEGORIES = ( @@ -58,6 +53,10 @@ class TagSubcategoryManager(models.Manager): class Tag(TagBase): + """A tag attachable to books and fragments (and possibly anything). + + Used to represent searchable metadata (authors, epochs, genres, kinds), + fragment themes (motifs) and some book hierarchy related kludges.""" name = models.CharField(_('name'), max_length=50, db_index=True) slug = models.SlugField(_('slug'), max_length=120, db_index=True) sort_key = models.CharField(_('sort key'), max_length=120, db_index=True) @@ -119,8 +118,8 @@ class Tag(TagBase): objects = Book.tagged.with_all((self,)).order_by() if self.category != 'set': # eliminate descendants - l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects]) - descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects.iterator()]) + descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()] if descendants_keys: objects = objects.exclude(pk__in=descendants_keys) return objects.count() @@ -216,30 +215,8 @@ def book_upload_path(ext=None, maxlen=100): return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen) -def get_customized_pdf_path(book, customizations): - """ - Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options. - """ - customizations.sort() - h = hash(tuple(customizations)) - - pdf_name = '%s-custom-%s' % (book.slug, h) - pdf_file = get_dynamic_path(None, pdf_name, ext='pdf') - - return pdf_file - - -def get_existing_customized_pdf(book): - """ - Returns a list of paths to generated customized pdf of a book - """ - pdf_glob = '%s-custom-' % (book.slug,) - pdf_glob = get_dynamic_path(None, pdf_glob, ext='pdf') - pdf_glob = re.sub(r"[.]([a-z0-9]+)$", "*.\\1", pdf_glob) - return glob(path.join(settings.MEDIA_ROOT, pdf_glob)) - - class BookMedia(models.Model): + """Represents media attached to a book.""" FileFormat = namedtuple("FileFormat", "name ext") formats = SortedDict([ ('mp3', FileFormat(name='MP3', ext='mp3')), @@ -253,7 +230,7 @@ class BookMedia(models.Model): name = models.CharField(_('name'), max_length="100") file = OverwritingFileField(_('file'), upload_to=book_upload_path()) uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False) - extra_info = JSONField(_('extra information'), default='{}', editable=False) + extra_info = jsonfield.JSONField(_('extra information'), default='{}', editable=False) book = models.ForeignKey('Book', related_name='media') source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False) @@ -271,7 +248,7 @@ class BookMedia(models.Model): try: old = BookMedia.objects.get(pk=self.pk) - except BookMedia.DoesNotExist, e: + except BookMedia.DoesNotExist: old = None else: # if name changed, change the file name, too @@ -285,9 +262,9 @@ class BookMedia(models.Model): remove_zip("%s_%s" % (old.book.slug, old.type)) remove_zip("%s_%s" % (self.book.slug, self.type)) - extra_info = self.get_extra_info_value() + extra_info = self.extra_info extra_info.update(self.read_meta()) - self.set_extra_info_value(extra_info) + self.extra_info = extra_info self.source_sha1 = self.read_source_sha1(self.file.path, self.type) return super(BookMedia, self).save(*args, **kwargs) @@ -350,6 +327,7 @@ class BookMedia(models.Model): class Book(models.Model): + """Represents a book imported from WL-XML.""" title = models.CharField(_('title'), max_length=120) sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False) slug = models.SlugField(_('slug'), max_length=120, db_index=True, @@ -361,14 +339,14 @@ class Book(models.Model): created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True) changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True) parent_number = models.IntegerField(_('parent number'), default=0) - extra_info = JSONField(_('extra information'), default='{}') + extra_info = jsonfield.JSONField(_('extra information'), default='{}') gazeta_link = models.CharField(blank=True, max_length=240) wiki_link = models.CharField(blank=True, max_length=240) # files generated during publication cover = models.FileField(_('cover'), upload_to=book_upload_path('png'), null=True, blank=True) - ebook_formats = ['pdf', 'epub', 'mobi', 'txt'] + ebook_formats = ['pdf', 'epub', 'mobi', 'fb2', 'txt'] formats = ebook_formats + ['html', 'xml'] parent = models.ForeignKey('self', blank=True, null=True, related_name='children') @@ -425,18 +403,18 @@ class Book(models.Model): book_tag.save() return book_tag - def has_media(self, type): - if type in Book.formats: - return bool(getattr(self, "%s_file" % type)) + def has_media(self, type_): + if type_ in Book.formats: + return bool(getattr(self, "%s_file" % type_)) else: - return self.media.filter(type=type).exists() + return self.media.filter(type=type_).exists() - def get_media(self, type): - if self.has_media(type): - if type in Book.formats: - return getattr(self, "%s_file" % type) + def get_media(self, type_): + if self.has_media(type_): + if type_ in Book.formats: + return getattr(self, "%s_file" % type_) else: - return self.media.filter(type=type) + return self.media.filter(type=type_) else: return None @@ -455,7 +433,7 @@ class Book(models.Model): type(self).objects.filter(pk=self.pk).update(_related_info=None) # Fragment.short_html relies on book's tags, so reset it here too - for fragm in self.fragments.all(): + for fragm in self.fragments.all().iterator(): fragm.reset_short_html() def has_description(self): @@ -501,73 +479,10 @@ class Book(models.Model): cover.save(imgstr, 'png') self.cover.save(None, ContentFile(imgstr.getvalue())) - def build_pdf(self, customizations=None, file_name=None): - """ (Re)builds the pdf file. - customizations - customizations which are passed to LaTeX class file. - file_name - save the pdf file under a different name and DO NOT save it in db. - """ - from os import unlink - from django.core.files import File - from catalogue.utils import remove_zip - - pdf = self.wldocument().as_pdf(customizations=customizations) - - if file_name is None: - # we'd like to be sure not to overwrite changes happening while - # (timely) pdf generation is taking place (async celery scenario) - current_self = Book.objects.get(id=self.id) - current_self.pdf_file.save('%s.pdf' % self.slug, - File(open(pdf.get_filename()))) - self.pdf_file = current_self.pdf_file - - # remove cached downloadables - remove_zip(settings.ALL_PDF_ZIP) - - for customized_pdf in get_existing_customized_pdf(self): - unlink(customized_pdf) - else: - print "saving %s" % file_name - print "to: %s" % DefaultStorage().path(file_name) - DefaultStorage().save(file_name, File(open(pdf.get_filename()))) - - def build_mobi(self): - """ (Re)builds the MOBI file. - - """ - from django.core.files import File - from catalogue.utils import remove_zip - - mobi = self.wldocument().as_mobi() - - self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi.get_filename()))) - - # remove zip with all mobi files - remove_zip(settings.ALL_MOBI_ZIP) - - def build_epub(self): - """(Re)builds the epub file.""" - from django.core.files import File - from catalogue.utils import remove_zip - - epub = self.wldocument().as_epub() - - self.epub_file.save('%s.epub' % self.slug, - File(open(epub.get_filename()))) - - # remove zip package with all epub files - remove_zip(settings.ALL_EPUB_ZIP) - - def build_txt(self): - from django.core.files.base import ContentFile - - text = self.wldocument().as_text() - self.txt_file.save('%s.txt' % self.slug, ContentFile(text.get_string())) - - def build_html(self): - from markupstring import MarkupString from django.core.files.base import ContentFile from slughifi import slughifi + from sortify import sortify from librarian import html meta_tags = list(self.tags.filter( @@ -602,17 +517,16 @@ class Book(models.Model): tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme') if created: tag.name = theme_name - tag.sort_key = theme_name.lower() + tag.sort_key = sortify(theme_name.lower()) tag.save() themes.append(tag) if not themes: continue text = fragment.to_string() - short_text = '' - markup = MarkupString(text) - if (len(markup) > 240): - short_text = unicode(markup[:160]) + short_text = truncate_html_words(text, 15) + if text == short_text: + short_text = '' new_fragment = Fragment.objects.create(anchor=fragment.id, book=self, text=text, short_text=short_text) @@ -623,29 +537,45 @@ class Book(models.Model): return True return False + # Thin wrappers for builder tasks + def build_pdf(self, *args, **kwargs): + """(Re)builds PDF.""" + return tasks.build_pdf.delay(self.pk, *args, **kwargs) + def build_epub(self, *args, **kwargs): + """(Re)builds EPUB.""" + return tasks.build_epub.delay(self.pk, *args, **kwargs) + def build_mobi(self, *args, **kwargs): + """(Re)builds MOBI.""" + return tasks.build_mobi.delay(self.pk, *args, **kwargs) + def build_fb2(self, *args, **kwargs): + """(Re)build FB2""" + return tasks.build_fb2.delay(self.pk, *args, **kwargs) + def build_txt(self, *args, **kwargs): + """(Re)builds TXT.""" + return tasks.build_txt.delay(self.pk, *args, **kwargs) + @staticmethod def zip_format(format_): def pretty_file_name(book): return "%s/%s.%s" % ( - b.get_extra_info_value()['author'], + b.extra_info['author'], b.slug, format_) field_name = "%s_file" % format_ books = Book.objects.filter(parent=None).exclude(**{field_name: ""}) paths = [(pretty_file_name(b), getattr(b, field_name).path) - for b in books] - result = create_zip.delay(paths, + for b in books.iterator()] + return create_zip(paths, getattr(settings, "ALL_%s_ZIP" % format_.upper())) - return result.wait() def zip_audiobooks(self, format_): bm = BookMedia.objects.filter(book=self, type=format_) paths = map(lambda bm: (None, bm.file.path), bm) - result = create_zip.delay(paths, "%s_%s" % (self.slug, format_)) - return result.wait() + return create_zip(paths, "%s_%s" % (self.slug, format_)) - def search_index(self, book_info=None, reuse_index=False): + def search_index(self, book_info=None, reuse_index=False, index_tags=True): + import search if reuse_index: idx = search.ReusableIndex() else: @@ -654,7 +584,8 @@ class Book(models.Model): idx.open() try: idx.index_book(self, book_info) - idx.index_tags() + if index_tags: + idx.index_tags() finally: idx.close() @@ -676,10 +607,8 @@ class Book(models.Model): @classmethod def from_text_and_meta(cls, raw_file, book_info, overwrite=False, - build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, - search_index=True): - import re - from sortify import sortify + build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True, + search_index=True, search_index_tags=True, search_index_reuse=False): # check for parts before we do anything children = [] @@ -687,7 +616,7 @@ class Book(models.Model): for part_url in book_info.parts: try: children.append(Book.objects.get(slug=part_url.slug)) - except Book.DoesNotExist, e: + except Book.DoesNotExist: raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug) @@ -713,7 +642,7 @@ class Book(models.Model): book.common_slug = book_info.variant_of.slug else: book.common_slug = book.slug - book.set_extra_info_value(book_info.to_dict()) + book.extra_info = book_info.to_dict() book.save() meta_tags = Tag.tags_from_info(book_info) @@ -748,8 +677,11 @@ class Book(models.Model): if not settings.NO_BUILD_MOBI and build_mobi: book.build_mobi() + if not settings.NO_BUILD_FB2 and build_fb2: + book.build_fb2() + if not settings.NO_SEARCH_INDEX and search_index: - book.search_index() + book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse) #index_book.delay(book.id, book_info) book_descendants = list(book.children.all()) @@ -760,12 +692,12 @@ class Book(models.Model): descendants_tags.update(child_book.tags) child_book.tags = list(child_book.tags) + [book_tag] child_book.save() - for fragment in child_book.fragments.all(): + for fragment in child_book.fragments.all().iterator(): fragment.tags = set(list(fragment.tags) + [book_tag]) book_descendants += list(child_book.children.all()) for tag in descendants_tags: - touch_tag.delay(tag) + tasks.touch_tag(tag) book.save() @@ -806,6 +738,13 @@ class Book(models.Model): type(self).objects.filter(pk=self.pk).update(_related_info=rel) return rel + def related_themes(self): + theme_counter = self.theme_counter + book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys())) + for tag in book_themes: + tag.count = theme_counter[tag.pk] + return book_themes + def reset_tag_counter(self): if self.id is None: return @@ -825,10 +764,10 @@ class Book(models.Model): if tags is None: tags = {} - for child in self.children.all().order_by(): + for child in self.children.all().order_by().iterator(): for tag_pk, value in child.tag_counter.iteritems(): tags[tag_pk] = tags.get(tag_pk, 0) + value - for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by(): + for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator(): tags[tag.pk] = 1 if self.id: @@ -854,8 +793,8 @@ class Book(models.Model): if tags is None: tags = {} - for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by(): - for tag in fragment.tags.filter(category='theme').order_by(): + for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator(): + for tag in fragment.tags.filter(category='theme').order_by().iterator(): tags[tag.pk] = tags.get(tag.pk, 0) + 1 if self.id: @@ -881,7 +820,7 @@ class Book(models.Model): @classmethod def tagged_top_level(cls, tags): - """ Returns top-level books tagged with `tags'. + """ Returns top-level books tagged with `tags`. It only returns those books which don't have ancestors which are also tagged with those tags. @@ -890,8 +829,9 @@ class Book(models.Model): # get relevant books and their tags objects = cls.tagged.with_all(tags) # eliminate descendants - l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects]) - descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(category='book', + slug__in=[book.book_tag_slug() for book in objects.iterator()]) + descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()] if descendants_keys: objects = objects.exclude(pk__in=descendants_keys) @@ -910,19 +850,20 @@ class Book(models.Model): 'title', 'parent', 'slug') if filter: books = books.filter(filter).distinct() - book_ids = set((book.pk for book in books)) - for book in books: + + book_ids = set(b['pk'] for b in books.values("pk").iterator()) + for book in books.iterator(): parent = book.parent_id if parent not in book_ids: parent = None books_by_parent.setdefault(parent, []).append(book) else: - for book in books: + for book in books.iterator(): books_by_parent.setdefault(book.parent_id, []).append(book) orphans = [] books_by_author = SortedDict() - for tag in Tag.objects.filter(category='author'): + for tag in Tag.objects.filter(category='author').iterator(): books_by_author[tag] = [] for book in books_by_parent.get(None,()): @@ -944,7 +885,7 @@ class Book(models.Model): "LP": (3, u"liceum"), } def audiences_pl(self): - audiences = self.get_extra_info_value().get('audiences', []) + audiences = self.extra_info.get('audiences', []) audiences = sorted(set([self._audiences_pl[a] for a in audiences])) return [a[1] for a in audiences] @@ -961,7 +902,8 @@ class Book(models.Model): def _has_factory(ftype): has = lambda self: bool(getattr(self, "%s_file" % ftype)) - has.short_description = t.upper() + has.short_description = ftype.upper() + has.__doc__ = None has.boolean = True has.__name__ = "has_%s_file" % ftype return has @@ -978,6 +920,7 @@ for t in Book.formats: class Fragment(models.Model): + """Represents a themed fragment of a book.""" text = models.TextField() short_text = models.TextField(editable=False) anchor = models.CharField(max_length=120) @@ -1003,6 +946,10 @@ class Fragment(models.Model): for lang, langname in settings.LANGUAGES: permanent_cache.delete(cache_key % (self.id, lang)) + def get_short_text(self): + """Returns short version of the fragment.""" + return self.short_text if self.short_text else self.text + def short_html(self): if self.id: cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language()) @@ -1049,7 +996,7 @@ def _tags_updated_handler(sender, affected_tags, **kwargs): # reset tag global counter # we want Tag.changed_at updated for API to know the tag was touched for tag in affected_tags: - touch_tag.delay(tag) + tasks.touch_tag(tag) # if book tags changed, reset book tag counter if isinstance(sender, Book) and \ @@ -1070,8 +1017,24 @@ def _pre_delete_handler(sender, instance, **kwargs): instance.book.save() pre_delete.connect(_pre_delete_handler) + def _post_save_handler(sender, instance, **kwargs): """ refresh all the short_html stuff on BookMedia update """ if sender == BookMedia: instance.book.save() post_save.connect(_post_save_handler) + + +if not settings.NO_SEARCH_INDEX: + @django.dispatch.receiver(post_delete, sender=Book) + def _remove_book_from_index_handler(sender, instance, **kwargs): + """ remove the book from search index, when it is deleted.""" + import search + search.JVM.attachCurrentThread() + idx = search.Index() + idx.open(timeout=10000) # 10 seconds timeout. + try: + idx.remove_book(instance) + idx.index_tags() + finally: + idx.close()