X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/655700e4cde69f00f698a06ca18991eef784ccda..7c5ccbccb3c83d91abc726298447bef2c322a559:/apps/catalogue/models.py?ds=inline diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index 8b8aa0ac6..3704b1663 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -22,12 +22,11 @@ from django.conf import settings from newtagging.models import TagBase, tags_updated from newtagging import managers -from catalogue.fields import JSONField, OverwritingFileField +from catalogue.fields import OverwritingFileField from catalogue.utils import create_zip, split_tags, truncate_html_words from catalogue import tasks import re -import search # Those are hard-coded here so that makemessages sees them. TAG_CATEGORIES = ( @@ -54,6 +53,10 @@ class TagSubcategoryManager(models.Manager): class Tag(TagBase): + """A tag attachable to books and fragments (and possibly anything). + + Used to represent searchable metadata (authors, epochs, genres, kinds), + fragment themes (motifs) and some book hierarchy related kludges.""" name = models.CharField(_('name'), max_length=50, db_index=True) slug = models.SlugField(_('slug'), max_length=120, db_index=True) sort_key = models.CharField(_('sort key'), max_length=120, db_index=True) @@ -115,8 +118,8 @@ class Tag(TagBase): objects = Book.tagged.with_all((self,)).order_by() if self.category != 'set': # eliminate descendants - l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects]) - descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects.iterator()]) + descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()] if descendants_keys: objects = objects.exclude(pk__in=descendants_keys) return objects.count() @@ -213,6 +216,7 @@ def book_upload_path(ext=None, maxlen=100): class BookMedia(models.Model): + """Represents media attached to a book.""" FileFormat = namedtuple("FileFormat", "name ext") formats = SortedDict([ ('mp3', FileFormat(name='MP3', ext='mp3')), @@ -226,7 +230,7 @@ class BookMedia(models.Model): name = models.CharField(_('name'), max_length="100") file = OverwritingFileField(_('file'), upload_to=book_upload_path()) uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False) - extra_info = JSONField(_('extra information'), default='{}', editable=False) + extra_info = jsonfield.JSONField(_('extra information'), default='{}', editable=False) book = models.ForeignKey('Book', related_name='media') source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False) @@ -258,9 +262,9 @@ class BookMedia(models.Model): remove_zip("%s_%s" % (old.book.slug, old.type)) remove_zip("%s_%s" % (self.book.slug, self.type)) - extra_info = self.get_extra_info_value() + extra_info = self.extra_info extra_info.update(self.read_meta()) - self.set_extra_info_value(extra_info) + self.extra_info = extra_info self.source_sha1 = self.read_source_sha1(self.file.path, self.type) return super(BookMedia, self).save(*args, **kwargs) @@ -323,6 +327,7 @@ class BookMedia(models.Model): class Book(models.Model): + """Represents a book imported from WL-XML.""" title = models.CharField(_('title'), max_length=120) sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False) slug = models.SlugField(_('slug'), max_length=120, db_index=True, @@ -334,14 +339,14 @@ class Book(models.Model): created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True) changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True) parent_number = models.IntegerField(_('parent number'), default=0) - extra_info = JSONField(_('extra information'), default='{}') + extra_info = jsonfield.JSONField(_('extra information'), default='{}') gazeta_link = models.CharField(blank=True, max_length=240) wiki_link = models.CharField(blank=True, max_length=240) # files generated during publication cover = models.FileField(_('cover'), upload_to=book_upload_path('png'), null=True, blank=True) - ebook_formats = ['pdf', 'epub', 'mobi', 'txt'] + ebook_formats = ['pdf', 'epub', 'mobi', 'fb2', 'txt'] formats = ebook_formats + ['html', 'xml'] parent = models.ForeignKey('self', blank=True, null=True, related_name='children') @@ -428,7 +433,7 @@ class Book(models.Model): type(self).objects.filter(pk=self.pk).update(_related_info=None) # Fragment.short_html relies on book's tags, so reset it here too - for fragm in self.fragments.all(): + for fragm in self.fragments.all().iterator(): fragm.reset_short_html() def has_description(self): @@ -477,6 +482,7 @@ class Book(models.Model): def build_html(self): from django.core.files.base import ContentFile from slughifi import slughifi + from sortify import sortify from librarian import html meta_tags = list(self.tags.filter( @@ -511,7 +517,7 @@ class Book(models.Model): tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme') if created: tag.name = theme_name - tag.sort_key = theme_name.lower() + tag.sort_key = sortify(theme_name.lower()) tag.save() themes.append(tag) if not themes: @@ -533,26 +539,33 @@ class Book(models.Model): # Thin wrappers for builder tasks def build_pdf(self, *args, **kwargs): + """(Re)builds PDF.""" return tasks.build_pdf.delay(self.pk, *args, **kwargs) def build_epub(self, *args, **kwargs): + """(Re)builds EPUB.""" return tasks.build_epub.delay(self.pk, *args, **kwargs) def build_mobi(self, *args, **kwargs): + """(Re)builds MOBI.""" return tasks.build_mobi.delay(self.pk, *args, **kwargs) + def build_fb2(self, *args, **kwargs): + """(Re)build FB2""" + return tasks.build_fb2.delay(self.pk, *args, **kwargs) def build_txt(self, *args, **kwargs): + """(Re)builds TXT.""" return tasks.build_txt.delay(self.pk, *args, **kwargs) @staticmethod def zip_format(format_): def pretty_file_name(book): return "%s/%s.%s" % ( - b.get_extra_info_value()['author'], + b.extra_info['author'], b.slug, format_) field_name = "%s_file" % format_ books = Book.objects.filter(parent=None).exclude(**{field_name: ""}) paths = [(pretty_file_name(b), getattr(b, field_name).path) - for b in books] + for b in books.iterator()] return create_zip(paths, getattr(settings, "ALL_%s_ZIP" % format_.upper())) @@ -562,6 +575,7 @@ class Book(models.Model): return create_zip(paths, "%s_%s" % (self.slug, format_)) def search_index(self, book_info=None, reuse_index=False, index_tags=True): + import search if reuse_index: idx = search.ReusableIndex() else: @@ -593,7 +607,7 @@ class Book(models.Model): @classmethod def from_text_and_meta(cls, raw_file, book_info, overwrite=False, - build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, + build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True, search_index=True, search_index_tags=True, search_index_reuse=False): # check for parts before we do anything @@ -628,7 +642,7 @@ class Book(models.Model): book.common_slug = book_info.variant_of.slug else: book.common_slug = book.slug - book.set_extra_info_value(book_info.to_dict()) + book.extra_info = book_info.to_dict() book.save() meta_tags = Tag.tags_from_info(book_info) @@ -663,6 +677,9 @@ class Book(models.Model): if not settings.NO_BUILD_MOBI and build_mobi: book.build_mobi() + if not settings.NO_BUILD_FB2 and build_fb2: + book.build_fb2() + if not settings.NO_SEARCH_INDEX and search_index: book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse) #index_book.delay(book.id, book_info) @@ -675,7 +692,7 @@ class Book(models.Model): descendants_tags.update(child_book.tags) child_book.tags = list(child_book.tags) + [book_tag] child_book.save() - for fragment in child_book.fragments.all(): + for fragment in child_book.fragments.all().iterator(): fragment.tags = set(list(fragment.tags) + [book_tag]) book_descendants += list(child_book.children.all()) @@ -723,7 +740,7 @@ class Book(models.Model): def related_themes(self): theme_counter = self.theme_counter - book_themes = Tag.objects.filter(pk__in=theme_counter.keys()) + book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys())) for tag in book_themes: tag.count = theme_counter[tag.pk] return book_themes @@ -747,10 +764,10 @@ class Book(models.Model): if tags is None: tags = {} - for child in self.children.all().order_by(): + for child in self.children.all().order_by().iterator(): for tag_pk, value in child.tag_counter.iteritems(): tags[tag_pk] = tags.get(tag_pk, 0) + value - for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by(): + for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator(): tags[tag.pk] = 1 if self.id: @@ -776,8 +793,8 @@ class Book(models.Model): if tags is None: tags = {} - for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by(): - for tag in fragment.tags.filter(category='theme').order_by(): + for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator(): + for tag in fragment.tags.filter(category='theme').order_by().iterator(): tags[tag.pk] = tags.get(tag.pk, 0) + 1 if self.id: @@ -803,7 +820,7 @@ class Book(models.Model): @classmethod def tagged_top_level(cls, tags): - """ Returns top-level books tagged with `tags'. + """ Returns top-level books tagged with `tags`. It only returns those books which don't have ancestors which are also tagged with those tags. @@ -812,8 +829,9 @@ class Book(models.Model): # get relevant books and their tags objects = cls.tagged.with_all(tags) # eliminate descendants - l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects]) - descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(category='book', + slug__in=[book.book_tag_slug() for book in objects.iterator()]) + descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()] if descendants_keys: objects = objects.exclude(pk__in=descendants_keys) @@ -832,19 +850,20 @@ class Book(models.Model): 'title', 'parent', 'slug') if filter: books = books.filter(filter).distinct() - book_ids = set((book.pk for book in books)) - for book in books: + + book_ids = set(b['pk'] for b in books.values("pk").iterator()) + for book in books.iterator(): parent = book.parent_id if parent not in book_ids: parent = None books_by_parent.setdefault(parent, []).append(book) else: - for book in books: + for book in books.iterator(): books_by_parent.setdefault(book.parent_id, []).append(book) orphans = [] books_by_author = SortedDict() - for tag in Tag.objects.filter(category='author'): + for tag in Tag.objects.filter(category='author').iterator(): books_by_author[tag] = [] for book in books_by_parent.get(None,()): @@ -866,7 +885,7 @@ class Book(models.Model): "LP": (3, u"liceum"), } def audiences_pl(self): - audiences = self.get_extra_info_value().get('audiences', []) + audiences = self.extra_info.get('audiences', []) audiences = sorted(set([self._audiences_pl[a] for a in audiences])) return [a[1] for a in audiences] @@ -883,7 +902,8 @@ class Book(models.Model): def _has_factory(ftype): has = lambda self: bool(getattr(self, "%s_file" % ftype)) - has.short_description = t.upper() + has.short_description = ftype.upper() + has.__doc__ = None has.boolean = True has.__name__ = "has_%s_file" % ftype return has @@ -900,6 +920,7 @@ for t in Book.formats: class Fragment(models.Model): + """Represents a themed fragment of a book.""" text = models.TextField() short_text = models.TextField(editable=False) anchor = models.CharField(max_length=120) @@ -1004,12 +1025,16 @@ def _post_save_handler(sender, instance, **kwargs): post_save.connect(_post_save_handler) -@django.dispatch.receiver(post_delete, sender=Book) -def _remove_book_from_index_handler(sender, instance, **kwargs): - """ remove the book from search index, when it is deleted.""" - idx = search.Index() - idx.open(timeout=10000) # 10 seconds timeout. - try: - idx.remove_book(instance) - finally: - idx.close() +if not settings.NO_SEARCH_INDEX: + @django.dispatch.receiver(post_delete, sender=Book) + def _remove_book_from_index_handler(sender, instance, **kwargs): + """ remove the book from search index, when it is deleted.""" + import search + search.JVM.attachCurrentThread() + idx = search.Index() + idx.open(timeout=10000) # 10 seconds timeout. + try: + idx.remove_book(instance) + idx.index_tags() + finally: + idx.close()