X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/1b718c10066557540770bb0960a773dce0ad4462..a3222a2e7997e50cf72a4548ee21c06d7e99ca65:/apps/catalogue/models/book.py?ds=sidebyside diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py index f4e025ed..45726551 100755 --- a/apps/catalogue/models/book.py +++ b/apps/catalogue/models/book.py @@ -4,19 +4,18 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # from django.contrib.sites.models import Site -from django.db import models +from django.db import models, transaction from django.template.loader import render_to_string from django.utils.translation import ugettext_lazy as _ from slughifi import slughifi -from librarian import NoDublinCore, ParseError, ValidationError -from librarian.dcparser import BookInfo import apiclient -from catalogue.helpers import cached_in_field -from catalogue.models import BookPublishRecord, ChunkPublishRecord +from catalogue.helpers import cached_in_field, GalleryMerger +from catalogue.models import BookPublishRecord, ChunkPublishRecord, Project from catalogue.signals import post_publish -from catalogue.tasks import refresh_instance +from catalogue.tasks import refresh_instance, book_content_updated from catalogue.xml_tools import compile_text, split_xml +from cover.models import Image class Book(models.Model): @@ -24,30 +23,35 @@ class Book(models.Model): title = models.CharField(_('title'), max_length=255, db_index=True) slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True) - gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True) + public = models.BooleanField(_('public'), default=True, db_index=True) + gallery = models.CharField(u'materiały', max_length=255, blank=True) + project = models.ForeignKey(Project, null=True, blank=True) - #wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False) - parent = models.ForeignKey('self', null=True, blank=True, verbose_name=_('parent'), related_name="children") - parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True) + # wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False) + parent = models.ForeignKey( + 'self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False) + parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False) + + for_cybernauts = models.BooleanField(_('for Cybernauts'), default=False) # Cache _short_html = models.TextField(null=True, blank=True, editable=False) _single = models.NullBooleanField(editable=False, db_index=True) _new_publishable = models.NullBooleanField(editable=False) _published = models.NullBooleanField(editable=False) - - # Managers - objects = models.Manager() + _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False) + dc_cover_image = models.ForeignKey( + Image, blank=True, null=True, db_index=True, on_delete=models.SET_NULL, editable=False) + dc_slug = models.CharField(max_length=128, null=True, blank=True, editable=False, db_index=True) class NoTextError(BaseException): pass class Meta: app_label = 'catalogue' - ordering = ['parent_number', 'title'] - verbose_name = _('book') - verbose_name_plural = _('books') - + ordering = ['title', 'slug'] + verbose_name = u'moduł' + verbose_name_plural = u'moduły' # Representing # ============ @@ -72,15 +76,24 @@ class Book(models.Model): @models.permalink def get_absolute_url(self): - return ("catalogue_book", [self.slug]) + return "catalogue_book", [self.slug] + def correct_about(self): + return "http://%s%s" % ( + Site.objects.get_current().domain, + self.get_absolute_url() + ) # Creating & manipulating # ======================= + def accessible(self, request): + return self.public or request.user.is_authenticated() + @classmethod - def create(cls, creator, text, *args, **kwargs): - b = cls.objects.create(*args, **kwargs) + @transaction.commit_on_success + def create(cls, creator, text, **kwargs): + b = cls.objects.create(**kwargs) b.chunk_set.all().update(creator=creator) b[0].commit(text, author=creator) return b @@ -90,8 +103,8 @@ class Book(models.Model): return self.chunk_set.reverse()[0].split(*args, **kwargs) @classmethod - def import_xml_text(cls, text=u'', previous_book=None, - commit_args=None, **kwargs): + @transaction.commit_on_success + def import_xml_text(cls, text=u'', previous_book=None, commit_args=None, **kwargs): """Imports a book from XML, splitting it into chunks as necessary.""" texts = split_xml(text) if previous_book: @@ -105,7 +118,6 @@ class Book(models.Model): for i in range(book_len - len(texts)): texts.append((u'pusta część %d' % (i + 1), u'')) - i = 0 for i, (title, text) in enumerate(texts): if not title: title = u'część %d' % (i + 1) @@ -125,19 +137,22 @@ class Book(models.Model): return instance def make_chunk_slug(self, proposed): - """ + """ Finds a chunk slug not yet used in the book. """ slugs = set(c.slug for c in self) i = 1 - new_slug = proposed + new_slug = proposed[:50] while new_slug in slugs: - new_slug = "%s_%d" % (proposed, i) + new_slug = "%s_%d" % (proposed[:45], i) i += 1 return new_slug + @transaction.commit_on_success def append(self, other, slugs=None, titles=None): """Add all chunks of another book to self.""" + assert self != other + number = self[len(self) - 1].number + 1 len_other = len(other) single = len_other == 1 @@ -166,12 +181,11 @@ class Book(models.Model): # just use the guessed title and original book slug chunk.title = other_title_part if other.slug.startswith(self.slug): - chunk_slug = other.slug[len(self.slug):].lstrip('-_') + chunk.slug = other.slug[len(self.slug):].lstrip('-_') else: - chunk_slug = other.slug - chunk.slug = self.make_chunk_slug(chunk_slug) + chunk.slug = other.slug else: - chunk.title = "%s, %s" % (other_title_part, chunk.title) + chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255] else: chunk.slug = slugs[i] chunk.title = titles[i] @@ -179,8 +193,46 @@ class Book(models.Model): chunk.slug = self.make_chunk_slug(chunk.slug) chunk.save() number += 1 + assert not other.chunk_set.exists() + + gm = GalleryMerger(self.gallery, other.gallery) + self.gallery = gm.merge() + + # and move the gallery starts + if gm.was_merged: + for chunk in self[len(self) - len_other:]: + old_start = chunk.gallery_start or 1 + chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted + chunk.save() + other.delete() + @transaction.commit_on_success + def prepend_history(self, other): + """Prepend history from all the other book's chunks to own.""" + assert self != other + + for i in range(len(self), len(other)): + title = u"pusta część %d" % i + chunk = self.add(slughifi(title), title) + chunk.commit('') + + for i in range(len(other)): + self[i].prepend_history(other[0]) + + assert not other.chunk_set.exists() + other.delete() + + def split(self): + """Splits all the chunks into separate books.""" + for chunk in self: + book = Book.objects.create(title=chunk.title, slug=chunk.slug, public=self.public, gallery=self.gallery) + book[0].delete() + chunk.book = book + chunk.number = 1 + chunk.save() + assert not self.chunk_set.exists() + self.delete() # State & cache # ============= @@ -194,23 +246,34 @@ class Book(models.Model): def assert_publishable(self): assert self.chunk_set.exists(), _('No chunks in the book.') try: - changes = self.get_current_changes(publishable=True) + changes = self.get_current_changes() except self.NoTextError: raise AssertionError(_('Not all chunks have publishable revisions.')) - book_xml = self.materialize(changes=changes) + + from librarian import NoDublinCore, ParseError, ValidationError try: - bi = BookInfo.from_string(book_xml.encode('utf-8')) + bi = self.wldocument(changes=changes, strict=True).book_info + if not bi.audience: + raise ValidationError('No audience specified') + if not bi.type: + raise ValidationError('No type specified') except ParseError, e: - raise AssertionError(_('Invalid XML') + ': ' + str(e)) + raise AssertionError(_('Invalid XML') + ': ' + unicode(e)) except NoDublinCore: raise AssertionError(_('No Dublin Core found.')) except ValidationError, e: - raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e)) + raise AssertionError(_('Invalid Dublin Core') + ': ' + unicode(e)) - valid_about = "http://%s%s" % (Site.objects.get_current().domain, self.get_absolute_url()) + valid_about = self.correct_about() assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about + def publishable_error(self): + try: + return self.assert_publishable() + except AssertionError, e: + return e + def hidden(self): return self.slug.startswith('.') @@ -237,6 +300,15 @@ class Book(models.Model): return self.publish_log.exists() published = cached_in_field('_published')(is_published) + def get_on_track(self): + if self.published: + return -1 + stages = [ch.stage.ordering if ch.stage is not None else 0 for ch in self] + if not len(stages): + return 0 + return min(stages) + on_track = cached_in_field('_on_track')(get_on_track) + def is_single(self): return len(self) == 1 single = cached_in_field('_single')(is_single) @@ -245,11 +317,47 @@ class Book(models.Model): def short_html(self): return render_to_string('catalogue/book_list/book.html', {'book': self}) + def book_info(self, publishable=True): + try: + book_xml = self.wl1_xml(publishable=publishable) + except self.NoTextError: + pass + else: + from librarian.dcparser import BookInfo + from librarian import NoDublinCore, ParseError, ValidationError + try: + return BookInfo.from_string(book_xml) + except (self.NoTextError, ParseError, NoDublinCore, ValidationError): + return None + + def refresh_dc_cache(self): + update = { + 'dc_slug': None, + 'dc_cover_image': None, + } + + info = self.book_info() + if info is not None: + update['dc_slug'] = info.url.slug + if info.cover_source: + try: + image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1])) + except Image.DoesNotExist: + pass + else: + if info.cover_source == image.get_full_url(): + update['dc_cover_image'] = image + Book.objects.filter(pk=self.pk).update(**update) + def touch(self): + # this should only really be done when text or publishable status changes + book_content_updated.delay(self) + update = { "_new_publishable": self.is_new_publishable(), "_published": self.is_published(), "_single": self.is_single(), + "_on_track": self.get_on_track(), "_short_html": None, } Book.objects.filter(pk=self.pk).update(**update) @@ -280,7 +388,7 @@ class Book(models.Model): return changes def materialize(self, publishable=False, changes=None): - """ + """ Get full text of the document compiled from chunks. Takes the current versions of all texts or versions most recently tagged for publishing, @@ -290,16 +398,105 @@ class Book(models.Model): changes = self.get_current_changes(publishable) return compile_text(change.materialize() for change in changes) - def publish(self, user): + def wldocument(self, publishable=True, changes=None, parse_dublincore=True, strict=False): + from catalogue.ebook_utils import RedakcjaDocProvider + from librarian.parser import WLDocument + + return WLDocument.from_string( + self.wl1_xml(publishable=publishable, changes=changes), + provider=RedakcjaDocProvider(publishable=publishable), + parse_dublincore=parse_dublincore, + strict=strict) + + def publish(self, user, host=None): """ Publishes a book on behalf of a (local) user. """ + import json + import os + from django.conf import settings self.assert_publishable() - changes = self.get_current_changes(publishable=True) - book_xml = self.materialize(changes=changes) - apiclient.api_call(user, "books/", {"book_xml": book_xml}) + changes = self.get_current_changes() + data = {"lesson_xml": self.wl1_xml(changes=changes)} + if host: + gallery_url = u'%s%s%s%s/' % (host, settings.MEDIA_URL, settings.IMAGE_DIR, self.slug) + gallery_dir = os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.slug) + if os.path.isdir(gallery_dir): + data['gallery_url'] = gallery_url + attachments = os.listdir(gallery_dir) + else: + attachments = [] + data['attachments'] = json.dumps(attachments) + apiclient.api_call(user, "lessons/", data) # record the publish br = BookPublishRecord.objects.create(book=self, user=user) for c in changes: ChunkPublishRecord.objects.create(book_record=br, change=c) post_publish.send(sender=br) + + def wl1_xml(self, publishable=True, changes=None): + from lxml import etree + import re + from StringIO import StringIO + from urllib import unquote + import os.path + from django.conf import settings + from fnpdjango.utils.text.slughifi import slughifi + from librarian import ParseError, DCNS + + def _register_function(f): + """ Register extension function with lxml """ + ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') + ns[f.__name__] = f + return f + + @_register_function + def slugify(context, text): + """Remove unneeded whitespace from beginning and end""" + if isinstance(text, list): + text = ''.join(text) + return slughifi(text) + + @_register_function + def rmext(context, text): + if isinstance(text, list): + text = ''.join(text) + text = unquote(text) + if '.' in text: + name, ext = text.rsplit('.', 1) + if ext.lower() in ('doc', 'docx', 'odt', 'pdf', 'jpg', 'jpeg'): + text = name + return text + + t = etree.parse(os.path.join(settings.PROJECT_ROOT, 'xslt/wl2to1.xslt')) + ft = self.materialize(publishable=publishable, changes=changes) + ft = ft.replace(' ', ' ') + f2 = StringIO(ft) + i1 = etree.parse(f2) + + for sect in i1.findall('//section'): + if sect[0].text and sect[0].text.strip() == u'Przebieg zajęć': + # Prostujemy. + first = sect.find('section') + subs = first.findall('.//section') + for sub in subs: + sect.append(sub) + break + else: + # print 'BRAK PRZEBIEGU' + dc_type = i1.findall('//dc:type', namespaces={'dc': DCNS.uri}) + if dc_type and dc_type[0] in ('course', 'synthetic'): + raise ParseError('Brak przebiegu') + + i1.getroot().attrib['redslug'] = self.slug + i1.getroot().attrib['wlslug'] = self.slug # THIS! + # print '.', + w1t = i1.xslt(t) + for h in w1t.findall('//aktywnosc/opis'): + if len(h) == 0: + raise ParseError('Pusty element aktywnosc/opis') + # FIXME assumption that every lesson has at most 9 parts + if not h[0].text or not re.match(r'\d\.\s', h[0].text): + raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text)) + h[0].text = h[0].text[3:] + return etree.tostring(w1t, encoding='utf-8')