X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/814264a85573e5fc9274872a255d32e0be608bbd..132297f0be27da544304733bd24da32282310235:/apps/catalogue/models/book.py

diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py
index f40a368f..83db3057 100755
--- a/apps/catalogue/models/book.py
+++ b/apps/catalogue/models/book.py
@@ -246,7 +246,7 @@ class Book(models.Model):
     def assert_publishable(self):
         assert self.chunk_set.exists(), _('No chunks in the book.')
         try:
-            changes = self.get_current_changes(publishable=True)
+            changes = self.get_current_changes()
         except self.NoTextError:
             raise AssertionError(_('Not all chunks have publishable revisions.'))
 
@@ -254,6 +254,10 @@ class Book(models.Model):
 
         try:
             bi = self.wldocument(changes=changes, strict=True).book_info
+            if not bi.audience:
+                raise ValidationError('No audience specified')
+            if not bi.type:
+                raise ValidationError('No type specified')
         except ParseError, e:
             raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
         except NoDublinCore:
@@ -315,14 +319,14 @@ class Book(models.Model):
 
     def book_info(self, publishable=True):
         try:
-            book_xml = self.materialize(publishable=publishable)
+            book_xml = self.wl1_xml(publishable=publishable)
         except self.NoTextError:
             pass
         else:
             from librarian.dcparser import BookInfo
             from librarian import NoDublinCore, ParseError, ValidationError
             try:
-                return BookInfo.from_string(book_xml.encode('utf-8'))
+                return BookInfo.from_string(book_xml)
             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
                 return None
 
@@ -361,10 +365,10 @@ class Book(models.Model):
 
     def refresh(self):
         """This should be done offline."""
-        self.short_html
-        self.single
-        self.new_publishable
-        self.published
+        self.short_html()
+        self.single()
+        self.new_publishable()
+        self.published()
 
     # Materializing & publishing
     # ==========================
@@ -399,21 +403,106 @@ class Book(models.Model):
         from librarian.parser import WLDocument
 
         return WLDocument.from_string(
-                self.materialize(publishable=publishable, changes=changes),
+                self.wl1_xml(publishable=publishable, changes=changes),
                 provider=RedakcjaDocProvider(publishable=publishable),
                 parse_dublincore=parse_dublincore,
                 strict=strict)
 
-    def publish(self, user):
+    def publish(self, user, host=None):
         """
             Publishes a book on behalf of a (local) user.
         """
+        import json
+        import os
+        from django.conf import settings
         self.assert_publishable()
-        changes = self.get_current_changes(publishable=True)
-        book_xml = self.materialize(changes=changes)
-        apiclient.api_call(user, "books/", {"book_xml": book_xml})
+        changes = self.get_current_changes()
+        data = {"lesson_xml": self.wl1_xml(changes=changes)}
+        # When a host is given, also send the gallery URL and its file list.
+        if host:
+            gallery_url = u'%s%s%s%s/' % (host, settings.MEDIA_URL, settings.IMAGE_DIR, self.slug)
+            gallery_dir = os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.slug)
+            if os.path.isdir(gallery_dir):
+                data['gallery_url'] = gallery_url
+                data['attachments'] = json.dumps(os.listdir(gallery_dir))
+        apiclient.api_call(user, "lessons/", data)
         # record the publish
         br = BookPublishRecord.objects.create(book=self, user=user)
         for c in changes:
             ChunkPublishRecord.objects.create(book_record=br, change=c)
         post_publish.send(sender=br)
+
+    def wl1_xml(self, publishable=True, changes=None):
+        """Return the book text transformed with xslt/wl2to1.xslt.
+
+        The XML from materialize() is adjusted, run through the
+        stylesheet and serialized as UTF-8 bytes.  Raises ParseError when
+        the lesson structure or an aktywnosc/opis header is invalid.
+        """
+        from lxml import etree
+        import re
+        from StringIO import StringIO
+        from urllib import unquote
+        import os.path
+        from django.conf import settings
+        from fnpdjango.utils.text.slughifi import slughifi
+        from librarian import ParseError, DCNS
+
+        def _register_function(f):
+            """ Register extension function with lxml """
+            ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+            ns[f.__name__] = f
+            return f
+
+        @_register_function
+        def slugify(context, text):
+            """Join a node-set result into one string and slugify it"""
+            if isinstance(text, list):
+                text = ''.join(text)
+            return slughifi(text)
+
+        @_register_function
+        def rmext(context, text):
+            """Unquote the name and drop a known document/image extension"""
+            if isinstance(text, list):
+                text = ''.join(text)
+            text = unquote(text)
+            if '.' in text:
+                name, ext = text.rsplit('.', 1)
+                if ext.lower() in ('doc', 'docx', 'odt', 'pdf', 'jpg', 'jpeg'):
+                    text = name
+            return text
+
+        t = etree.parse(os.path.join(settings.PROJECT_ROOT, 'xslt/wl2to1.xslt'))
+        ft = self.materialize(publishable=publishable, changes=changes)
+        # &nbsp; is not a predefined XML entity; replace it before parsing.
+        ft = ft.replace('&nbsp;', ' ')
+        f2 = StringIO(ft)
+        i1 = etree.parse(f2)
+
+        for sect in i1.findall('//section'):
+            if len(sect) and sect[0].text and sect[0].text.strip() == u'Przebieg zajęć':
+                # Flatten: move the nested subsections up into this section.
+                first = sect.find('section')
+                subs = first.findall('.//section') if first is not None else []
+                for sub in subs:
+                    sect.append(sub)
+                break
+        else:
+            # The loop found no lesson-flow section.
+            dc_type = i1.findall('//dc:type', namespaces={'dc': DCNS.uri})
+            if dc_type and dc_type[0].text in ('course', 'synthetic'):
+                raise ParseError('Brak przebiegu')
+
+        i1.getroot().attrib['redslug'] = self.slug
+        i1.getroot().attrib['wlslug'] = self.slug # THIS!
+        # Apply the stylesheet, then validate and trim the activity headers.
+        w1t = i1.xslt(t)
+        for h in w1t.findall('//aktywnosc/opis'):
+            if len(h) == 0:
+                raise ParseError('Pusty element aktywnosc/opis')
+            # FIXME assumption that every lesson has at most 9 parts
+            if not h[0].text or not re.match(r'\d\.\s', h[0].text):
+                raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text))
+            h[0].text = h[0].text[3:]
+        return etree.tostring(w1t, encoding='utf-8')