--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import sys
+from django.contrib.auth.models import User
+from optparse import make_option
+
+from django.core.management import BaseCommand
+
+from catalogue.models import Book
+from catalogue.xml_tools import remove_empty_elements
+
+# Slugs of books that the command below skips: a book whose slug is listed
+# here is never processed and none of its chunks are touched.
+EXCLUDED_SLUGS = [
+ 'aktualizacja-szablonu-8kwie',
+]
+
+
+class Command(BaseCommand):
+    """Strip empty XML elements from the head revision of every book chunk.
+
+    Every book whose slug is not listed in EXCLUDED_SLUGS is visited. The
+    head revision of each of its chunks is passed through
+    remove_empty_elements(); when that reports a change, the cleaned XML is
+    committed on behalf of the user given with --username, and the
+    publishable flag of the previous head is carried over to the new one.
+    """
+    help = 'Removes empty XML elements from the head revision of all chunks.'
+
+    option_list = BaseCommand.option_list + (
+        # NOTE: --quiet and --dry-run options are not implemented; the
+        # command always prints its progress and always commits.
+        make_option(
+            '-u', '--username', dest='username', metavar='USER',
+            help='Assign commits to this user (required, preferably yourself).'),
+    )
+
+    def handle(self, **options):
+        """Run the clean-up.
+
+        Exits with status 1 when --username is missing or does not match
+        an existing user.
+        """
+        username = options.get('username')
+        if not username:
+            print 'Please provide a username.'
+            sys.exit(1)
+        try:
+            user = User.objects.get(username=username)
+        except User.DoesNotExist:
+            # Report an unknown user the same way as a missing one instead
+            # of dying with a traceback.
+            print 'User %s does not exist.' % username
+            sys.exit(1)
+
+        for book in Book.objects.all():
+            if book.slug in EXCLUDED_SLUGS:
+                continue
+            print 'processing %s' % book.slug
+            for chunk in book.chunk_set.all():
+                old_head = chunk.head
+                new_xml = remove_empty_elements(old_head.materialize())
+                if not new_xml:
+                    # None: nothing was removed, or the XML did not parse.
+                    continue
+                new_head = chunk.commit(
+                    new_xml,
+                    author=user,
+                    description=u'automatyczne usunięcie pustych znaczników'
+                )
+                print 'committed %s (chunk %s)' % (book.slug, chunk.number)
+                # Keep the chunk publishable if it was before the clean-up.
+                if old_head.publishable:
+                    new_head.set_publishable(True)
from copy import deepcopy
import re
+from django.utils.encoding import force_str
from lxml import etree
from catalogue.constants import TRIM_BEGIN, TRIM_END, MASTERS
if not h[0].text or not re.match(r'\d\.\s', h[0].text):
raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text))
h[0].text = h[0].text[3:]
- return etree.tostring(w1t, encoding='utf-8')
\ No newline at end of file
+ return etree.tostring(w1t, encoding='utf-8')
+
+
+# (tag, value of the "class" attribute) pairs that remove_empty_elements()
+# leaves in place even when the element has no text and no children.
+EXCEPTIONS = [
+ ('div', 'img'),
+ ('div', 'video'),
+ ('div', 'table.cell'),
+ ('span', 'link'),
+]
+
+
+def remove_element(element):
+    """Detach *element* from its parent while keeping its tail text.
+
+    The tail (the text that follows the element) is stored on the element
+    itself, so before removal it is appended to the tail of the preceding
+    sibling or, when there is no preceding sibling, to the text of the
+    parent.
+    """
+    container = element.getparent()
+    trailing = element.tail
+    if trailing:
+        sibling = element.getprevious()
+        if sibling is None:
+            container.text = (container.text or '') + trailing
+        else:
+            sibling.tail = (sibling.tail or '') + trailing
+    container.remove(element)
+
+
+def remove_empty_elements(xml):
+    """Remove elements that have neither text nor children from *xml*.
+
+    An element is dropped when its text is missing or whitespace-only and
+    it has no children, unless its (tag, class attribute) pair is listed in
+    EXCEPTIONS. Removal cascades: a parent left empty by the removal of its
+    children is dropped as well. The root element is never removed.
+
+    Returns the cleaned document as a unicode string when at least one
+    element was removed. Returns None when nothing had to be removed or
+    when *xml* could not be parsed.
+    """
+    try:
+        # &nbsp; is an HTML entity that XML does not predefine, so it is
+        # swapped for the literal no-break space before parsing.
+        tree = etree.fromstring(force_str(xml.replace('&nbsp;', u'\xa0')))
+    except SyntaxError:
+        return None
+    changed = False
+    # findall() lists descendants in document order, parents before their
+    # children. Walking the list backwards handles children first, so a
+    # parent emptied by these removals is caught in the same pass.
+    for element in reversed(tree.findall('.//*')):
+        if len(element) or (element.text and element.text.strip()):
+            continue
+        if (element.tag, element.attrib.get('class')) in EXCEPTIONS:
+            continue
+        remove_element(element)
+        changed = True
+    return etree.tostring(tree, encoding=unicode) if changed else None
from django.utils.translation import ugettext_lazy as _
from catalogue.models import Chunk
+from catalogue.xml_tools import remove_empty_elements
class DocumentPubmarkForm(forms.Form):
self.fields['for_cybernauts'].initial = self.chunk.book.for_cybernauts
self.fields['publishable'].initial = self.chunk.head.publishable
+    def clean_text(self):
+        """Return the submitted text with empty XML elements removed.
+
+        remove_empty_elements() returns None both when it had nothing to
+        remove and when the text failed to parse. In either case the
+        submitted text is returned unchanged, so the cleaned value is never
+        replaced by None.
+        """
+        text = self.cleaned_data.get('text', '')
+        return remove_empty_elements(text) or text
+
def save(self):
if self.user.is_authenticated():
author = self.user