add command to remove unneeded audience tags
author Jan Szejko <janek37@gmail.com>
Wed, 30 Aug 2017 10:04:28 +0000 (12:04 +0200)
committer Jan Szejko <janek37@gmail.com>
Wed, 30 Aug 2017 10:04:28 +0000 (12:04 +0200)
apps/catalogue/management/commands/prune_audience.py [new file with mode: 0644]

diff --git a/apps/catalogue/management/commands/prune_audience.py b/apps/catalogue/management/commands/prune_audience.py
new file mode 100644 (file)
index 0000000..114a26f
--- /dev/null
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+
+import sys
+from django.contrib.auth.models import User
+from lxml import etree
+from optparse import make_option
+
+from django.core.management import BaseCommand
+
+from catalogue.models import Book
+from librarian import DCNS
+
+
+class Command(BaseCommand):
+    """Remove ``audience`` elements from the sources of published books.
+
+    Usage: ``prune_audience -u USER exclude_file``
+
+    For every published book whose slug is not listed in ``exclude_file``,
+    the head revision of the book's first chunk is parsed, all
+    ``DCNS("audience")`` elements are removed, and the result is committed
+    as a new revision attributed to USER.
+    """
+    help = 'Remove audience tags from published books not listed in exclude_file.'
+    option_list = BaseCommand.option_list + (
+        # make_option('-q', '--quiet', action='store_false', dest='verbose',
+        #     default=True, help='Less output'),
+        # make_option('-d', '--dry-run', action='store_true', dest='dry_run',
+        #     default=False, help="Don't actually touch anything"),
+        make_option(
+            '-u', '--username', dest='username', metavar='USER',
+            help='Assign commits to this user (required, preferably yourself).'),
+    )
+    args = 'exclude_file'
+
+    def handle(self, exclude_file, **options):
+        """Prune audience elements and commit the cleaned sources.
+
+        :param exclude_file: path to a file with one book slug per line;
+            blank lines are ignored and the listed books are left untouched.
+        :param options: parsed command-line options; ``username`` is required.
+
+        Exits with status 1 when the username is missing or matches no user.
+        """
+        username = options.get('username')
+        if not username:
+            print 'Please provide a username.'
+            sys.exit(1)
+        try:
+            user = User.objects.get(username=username)
+        except User.DoesNotExist:
+            # Report an unknown user cleanly instead of dumping a traceback.
+            print 'No such user: %s' % username
+            sys.exit(1)
+
+        # Read the exclusion list inside ``with`` so the file handle is
+        # closed right away instead of leaking until garbage collection.
+        with open(exclude_file, 'rb') as slug_file:
+            excluded_slugs = [line.strip() for line in slug_file if line.strip()]
+        books = Book.objects.exclude(slug__in=excluded_slugs)
+
+        for book in books:
+            if not book.is_published():
+                continue
+            print 'processing %s' % book.slug
+            # Only the first chunk is inspected (presumably that is where the
+            # metadata lives -- confirm against the document model).
+            chunk = book.chunk_set.first()
+            old_head = chunk.head
+            src = old_head.materialize()
+            tree = etree.fromstring(src)
+            audience_nodes = tree.findall('.//' + DCNS("audience"))
+            if not audience_nodes:
+                print '%s has no audience, skipping' % book.slug
+                continue
+
+            for node in audience_nodes:
+                node.getparent().remove(node)
+
+            # Carry the previous head's publishable flag over to the new
+            # revision.
+            chunk.commit(
+                etree.tostring(tree, encoding=unicode),
+                author=user,
+                description='automatyczne skasowanie audience',
+                publishable=old_head.publishable
+            )
+            print 'committed %s' % book.slug
+            if not old_head.publishable:
+                # The previous head was not marked publishable; report who
+                # authored it and its description so it can be checked by hand.
+                print 'Warning: %s not publishable, last head: %s, %s' % (
+                    book.slug, old_head.author.username, old_head.description[:40].replace('\n', ' '))