1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import defaultdict
5 from django.db import transaction
9 class XmlUpdater(object):
10 """A base class for massive XML updates.
12 In a subclass, override `fix_tree` and/or use the `fixes_elements` decorator.
14 * commit_desc: description used for the commits (default: "auto-update")
15 * retain_publishable: mark the new head publishable if the old head was (default: True)
16 * only_first_chunk: process only first chunks of books (default: False)
18 commit_desc = "auto-update"
19 retain_publishable = True
20 only_first_chunk = False
22 _element_fixers = defaultdict(list)
25 self.counters = defaultdict(lambda: 0)
28 def fixes_elements(cls, xpath):
29 """Decorator registering a function as a fixer for elements matching the given XPath.
31 Any decorated function will be called like
32 f(element, change=..., verbose=...)
33 providing changeset as context.
35 :param xpath: element lookup, e.g. ".//{namespace-uri}tag-name"
36 The decorated function should return True if it changed anything.
39 cls._element_fixers[xpath].append(fixer)
43 def fix_tree(self, tree, verbose):
44 """Override to provide general tree-fixing mechanism.
46 :param tree: the parsed XML tree
47 :param verbose: verbosity level
48 :returns: True if anything changed
52 def fix_chunk(self, chunk, user, verbose=0, dry_run=False):
53 """Runs the update for a single chunk."""
55 print(chunk.get_absolute_url())
57 src = old_head.materialize()
59 tree = etree.fromstring(src)
62 print("%s: invalid XML" % chunk.get_absolute_url())
63 self.counters['Bad XML'] += 1
67 # Call the general fixing function.
68 if self.fix_tree(tree, verbose=verbose):
70 # Call the registered fixers.
71 for xpath, fixers in self._element_fixers.items():
72 for elem in tree.findall(xpath):
74 if fixer(elem, change=old_head, verbose=verbose):
78 self.counters['Clean'] += 1
82 new_head = chunk.commit(
83 etree.tostring(tree, encoding='unicode'),
85 description=self.commit_desc
87 if self.retain_publishable:
88 if old_head.publishable:
89 new_head.set_publishable(True)
92 self.counters['Updated chunks'] += 1
94 def run(self, user, verbose=0, dry_run=False, books=None):
95 """Runs the actual update."""
97 from catalogue.models import Book
98 books = Book.objects.all()
100 # Start transaction management.
101 with transaction.atomic():
103 self.counters['All books'] += 1
104 chunks = book.chunk_set.all()
105 if self.only_first_chunk:
108 self.counters['All chunks'] += 1
109 self.fix_chunk(chunk, user, verbose, dry_run)
def print_results(self):
    """Print each counter as a ``name: value`` line, ordered by name."""
    ordered = sorted(self.counters.items())
    for label, count in ordered:
        print("%s: %d" % (label, count))