1 # -*- coding: utf-8 -*-
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 from collections import defaultdict
7 from django.db import transaction
11 class XmlUpdater(object):
12 """A base class for massive XML updates.
14 In a subclass, override `fix_tree` and/or use the `fixes_elements` decorator.
16 * commit_desc: description passed to `chunk.commit` for every commit made
17 * retain_publishable: mark the new head publishable if the old head was (default: True)
18 * only_first_chunk: process only first chunks of books (default: False)
20 commit_desc = "auto-update"
21 retain_publishable = True
22 only_first_chunk = False
# Registry of element fixers: xpath -> list of fixer functions.
# NOTE(review): this dict is a class attribute and `fixes_elements` appends to
# it through `cls._element_fixers`; unless a subclass rebinds the attribute,
# all subclasses append to this one shared dict -- confirm that is intended.
24 _element_fixers = defaultdict(list)
# Tallies kept on the instance; a key that was never incremented reads as 0.
27 self.counters = defaultdict(lambda: 0)
30 def fixes_elements(cls, xpath):
31 """Decorator, registering a function as a fixer for elements matching `xpath`.
33 Any decorated function will be called like
34 f(element, change=..., verbose=...)
35 providing changeset as context.
37 :param xpath: element lookup passed to `tree.findall`, e.g. ".//{namespace-uri}tag-name"
38 :returns: (from the decorated fixer) True if anything changed
# Fixers registered for the same xpath accumulate in a list, in registration order.
41 cls._element_fixers[xpath].append(fixer)
45 def fix_tree(self, tree, verbose):
46 """Override to provide general tree-fixing mechanism.
48 :param tree: the parsed XML tree (the result of `etree.fromstring` in `fix_chunk`)
49 :param verbose: verbosity level
50 :returns: True if anything changed
54 def fix_chunk(self, chunk, user, verbose=0, dry_run=False):
55 """Runs the update for a single chunk.

The source returned by `old_head.materialize()` is parsed with
`etree.fromstring`; `fix_tree` and every registered element fixer are then
applied to the tree. Outcomes are tallied in `self.counters` under the
keys 'Bad XML', 'Clean' and 'Updated chunks'.

:param chunk: object providing `get_absolute_url()` and `commit()`
:param user: NOTE(review): not referenced on the lines visible here --
    confirm how it is used
:param verbose: verbosity level, forwarded to `fix_tree` and to the fixers
:param dry_run: NOTE(review): presumably suppresses the commit; the line
    guarding it is not visible here -- confirm
"""
57 print chunk.get_absolute_url()
59 src = old_head.materialize()
61 tree = etree.fromstring(src)
64 print "%s: invalid XML" % chunk.get_absolute_url()
65 self.counters['Bad XML'] += 1
69 # Call the general fixing function.
70 if self.fix_tree(tree, verbose=verbose):
72 # Call the registered fixers.
73 for xpath, fixers in self._element_fixers.items():
74 for elem in tree.findall(xpath):
76 if fixer(elem, change=old_head, verbose=verbose):
80 self.counters['Clean'] += 1
# Commit the serialized tree as a new head, described by `commit_desc`.
84 new_head = chunk.commit(
# `unicode` is the Python 2 builtin (as are the `print` statements above).
85 etree.tostring(tree, encoding=unicode),
87 description=self.commit_desc
# Carry the publishable flag over from the old head to the new one.
89 if self.retain_publishable:
90 if old_head.publishable:
91 new_head.set_publishable(True)
94 self.counters['Updated chunks'] += 1
96 def run(self, user, verbose=0, dry_run=False, books=None):
97 """Runs the actual update.

Calls `fix_chunk` for chunks taken from `book.chunk_set.all()`, counting
processed items in `self.counters` under 'All books' and 'All chunks'.

:param user: forwarded to `fix_chunk`
:param verbose: forwarded to `fix_chunk`
:param dry_run: forwarded to `fix_chunk`
:param books: books to process; `Book.objects.all()` is assigned as a
    fallback (NOTE(review): the condition guarding the fallback is not
    visible here, presumably `books is None` -- confirm)
"""
99 from catalogue.models import Book
100 books = Book.objects.all()
# NOTE(review): the four `transaction.*` calls below belong to Django's
# legacy transaction-management API (deprecated in Django 1.6, removed in
# 1.8); they need replacing, e.g. with `transaction.atomic`, before an upgrade.
102 # Start transaction management.
103 transaction.commit_unless_managed()
104 transaction.enter_transaction_management()
105 transaction.managed(True)
108 self.counters['All books'] += 1
109 chunks = book.chunk_set.all()
110 if self.only_first_chunk:
113 self.counters['All chunks'] += 1
114 self.fix_chunk(chunk, user, verbose, dry_run)
117 transaction.leave_transaction_management()
def print_results(self):
    """Print the collected counters, one ``name: value`` line each.

    Lines are written to standard output, ordered by counter name.
    """
    for name, count in sorted(self.counters.items()):
        # Parenthesised single-argument form: identical output under the
        # Python 2 print statement and the Python 3 print function.
        print("%s: %d" % (name, count))
# NOTE(review): neither `auto_taggers` nor `edumed` is defined or imported in
# the visible part of this file -- confirm both are in scope at this point.
127 auto_taggers['edumed'] = edumed.tagger