handle utf in make_master
[redakcja.git] / apps / catalogue / management / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from collections import defaultdict
7 from django.db import transaction
8 from lxml import etree
9
10
class XmlUpdater(object):
    """A base class for massive XML updates.

    In a subclass, override `fix_tree` and/or register element fixers
    with the `fixes_elements` decorator.

    Attributes:
    * commit_desc: commits description
    * retain_publishable: set publishable if head is (default: True)
    * only_first_chunk: process only first chunks of books (default: False)
    * counters: per-instance statistics keyed by label, filled in by
      `run` / `fix_chunk` and printed by `print_results`
    """
    commit_desc = "auto-update"
    retain_publishable = True
    only_first_chunk = False

    # Fixers registered directly on this class: {xpath: [fixer, ...]}.
    # A subclass gets its own registry the first time a fixer is registered
    # through it (see `fixes_elements`), so sibling updaters don't share
    # fixers; registries are merged along the MRO by `_collect_fixers`.
    _element_fixers = defaultdict(list)

    def __init__(self):
        # Missing counters start at 0.
        self.counters = defaultdict(int)

    @classmethod
    def fixes_elements(cls, xpath):
        """Decorator, registering a function as a fixer for given elements.

        Any decorated function will be called like
            f(element, change=..., verbose=...)
        providing changeset as context, and should return True if it
        changed anything.

        The fixer is registered on the class the decorator was called on;
        it applies to that class and to all of its subclasses.

        :param xpath: element lookup, e.g. ".//{namespace-uri}tag-name"
        :returns: a decorator which returns the fixer unchanged
        """
        def wrapper(fixer):
            # Don't write into a registry inherited from a parent class:
            # that would leak this fixer into unrelated sibling updaters.
            if '_element_fixers' not in cls.__dict__:
                cls._element_fixers = defaultdict(list)
            cls._element_fixers[xpath].append(fixer)
            return fixer
        return wrapper

    @classmethod
    def _collect_fixers(cls):
        """Merges fixer registries of this class and all its base classes.

        :returns: dict mapping xpath to a list of fixers, base-class fixers
            first
        """
        merged = defaultdict(list)
        seen = set()
        for klass in reversed(cls.__mro__):
            registry = klass.__dict__.get('_element_fixers')
            # Skip classes without a registry, and don't count a registry
            # object twice if a subclass explicitly aliases its parent's.
            if not registry or id(registry) in seen:
                continue
            seen.add(id(registry))
            for xpath, fixers in registry.items():
                merged[xpath].extend(fixers)
        return merged

    def fix_tree(self, tree, verbose):
        """Override to provide general tree-fixing mechanism.

        :param tree: the parsed XML tree
        :param verbose: verbosity level
        :returns: True if anything changed
        """
        return False

    def fix_chunk(self, chunk, user, verbose=0, dry_run=False):
        """Runs the update for a single chunk.

        Parses the chunk's head, applies `fix_tree` and all registered
        element fixers, and commits the result if anything changed.
        Increments one of the 'Bad XML', 'Clean' or 'Updated chunks'
        counters.

        :param chunk: the chunk to process
        :param user: author of the new commit
        :param verbose: verbosity level; 1 reports invalid XML,
            2 also reports every processed chunk
        :param dry_run: if True, nothing is committed (the chunk is still
            counted as updated)
        """
        if verbose >= 2:
            print(chunk.get_absolute_url())
        old_head = chunk.head
        src = old_head.materialize()
        try:
            tree = etree.fromstring(src)
        except Exception:
            # Any parsing failure marks the chunk as bad, but interpreter
            # exits (KeyboardInterrupt, SystemExit) are allowed through.
            if verbose:
                print("%s: invalid XML" % chunk.get_absolute_url())
            self.counters['Bad XML'] += 1
            return

        dirty = False
        # Call the general fixing function.
        if self.fix_tree(tree, verbose=verbose):
            dirty = True
        # Call the registered fixers.
        for xpath, fixers in self._collect_fixers().items():
            for elem in tree.findall(xpath):
                for fixer in fixers:
                    if fixer(elem, change=old_head, verbose=verbose):
                        dirty = True

        if not dirty:
            self.counters['Clean'] += 1
            return

        if not dry_run:
            new_head = chunk.commit(
                etree.tostring(tree, encoding=unicode),
                author=user,
                description=self.commit_desc
            )
            # Carry the publishable flag over to the new head.
            if self.retain_publishable and old_head.publishable:
                new_head.set_publishable(True)
        if verbose >= 2:
            print("done")
        self.counters['Updated chunks'] += 1

    def run(self, user, verbose=0, dry_run=False, books=None):
        """Runs the actual update.

        All chunks are processed inside a single managed transaction,
        which is committed at the end; if anything raises, the transaction
        is rolled back and the exception is re-raised.

        :param user: author of the new commits
        :param verbose: verbosity level, passed to `fix_chunk`
        :param dry_run: if True, nothing is committed to the chunks
        :param books: iterable of books to process (default: all books)
        """
        if books is None:
            # Imported here, not at module level.
            from catalogue.models import Book
            books = Book.objects.all()

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        try:
            for book in books:
                self.counters['All books'] += 1
                chunks = book.chunk_set.all()
                if self.only_first_chunk:
                    chunks = chunks[:1]
                for chunk in chunks:
                    self.counters['All chunks'] += 1
                    self.fix_chunk(chunk, user, verbose, dry_run)
            transaction.commit()
        except BaseException:
            # Don't leave a half-done update pending in the transaction.
            transaction.rollback()
            raise
        finally:
            transaction.leave_transaction_management()

    def print_results(self):
        """Prints the counters, sorted by label."""
        for item in sorted(self.counters.items()):
            print("%s: %d" % item)
123
124
# Registry of available auto-taggers, keyed by name.
auto_taggers = {}
# NOTE(review): imported here rather than at the top of the file --
# presumably because `edumed` itself imports from this package, so the
# definitions above must exist first; confirm before moving this import.
from . import edumed
auto_taggers['edumed'] = edumed.tagger
128