X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/fdd62169ba22c4c1be2f2306b5339eadd74ffb6d..dd84c95ada6ae106a819b278d542adf0ccc72f21:/apps/catalogue/management/__init__.py diff --git a/apps/catalogue/management/__init__.py b/apps/catalogue/management/__init__.py old mode 100755 new mode 100644 index e69de29b..6bb20474 --- a/apps/catalogue/management/__init__.py +++ b/apps/catalogue/management/__init__.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from collections import defaultdict +from django.db import transaction +from lxml import etree + + +class XmlUpdater(object): + """A base class for massive XML updates. + + In a subclass, override `fix_tree` and/or use `fixes_field` decorator. + Attributes: + * commit_desc: commits description + * retain_publishable: set publishable if head is (default: True) + * only_first_chunk: process only first chunks of books (default: False) + """ + commit_desc = "auto-update" + retain_publishable = True + only_first_chunk = False + + _element_fixers = defaultdict(list) + + def __init__(self): + self.counters = defaultdict(lambda: 0) + + @classmethod + def fixes_elements(cls, xpath): + """Decorator, registering a function as a fixer for given field type. + + Any decorated function will be called like + f(element, change=..., verbose=...) + providing changeset as context. + + :param xpath: element lookup, e.g. ".//{namespace-uri}tag-name" + :returns: True if anything changed + """ + def wrapper(fixer): + cls._element_fixers[xpath].append(fixer) + return fixer + return wrapper + + def fix_tree(self, tree, verbose): + """Override to provide general tree-fixing mechanism. + + :param tree: the parsed XML tree + :param verbose: verbosity level + :returns: True if anythig changed + """ + return False + + def fix_chunk(self, chunk, user, verbose=0, dry_run=False): + """Runs the update for a single chunk.""" + if verbose >= 2: + print chunk.get_absolute_url() + old_head = chunk.head + src = old_head.materialize() + try: + tree = etree.fromstring(src) + except: + if verbose: + print "%s: invalid XML" % chunk.get_absolute_url() + self.counters['Bad XML'] += 1 + return + + dirty = False + # Call the general fixing function. + if self.fix_tree(tree, verbose=verbose): + dirty = True + # Call the registered fixers. + for xpath, fixers in self._element_fixers.items(): + for elem in tree.findall(xpath): + for fixer in fixers: + if fixer(elem, change=old_head, verbose=verbose): + dirty = True + + if not dirty: + self.counters['Clean'] += 1 + return + + if not dry_run: + new_head = chunk.commit( + etree.tostring(tree, encoding=unicode), + author=user, + description=self.commit_desc + ) + if self.retain_publishable: + if old_head.publishable: + new_head.set_publishable(True) + if verbose >= 2: + print "done" + self.counters['Updated chunks'] += 1 + + def run(self, user, verbose=0, dry_run=False, books=None): + """Runs the actual update.""" + if books is None: + from catalogue.models import Book + books = Book.objects.all() + + # Start transaction management. + transaction.commit_unless_managed() + transaction.enter_transaction_management() + transaction.managed(True) + + for book in books: + self.counters['All books'] += 1 + chunks = book.chunk_set.all() + if self.only_first_chunk: + chunks = chunks[:1] + for chunk in chunks: + self.counters['All chunks'] += 1 + self.fix_chunk(chunk, user, verbose, dry_run) + + transaction.commit() + transaction.leave_transaction_management() + + def print_results(self): + """Prints the counters.""" + for item in sorted(self.counters.items()): + print "%s: %d" % item + + +auto_taggers = {} +from . import edumed +auto_taggers['edumed'] = edumed.tagger +