# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
"""Management command that auto-fixes obvious Dublin Core metadata errors.

Two kinds of fixes are registered: normalizing malformed WLURI values in
fields validated as WLURI, and correcting the ``rdf:about`` attribute of
``rdf:Description`` elements.
"""
from librarian import RDFNS, WLURI, ValidationError
from librarian.dcparser import BookInfo
from documents.management import XmlUpdater
from documents.management.commands import XmlUpdaterCommand


class FixDC(XmlUpdater):
    """XML updater that repairs WLURI fields and ``rdf:about`` in DC.

    The fixer functions below take ``(elem, change, verbose)`` rather than
    ``self``: they are handed to ``XmlUpdater.fixes_elements`` together with
    an XPath expression instead of being called as instance methods.
    """

    # Settings read by the XmlUpdater base class (defined outside this file).
    # NOTE(review): exact semantics live in XmlUpdater -- confirm there.
    commit_desc = "auto-fixing DC"
    retain_publishable = True
    only_first_chunk = True

    def fix_wluri(elem, change, verbose):
        """Rewrite a malformed WLURI in ``elem.text`` to its canonical form.

        :param elem: XML element whose text should hold a WLURI.
        :param change: the change being fixed; unused by this fixer.
        :param verbose: when true, print a line describing the replacement.
        :returns: ``True`` if the text was replaced; a falsy value (``False``
            or ``None``) if it was left untouched.
        """
        if elem.text is None:
            # An element without text content has ``text`` set to None, so
            # there is nothing to validate or to rebuild a slug from (and
            # ``None.strip()`` below would raise). Leave the element as is.
            return False
        try:
            WLURI.strict(elem.text)
        except ValidationError:
            # Rebuild the URI from whatever slug can be extracted from the
            # current (stripped) text.
            correct_field = str(WLURI.from_slug(
                WLURI(elem.text.strip()).slug))
            try:
                WLURI.strict(correct_field)
            except ValidationError:
                # Can't make a valid WLURI out of it, leave as is.
                return False
            if verbose:
                print("Changing %s from %s to %s" % (
                    elem.tag, elem.text, correct_field
                ))
            elem.text = correct_field
            return True

    # Register fix_wluri for every BookInfo field validated as a WLURI.
    for field in BookInfo.FIELDS:
        if field.validator == WLURI:
            XmlUpdater.fixes_elements('.//' + field.uri)(fix_wluri)

    @XmlUpdater.fixes_elements(".//" + RDFNS("Description"))
    def fix_rdfabout(elem, change, verbose):
        """Set ``rdf:about`` on ``elem`` to the book's expected value.

        The expected value comes from ``change.tree.book.correct_about()``.

        :param elem: the ``rdf:Description`` element to check.
        :param change: the change being fixed; its ``tree.book`` supplies
            the expected ``rdf:about`` value.
        :param verbose: when true, print a line describing the replacement.
        :returns: ``True`` if the attribute was changed, ``None`` otherwise.
        """
        correct_about = change.tree.book.correct_about()
        attr_name = RDFNS("about")
        current_about = elem.get(attr_name)
        if current_about != correct_about:
            if verbose:
                print("Changing rdf:about from %s to %s" % (
                    current_about, correct_about
                ))
            elem.set(attr_name, correct_about)
            return True


class Command(XmlUpdaterCommand):
    """Management command entry point that runs the FixDC updater."""

    updater = FixDC
    help = 'Fixes obvious errors in DC: rdf:about and WLURI format.'