fix flickr url regex
[redakcja.git] / apps / catalogue / management / commands / fix_rdf_about.py
1 # -*- coding: utf-8 -*-
2
3 from optparse import make_option
4
5 from django.contrib.auth.models import User
6 from django.core.management.base import BaseCommand
7 from django.db import transaction
8
9 from catalogue.models import Book
10
11
12 class Command(BaseCommand):
13     option_list = BaseCommand.option_list + (
14         make_option('-q', '--quiet', action='store_false', dest='verbose',
15             default=True, help='Less output'),
16         make_option('-d', '--dry-run', action='store_true', dest='dry_run',
17             default=False, help="Don't actually touch anything"),
18     )
19     help = 'Updates the rdf:about metadata field.'
20
21     def handle(self, *args, **options):
22         from lxml import etree
23
24         verbose = options.get('verbose')
25         dry_run = options.get('dry_run')
26
27         # Start transaction management.
28         transaction.commit_unless_managed()
29         transaction.enter_transaction_management()
30         transaction.managed(True)
31
32         all_books = 0
33         nonxml = 0
34         nordf = 0
35         already = 0
36         done = 0
37
38         for b in Book.objects.all():
39             all_books += 1
40             if verbose:
41                 print "%s: " % b.title,
42             chunk = b[0]
43             old_head = chunk.head
44             src = old_head.materialize()
45
46             try:
47                 t = etree.fromstring(src)
48             except:
49                 nonxml += 1
50                 if verbose:
51                     print "invalid XML"
52                 continue
53             desc = t.find(".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description")
54             if desc is None:
55                 nordf += 1
56                 if verbose:
57                     print "no RDF found"
58                 continue
59
60             correct_about = b.correct_about()
61             attr_name = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"
62             if desc.get(attr_name) == correct_about:
63                 already += 1
64                 if verbose:
65                     print "already correct"
66                 continue
67             desc.set(attr_name, correct_about)
68             if not dry_run:
69                 new_head = chunk.commit(etree.tostring(t, encoding=unicode),
70                     author_name='platforma redakcyjna',
71                     description='auto-update rdf:about'
72                     )
73                 # retain the publishable status
74                 if old_head.publishable:
75                     new_head.set_publishable(True)
76             if verbose:
77                 print "done"
78             done += 1
79
80         # Print results
81         print "All books: ", all_books
82         print "Invalid XML: ", nonxml
83         print "No RDF found: ", nordf
84         print "Already correct: ", already
85         print "Books updated: ", done
86
87         transaction.commit()
88         transaction.leave_transaction_management()
89