Update to new librarian api for html, txt.
[wolnelektury.git] / src / catalogue / management / commands / checkintegrity.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from django.core.management.base import BaseCommand
5 from librarian import ParseError
6 from catalogue.models import Book
7
8
class Command(BaseCommand):
    """Check catalogue books for structural inconsistencies.

    For every book, two checks are run:

    * the children stored in the database are compared with the parts
      listed in the book's parsed document metadata (report only);
    * the stored ancestor set is compared with the chain obtained by
      following ``parent`` links, and repopulated when it differs
      (unless ``--dry-run`` is given).
    """

    help = 'Checks integrity of catalogue data.'

    def add_arguments(self, parser):
        """Register the ``--quiet`` and ``--dry-run`` options."""
        parser.add_argument(
                '-q', '--quiet', action='store_false', dest='verbose',
                default=True, help='Suppress output')
        parser.add_argument(
                '-d', '--dry-run', action='store_true', dest='dry_run',
                default=False, help="Just check for problems, don't fix them")

    def handle(self, **options):
        """Run all integrity checks over every book in one transaction."""
        from django.db import transaction

        verbose = options['verbose']
        dry_run = options['dry_run']

        with transaction.atomic():
            for book in Book.objects.all().iterator():
                self._check_children(book, verbose)
                self._check_ancestry(book, verbose, dry_run)

                # TODO: check metadata tags, reset counters

    def _check_children(self, book, verbose):
        """Report a mismatch between stored children and document parts.

        Nothing is modified; problems found here are resolved by
        republishing, as the printed messages say.
        """
        try:
            info = book.wldocument().book_info
        except ParseError:
            if verbose:
                print("ERROR! Bad XML for book:", book.slug)
                print("To resolve: republish.")
                print()
            return

        should_be = [p.slug for p in info.parts]
        is_now = [p.slug for p in book.children.all().order_by('parent_number')]
        if should_be != is_now and verbose:
            from difflib import ndiff

            print("ERROR! Wrong children for book:", book.slug)
            print('\n'.join(ndiff(is_now, should_be)))
            print("To resolve: republish parent book.")
            print()

    def _check_ancestry(self, book, verbose, dry_run):
        """Compare stored ancestors with the parent chain; fix if allowed."""
        # Walk the parent links up to the root to get the expected ancestors.
        parents = []
        parent = book.parent
        while parent:
            parents.append(parent)
            parent = parent.parent

        ancestors = list(book.ancestor.all())
        if set(ancestors) == set(parents):
            return

        if verbose:
            print("Wrong ancestry for book:", book)
            # str.join() accepts only strings, so the model instances
            # have to be converted explicitly before joining.
            print("Is:       ", ", ".join(str(a) for a in ancestors))
            print("Should be:", ", ".join(str(p) for p in parents))
        if not dry_run:
            book.repopulate_ancestors()
            if verbose:
                print("Fixed.")
        if verbose:
            print()