Wikidata in catalogue.
[redakcja.git] / src / documents / management / commands / merge_books.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 import sys
5
6 from django.contrib.auth.models import User
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.db import transaction
10
11 from documents.models import Book
12
13
def common_prefix(texts):
    """Return the longest string that every item of *texts* starts with.

    The comparison is done character by character and stops at the first
    position where the items differ, or at the end of the shortest item.

    :param texts: a sequence (list, tuple, ...) of strings.
    :returns: the shared leading substring; ``""`` when there is none or
        when *texts* is empty.
    """
    texts = list(texts)
    if not texts:
        # min()/zip() over nothing has no meaningful prefix; be explicit.
        return ""

    common = []
    # zip() yields one tuple of "column" characters per position and stops
    # at the end of the shortest text.
    for column in zip(*texts):
        if len(set(column)) > 1:
            break
        common.append(column[0])
    return "".join(common)
24
25
class Command(BaseCommand):
    """Management command that merges several books into a single one.

    The first slug given becomes the target book: it is renamed to the new
    title/slug and every following book is appended to it through
    ``Book.append``.  With ``--guess`` nothing is merged; instead the command
    prints ready-to-run ``merge_books`` invocations inferred from slug
    patterns.
    """

    help = 'Merges multiple books into one.'
    args = '[slug]...'

    def add_arguments(self, parser):
        """Register the command-line arguments on *parser*."""
        # Django pops the option named "args" and passes it positionally to
        # handle(), which is how the slugs reach ``handle(*slugs)``.
        parser.add_argument(
            'args', metavar='slug', nargs='*',
            help='Slugs of the books to merge; the first one is the target.')
        parser.add_argument(
            '-s', '--slug', dest='new_slug', metavar='SLUG',
            help='New slug of the merged book (defaults to common part of all slugs).')
        parser.add_argument(
            '-t', '--title', dest='new_title', metavar='TITLE',
            help='New title of the merged book (defaults to common part of all titles).')
        parser.add_argument(
            '-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Less output')
        parser.add_argument(
            '-g', '--guess', action='store_true', dest='guess', default=False,
            help='Try to guess what merges are needed (but do not apply them).')
        parser.add_argument(
            '-d', '--dry-run', action='store_true', dest='dry_run', default=False,
            help='Dry run: do not actually change anything.')
        parser.add_argument(
            '-f', '--force', action='store_true', dest='force', default=False,
            help='On slug conflict, hide the original book to archive.')

    def print_guess(self, dry_run=True, force=False):
        """Print suggested ``merge_books`` invocations for all books.

        Books whose slugs end in a recognised "part" suffix are grouped by
        the slug prefix before that suffix; every group with at least two
        books yields one suggested command line.  Nothing is modified.

        :param dry_run: include ``--dry-run`` in the suggested commands.
        :param force: include ``--force`` in the suggested commands; also
            changes how prefix slugs already taken by other books are
            reported (archived vs. error).
        """
        from collections import defaultdict
        from shlex import quote
        import re

        # (pattern, fallback part number used when the pattern has no
        # numeric group "n" or that group matched an empty string).
        # Compiled once here rather than on every read_slug() call.
        patterns = (
            (re.compile(r'__?(przedmowa)$'), -1),
            (re.compile(r'__?(cz(esc)?|ksiega|rozdzial)__?(?P<n>\d*)$'), None),
            (re.compile(r'__?(rozdzialy__?)?(?P<n>\d*)-'), None),
        )

        def read_slug(slug):
            """Split *slug* into ``(part_number, prefix)``.

            ``part_number`` is None when no pattern matches or when the
            matching pattern carries no usable number and has no fallback.
            """
            for pattern, default in patterns:
                m = pattern.search(slug)
                if not m:
                    continue
                prefix = slug[:m.start()]
                try:
                    return int(m.group('n')), prefix
                except (IndexError, ValueError):
                    # IndexError: the pattern has no group "n";
                    # ValueError: \d* matched nothing, so int('') failed.
                    return default, prefix
            return None, slug

        def file_to_title(fname):
            """ Returns a title-like version of a filename. """
            parts = (p.replace('_', ' ').title() for p in fname.split('__'))
            return ' / '.join(parts)

        merges = defaultdict(list)
        slugs = set()
        for b in Book.objects.all():
            slugs.add(b.slug)
            n, ns = read_slug(b.slug)
            if n is not None:
                merges[ns].append((n, b))

        conflicting_slugs = []
        for slug in sorted(merges):
            # Sort on the part number only: on a tie, comparing whole tuples
            # would fall through to comparing Book instances.
            merge_list = sorted(merges[slug], key=lambda item: item[0])
            if len(merge_list) < 2:
                continue

            merge_slugs = [b.slug for _, b in merge_list]
            # The target slug is taken by a book that is not part of this merge.
            if slug in slugs and slug not in merge_slugs:
                conflicting_slugs.append(slug)

            title = file_to_title(slug)
            print("./manage.py merge_books %s%s--title=%s --slug=%s \\\n    %s\n" % (
                '--dry-run ' if dry_run else '',
                '--force ' if force else '',
                quote(title), slug,
                " \\\n    ".join(merge_slugs)
                ))

        if conflicting_slugs:
            if force:
                print(self.style.NOTICE('# These books will be archived:'))
            else:
                print(self.style.ERROR('# ERROR: Conflicting slugs:'))
            for slug in conflicting_slugs:
                print('#', slug)

    def handle(self, *slugs, **options):
        """Entry point: either print merge guesses or merge the given books.

        :param slugs: slugs of the books to merge; the first is the target.
        :param options: parsed command-line options (see ``add_arguments``).
        """
        self.style = color_style()

        force = options.get('force')
        guess = options.get('guess')
        dry_run = options.get('dry_run')
        # Option values are already text (or None when the option was not
        # given), so they must not be decoded.
        new_slug = options.get('new_slug')
        new_title = options.get('new_title')
        verbose = options.get('verbose')

        if guess:
            if slugs:
                print("Please specify either slugs, or --guess.")
            else:
                self.print_guess(dry_run, force)
            return
        if not slugs:
            print("Please specify some book slugs")
            return

        # All database changes happen atomically: an exception anywhere in
        # the merge rolls everything back.
        with transaction.atomic():
            self._merge(slugs, new_slug, new_title, force, dry_run, verbose)

    def _merge(self, slugs, new_slug, new_title, force, dry_run, verbose):
        """Merge the books named by *slugs* into the first one.

        Meant to be called inside a transaction.  Raises
        ``Book.DoesNotExist`` when one of *slugs* does not name a book.
        """
        books = [Book.objects.get(slug=slug) for slug in slugs]
        common_slug = common_prefix(slugs)
        common_title = common_prefix([b.title for b in books])

        # An explicit title/slug that is itself a prefix of the common part
        # shortens the part stripped from the individual book names below.
        if not new_title:
            new_title = common_title
        elif common_title.startswith(new_title):
            common_title = new_title

        if not new_slug:
            new_slug = common_slug
        elif common_slug.startswith(new_slug):
            common_slug = new_slug

        if dry_run and verbose:
            print(self.style.NOTICE('DRY RUN: nothing will be changed.'))
            print()

        if verbose:
            print("New title:", self.style.NOTICE(new_title))
            print("New slug:", self.style.NOTICE(new_slug))
            print()

        # A different book may already own the target slug.  Check once,
        # before anything is modified; a dry run reports the same outcome a
        # real run would have.
        conflict = None
        if slugs[0] != new_slug:
            conflict = Book.objects.filter(slug=new_slug).first()
        if conflict is not None:
            if not force:
                print(self.style.ERROR('ERROR: Book with slug "%s" exists.' % new_slug))
                return
            if not dry_run:
                # Archive the conflicting book under a dot-prefixed slug,
                # adding dots until the archive slug itself is free.
                archived_slug = '.' + conflict.slug
                while Book.objects.filter(slug=archived_slug).exists():
                    archived_slug = '.' + archived_slug
                conflict.slug = archived_slug
                conflict.save()
                print(self.style.NOTICE('Book with slug "%s" moved to "%s".' % (new_slug, conflict.slug)))

        for i, book in enumerate(books):
            chunk_titles = []
            chunk_slugs = []

            # What remains of the book's own title/slug once the shared part
            # is removed becomes the base name of its chunks.
            book_title = book.title[len(common_title):].replace(' / ', ' ').lstrip()
            book_slug = book.slug[len(common_slug):].replace('__', '_').lstrip('-_')
            for j, chunk in enumerate(book):
                if j:
                    # Later chunks of the same book get a numeric suffix.
                    new_chunk_title = book_title + '_%d' % j
                    new_chunk_slug = book_slug + '_%d' % j
                else:
                    new_chunk_title, new_chunk_slug = book_title, book_slug

                chunk_titles.append(new_chunk_title)
                chunk_slugs.append(new_chunk_slug)

                if verbose:
                    print("title: %s // %s  -->\n       %s // %s\nslug: %s / %s  -->\n      %s / %s" % (
                        book.title, chunk.title,
                        new_title, new_chunk_title,
                        book.slug, chunk.slug,
                        new_slug, new_chunk_slug))
                    print()

            if dry_run:
                continue

            if i:
                # Every book after the first is appended to the target book.
                books[0].append(book, slugs=chunk_slugs, titles=chunk_titles)
            else:
                # The first book is the merge target: rename it and its chunks.
                book.title = new_title
                book.slug = new_slug
                book.save()
                for j, chunk in enumerate(book):
                    chunk.title = chunk_titles[j]
                    chunk.slug = chunk_slugs[j]
                    chunk.save()
220