Updating flickr html parsing, fixing #2688
[redakcja.git] / apps / catalogue / management / commands / merge_books.py
1 # -*- coding: utf-8 -*-
2
3 from optparse import make_option
4 import sys
5
6 from django.contrib.auth.models import User
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.db import transaction
10
11 from slughifi import slughifi
12 from catalogue.models import Book
13
14
def common_prefix(texts):
    """Return the longest prefix shared by every string in ``texts``.

    ``texts`` may be any iterable of strings (it is materialised once, so
    generators are accepted).  The comparison is done character by character
    and stops at the first position where the strings differ or where the
    shortest string ends.

    Returns an empty string when ``texts`` is empty or when the strings have
    no common beginning.
    """
    texts = list(texts)
    if not texts:
        # min()/zip() have nothing to work with; there is no common part.
        return ""

    common = []
    # zip() stops at the shortest text, so no explicit length bound is needed.
    for chars in zip(*texts):
        if len(set(chars)) > 1:
            break
        common.append(chars[0])
    return "".join(common)
25
26
class Command(BaseCommand):
    """Management command that merges several books into a single book.

    The books are named by slug on the command line; the first one becomes
    the merged book and the remaining ones are appended to it.  With
    ``--guess`` nothing is merged: the command only prints ready-to-run
    ``merge_books`` invocations derived from the slugs found in the database.
    """

    option_list = BaseCommand.option_list + (
        make_option('-s', '--slug', dest='new_slug', metavar='SLUG',
            help='New slug of the merged book (defaults to common part of all slugs).'),
        make_option('-t', '--title', dest='new_title', metavar='TITLE',
            help='New title of the merged book (defaults to common part of all titles).'),
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Less output'),
        make_option('-g', '--guess', action='store_true', dest='guess', default=False,
            help='Try to guess what merges are needed (but do not apply them).'),
        make_option('-d', '--dry-run', action='store_true', dest='dry_run', default=False,
            help='Dry run: do not actually change anything.'),
        make_option('-f', '--force', action='store_true', dest='force', default=False,
            help='On slug conflict, hide the original book to archive.'),
    )
    help = 'Merges multiple books into one.'
    args = '[slug]...'

    def print_guess(self, dry_run=True, force=False):
        """Print suggested ``merge_books`` command lines for all books.

        Every book slug is matched against a few suffix patterns (foreword,
        part/book/chapter number, chapter range).  Books whose slugs share the
        same base and carry a recognised suffix are grouped, and for every
        group of at least two books a shell command merging them is printed.

        ``dry_run`` and ``force`` only decide whether ``--dry-run`` and
        ``--force`` are included in the printed commands.  Base slugs that
        already belong to a book outside the group are listed at the end as
        conflicts (or as books to be archived when ``force`` is set).
        """
        from collections import defaultdict
        from pipes import quote
        import re

        # (pattern, default order) pairs; the default is used when the
        # pattern matches but carries no usable number.  The patterns are
        # plain ASCII, so byte-string literals match unicode slugs as well.
        slug_patterns = (
            (re.compile(r'__?(przedmowa)$'), -1),
            (re.compile(r'__?(cz(esc)?|ksiega|rozdzial)__?(?P<n>\d*)$'), None),
            (re.compile(r'__?(rozdzialy__?)?(?P<n>\d*)-'), None),
        )

        def read_slug(slug):
            """Return ``(order, base_slug)``; ``order`` is None if unknown."""
            for pattern, default in slug_patterns:
                m = pattern.search(slug)
                if m:
                    base = slug[:m.start()]
                    try:
                        return int(m.group('n')), base
                    except (IndexError, ValueError):
                        # IndexError: the pattern has no 'n' group at all.
                        # ValueError: the group matched an empty string.
                        return default, base
            return None, slug

        def file_to_title(fname):
            """ Returns a title-like version of a filename. """
            parts = (p.replace('_', ' ').title() for p in fname.split('__'))
            return ' / '.join(parts)

        merges = defaultdict(list)
        slugs = set()
        for b in Book.objects.all():
            slugs.add(b.slug)
            n, ns = read_slug(b.slug)
            if n is not None:
                merges[ns].append((n, b))

        conflicting_slugs = []
        for slug in sorted(merges):
            # Order by the parsed number only; comparing whole tuples would
            # fall back to comparing Book instances on equal numbers.
            merge_list = sorted(merges[slug], key=lambda item: item[0])
            if len(merge_list) < 2:
                continue

            merge_slugs = [b.slug for _, b in merge_list]
            if slug in slugs and slug not in merge_slugs:
                conflicting_slugs.append(slug)

            title = file_to_title(slug)
            print("./manage.py merge_books %s%s--title=%s --slug=%s \\\n    %s\n" % (
                '--dry-run ' if dry_run else '',
                '--force ' if force else '',
                quote(title), slug,
                " \\\n    ".join(merge_slugs)
                ))

        if conflicting_slugs:
            if force:
                print(self.style.NOTICE('# These books will be archived:'))
            else:
                print(self.style.ERROR('# ERROR: Conflicting slugs:'))
            for slug in conflicting_slugs:
                print('# %s' % slug)

    @staticmethod
    def _to_text(value):
        """Decode a UTF-8 byte string option value; pass anything else through."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def _merge_books(self, slugs, new_slug, new_title, force, dry_run, verbose):
        """Merge the books named by ``slugs``; return True unless aborted.

        The first book receives ``new_title``/``new_slug`` (each falling back
        to the common prefix of the given titles/slugs) and every other book
        is appended to it.  Chunk titles and slugs are derived from whatever
        remains of the book title/slug after the common part is removed.

        Returns False, after printing an error, when another book already
        uses the target slug and ``force`` is not set.  Raises
        ``Book.DoesNotExist`` if any of ``slugs`` does not name a book.
        """
        books = [Book.objects.get(slug=slug) for slug in slugs]
        common_slug = common_prefix(slugs)
        common_title = common_prefix([b.title for b in books])

        if not new_title:
            new_title = common_title
        elif common_title.startswith(new_title):
            common_title = new_title

        if not new_slug:
            new_slug = common_slug
        elif common_slug.startswith(new_slug):
            common_slug = new_slug

        # Report a slug clash up front (also in dry runs), before anything is
        # printed or changed.  With --force the clash is resolved in the loop.
        if (not force and slugs[0] != new_slug
                and Book.objects.filter(slug=new_slug).exists()):
            print(self.style.ERROR('ERROR: Book with slug "%s" exists.' % new_slug))
            return False

        if dry_run and verbose:
            print(self.style.NOTICE('DRY RUN: nothing will be changed.'))
            print("")

        if verbose:
            print("New title: %s" % self.style.NOTICE(new_title))
            print("New slug: %s" % self.style.NOTICE(new_slug))
            print("")

        for i, book in enumerate(books):
            chunk_titles = []
            chunk_slugs = []

            book_title = book.title[len(common_title):].replace(' / ', ' ').lstrip()
            book_slug = book.slug[len(common_slug):].replace('__', '_').lstrip('-_')
            for j, chunk in enumerate(book):
                if j:
                    # Later chunks of the same book get a numeric suffix.
                    new_chunk_title = book_title + '_%d' % j
                    new_chunk_slug = book_slug + '_%d' % j
                else:
                    new_chunk_title, new_chunk_slug = book_title, book_slug

                chunk_titles.append(new_chunk_title)
                chunk_slugs.append(new_chunk_slug)

                if verbose:
                    print("title: %s // %s  -->\n       %s // %s\nslug: %s / %s  -->\n      %s / %s" % (
                        book.title, chunk.title,
                        new_title, new_chunk_title,
                        book.slug, chunk.slug,
                        new_slug, new_chunk_slug))
                    print("")

            if dry_run:
                continue

            try:
                conflict = Book.objects.get(slug=new_slug)
            except Book.DoesNotExist:
                conflict = None
            else:
                # The merge target itself holding the slug is not a conflict.
                if conflict == books[0]:
                    conflict = None

            if conflict:
                if force:
                    # Move the other book out of the way; keep prefixing
                    # dots until the archive slug itself is unused.
                    archived_slug = '.' + conflict.slug
                    while Book.objects.filter(slug=archived_slug).exists():
                        archived_slug = '.' + archived_slug
                    conflict.slug = archived_slug
                    conflict.save()
                    print(self.style.NOTICE('Book with slug "%s" moved to "%s".' % (new_slug, conflict.slug)))
                else:
                    print(self.style.ERROR('ERROR: Book with slug "%s" exists.' % new_slug))
                    return False

            if i:
                books[0].append(books[i], slugs=chunk_slugs, titles=chunk_titles)
            else:
                book.title = new_title
                book.slug = new_slug
                book.save()
                for j, chunk in enumerate(book):
                    chunk.title = chunk_titles[j]
                    chunk.slug = chunk_slugs[j]
                    chunk.save()

        return True

    def handle(self, *slugs, **options):
        """Entry point: print merge guesses or merge the books in ``slugs``.

        ``--guess`` and explicit slugs are mutually exclusive.  The merge
        itself runs under manual transaction management: it is committed
        when it completes and rolled back when it is aborted or raises.
        """
        self.style = color_style()

        force = options.get('force')
        guess = options.get('guess')
        dry_run = options.get('dry_run')
        # Both options are None when not given on the command line, so they
        # must not be decoded unconditionally.
        new_slug = self._to_text(options.get('new_slug'))
        new_title = self._to_text(options.get('new_title'))
        verbose = options.get('verbose')

        if guess:
            if slugs:
                print("Please specify either slugs, or --guess.")
            else:
                self.print_guess(dry_run, force)
            return
        if not slugs:
            print("Please specify some book slugs")
            return

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        committed = False
        try:
            if self._merge_books(slugs, new_slug, new_title,
                                 force, dry_run, verbose):
                transaction.commit()
                committed = True
        finally:
            # Never leave the managed block with pending work, whether the
            # merge was aborted or an exception is propagating.
            if not committed:
                transaction.rollback()
            transaction.leave_transaction_management()
218