From: Radek Czajka
Date: Thu, 22 Sep 2011 12:15:50 +0000 (+0200)
Subject: some book merging automation
X-Git-Url: https://git.mdrn.pl/redakcja.git/commitdiff_plain/5e0a24ff71388cc6b0332916c862fe9cc80187e0?ds=sidebyside;hp=--cc

some book merging automation
---

5e0a24ff71388cc6b0332916c862fe9cc80187e0
diff --git a/apps/catalogue/management/commands/merge_books.py b/apps/catalogue/management/commands/merge_books.py
new file mode 100755
index 00000000..8ec91f76
--- /dev/null
+++ b/apps/catalogue/management/commands/merge_books.py
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+
+from optparse import make_option
+import sys
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+from django.core.management.color import color_style
+from django.db import transaction
+
+from slughifi import slughifi
+from catalogue.models import Book
+
+
+def common_prefix(texts):
+    common = []
+
+    min_len = min(len(text) for text in texts)
+    for i in range(min_len):
+        chars = list(set([text[i] for text in texts]))
+        if len(chars) > 1:
+            break
+        common.append(chars[0])
+    return "".join(common)
+
+
+def print_guess(dry_run=True):
+    from collections import defaultdict
+    from pipes import quote
+    import re
+
+    def read_slug(slug):
+        res = []
+        res.append((re.compile(ur'__?(przedmowa)$'), -1))
+        res.append((re.compile(ur'__?(cz(esc)?|ksiega|rozdzial)__?(?P<n>\d*)$'), None))
+        res.append((re.compile(ur'__?(rozdzialy__?)?(?P<n>\d*)-'), None))
+
+        for r, default in res:
+            m = r.search(slug)
+            if m:
+                start = m.start()
+                try:
+                    return int(m.group('n')), slug[:start]
+                except IndexError:
+                    return default, slug[:start]
+        return None, slug
+
+    def file_to_title(fname):
+        """ Returns a title-like version of a filename. """
+        parts = (p.replace('_', ' ').title() for p in fname.split('__'))
+        return ' / '.join(parts)
+
+    merges = defaultdict(list)
+    for b in Book.objects.all():
+        n, ns = read_slug(b.slug)
+        if n is not None:
+            merges[ns].append((n, b))
+
+    for slug in sorted(merges.keys()):
+        merge_list = sorted(merges[slug])
+        if len(merge_list) < 2:
+            continue
+
+        title = file_to_title(slug)
+        print "./manage.py merge_books %s--title=%s --slug=%s \\\n %s\n" % (
+            '--dry-run ' if dry_run else '',
+            quote(title), slug,
+            " \\\n ".join(b.slug for i, b in merge_list)
+        )
+
+
+class Command(BaseCommand):
+    option_list = BaseCommand.option_list + (
+        make_option('-s', '--slug', dest='new_slug', metavar='SLUG',
+            help='New slug of the merged book (defaults to common part of all slugs).'),
+        make_option('-t', '--title', dest='new_title', metavar='TITLE',
+            help='New title of the merged book (defaults to common part of all titles).'),
+        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
+            help='Less output'),
+        make_option('-g', '--guess', action='store_true', dest='guess', default=False,
+            help='Try to guess what merges are needed (but do not apply them).'),
+        make_option('-d', '--dry-run', action='store_true', dest='dry_run', default=False,
+            help='Dry run: do not actually change anything.'),
+    )
+    help = 'Merges multiple books into one.'
+    args = '[slug]...'
+
+    def handle(self, *slugs, **options):
+
+        self.style = color_style()
+
+        guess = options.get('guess')
+        dry_run = options.get('dry_run')
+        new_slug = options.get('new_slug')
+        new_title = options.get('new_title')
+        verbose = options.get('verbose')
+
+        if guess:
+            if slugs:
+                print "Please specify either slugs, or --guess."
+                return
+            else:
+                print_guess(dry_run)
+                return
+        if not slugs:
+            print "Please specify some book slugs"
+            return
+
+
+        # Start transaction management.
+        transaction.commit_unless_managed()
+        transaction.enter_transaction_management()
+        transaction.managed(True)
+
+
+        books = [Book.objects.get(slug=slug) for slug in slugs]
+        common_slug = common_prefix(slugs)
+        common_title = common_prefix([b.title for b in books])
+
+        if not new_title:
+            new_title = common_title
+        elif common_title.startswith(new_title):
+            common_title = new_title
+
+        if not new_slug:
+            new_slug = common_slug
+        elif common_slug.startswith(new_slug):
+            common_slug = new_slug
+
+        if dry_run and verbose:
+            print self.style.NOTICE('DRY RUN: nothing will be changed.')
+            print
+
+        if verbose:
+            print "New title:", self.style.NOTICE(new_title)
+            print "New slug:", self.style.NOTICE(new_slug)
+            print
+
+        for i, book in enumerate(books):
+            chunk_titles = []
+            chunk_slugs = []
+
+            book_title = book.title[len(common_title):].replace(' / ', ' ').lstrip()
+            book_slug = book.slug[len(common_slug):].replace('__', '_').lstrip('-_')
+            for j, chunk in enumerate(book):
+                if j:
+                    new_chunk_title = book_title + '_%d' % j
+                    new_chunk_slug = book_slug + '_%d' % j
+                else:
+                    new_chunk_title, new_chunk_slug = book_title, book_slug
+
+                chunk_titles.append(new_chunk_title)
+                chunk_slugs.append(new_chunk_slug)
+
+                if verbose:
+                    print "title: %s // %s -->\n %s // %s\nslug: %s / %s -->\n %s / %s" % (
+                        book.title, chunk.comment,
+                        new_title, new_chunk_title,
+                        book.slug, chunk.slug,
+                        new_slug, new_chunk_slug)
+                    print
+
+            if not dry_run:
+                if i:
+                    books[0].append(books[i], slugs=chunk_slugs, titles=chunk_titles)
+                else:
+                    book.title = new_title
+                    book.slug = new_slug
+                    book.save()
+                    for j, chunk in enumerate(book):
+                        chunk.comment = chunk_titles[j]
+                        chunk.slug = chunk_slugs[j]
+                        chunk.save()
+
+
+        transaction.commit()
+        transaction.leave_transaction_management()
+
diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py
index ebeb9aef..69d0a0d3 100644
--- a/apps/catalogue/models.py
+++ b/apps/catalogue/models.py
@@ -159,37 +159,51 @@ class Book(models.Model):
         i = 1
         new_slug = proposed
         while new_slug in slugs:
-            new_slug = "%s-%d" % (proposed, i)
+            new_slug = "%s_%d" % (proposed, i)
             i += 1
         return new_slug
 
-    def append(self, other):
+    def append(self, other, slugs=None, titles=None):
         """Add all chunks of another book to self."""
         number = self[len(self) - 1].number + 1
-        single = len(other) == 1
-        for chunk in other:
+        len_other = len(other)
+        single = len_other == 1
+
+        if slugs is not None:
+            assert len(slugs) == len_other
+        if titles is not None:
+            assert len(titles) == len_other
+            if slugs is None:
+                slugs = [slughifi(t) for t in titles]
+
+        for i, chunk in enumerate(other):
             # move chunk to new book
             chunk.book = self
             chunk.number = number
 
-            # try some title guessing
-            if other.title.startswith(self.title):
-                other_title_part = other.title[len(self.title):].lstrip(' /')
-            else:
-                other_title_part = other.title
-
-            if single:
-                # special treatment for appending one-parters:
-                # just use the guessed title and original book slug
-                chunk.comment = other_title_part
-                if other.slug.startswith(self.slug):
-                    chunk_slug = other.slug[len(self.slug):].lstrip('-_')
+            if titles is None:
+                # try some title guessing
+                if other.title.startswith(self.title):
+                    other_title_part = other.title[len(self.title):].lstrip(' /')
                 else:
-                    chunk_slug = other.slug
-                chunk.slug = self.make_chunk_slug(chunk_slug)
+                    other_title_part = other.title
+
+                if single:
+                    # special treatment for appending one-parters:
+                    # just use the guessed title and original book slug
+                    chunk.comment = other_title_part
+                    if other.slug.startswith(self.slug):
+                        chunk_slug = other.slug[len(self.slug):].lstrip('-_')
+                    else:
+                        chunk_slug = other.slug
+                    chunk.slug = self.make_chunk_slug(chunk_slug)
+                else:
+                    chunk.comment = "%s, %s" % (other_title_part, chunk.comment)
             else:
-                chunk.comment = "%s, %s" % (other_title_part, chunk.comment)
-                chunk.slug = self.make_chunk_slug(chunk.slug)
+                chunk.slug = slugs[i]
+                chunk.comment = titles[i]
+
+            chunk.slug = self.make_chunk_slug(chunk.slug)
             chunk.save()
             number += 1
         other.delete()
@@ -243,18 +257,14 @@ class Chunk(dvcs_models.Document):
         """ Create an empty chunk after this one """
         self.book.chunk_set.filter(number__gt=self.number).update(
                 number=models.F('number')+1)
-        tries = 1
-        new_slug = slug
         new_chunk = None
         while not new_chunk:
+            new_slug = self.book.make_chunk_slug(slug)
             try:
                 new_chunk = self.book.chunk_set.create(number=self.number+1,
                     creator=creator, slug=new_slug, comment=comment)
             except IntegrityError:
-                if not adjust_slug:
-                    raise
-                new_slug = "%s_%d" % (slug, tries)
-                tries += 1
+                pass
         return new_chunk
 
     @staticmethod