some book merging automation
author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 22 Sep 2011 12:15:50 +0000 (14:15 +0200)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 22 Sep 2011 12:15:50 +0000 (14:15 +0200)
apps/catalogue/management/commands/merge_books.py [new file with mode: 0755]
apps/catalogue/models.py

diff --git a/apps/catalogue/management/commands/merge_books.py b/apps/catalogue/management/commands/merge_books.py
new file mode 100755 (executable)
index 0000000..8ec91f7
--- /dev/null
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+
+from optparse import make_option
+import sys
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+from django.core.management.color import color_style
+from django.db import transaction
+
+from slughifi import slughifi
+from catalogue.models import Book
+
+
+def common_prefix(texts):
+    """Return the longest string that every item of `texts` starts with.
+
+    `texts` is an iterable of strings; it is materialized first, so one-shot
+    iterators are accepted as well. An empty input yields an empty prefix
+    instead of raising ValueError.
+    """
+    texts = list(texts)
+    if not texts:
+        return ""
+
+    common = []
+    # zip() stops at the shortest text, so no explicit length scan is needed.
+    for chars in zip(*texts):
+        if len(set(chars)) > 1:
+            break
+        common.append(chars[0])
+    return "".join(common)
+
+
+def print_guess(dry_run=True):
+    """Print suggested `merge_books` command lines for books that look like parts.
+
+    Every book slug is matched against a few suffix patterns. Books that share
+    the remaining slug prefix are grouped, and for each group of at least two
+    books a `./manage.py merge_books ...` invocation is printed to stdout,
+    listing the part slugs in part-number order.
+
+    `dry_run` -- when true, the printed commands include `--dry-run`.
+    """
+    from collections import defaultdict
+    from pipes import quote
+    import re
+
+    # (pattern, part number used when the pattern has no usable `n` group).
+    # Compiled once here instead of on every read_slug() call.
+    # -1 makes a "przedmowa" part sort before any numbered part.
+    patterns = [
+        (re.compile(ur'__?(przedmowa)$'), -1),
+        (re.compile(ur'__?(cz(esc)?|ksiega|rozdzial)__?(?P<n>\d*)$'), None),
+        (re.compile(ur'__?(rozdzialy__?)?(?P<n>\d*)-'), None),
+    ]
+
+    def read_slug(slug):
+        """Split `slug` into (part number, base slug).
+
+        Returns (None, slug) when no pattern matches. When a pattern matches
+        but gives no number, the pattern's default is returned as the number.
+        """
+        for pattern, default in patterns:
+            m = pattern.search(slug)
+            if not m:
+                continue
+            base = slug[:m.start()]
+            try:
+                return int(m.group('n')), base
+            except (IndexError, ValueError):
+                # IndexError: this pattern defines no `n` group.
+                # ValueError: `n` matched an empty string (no digits).
+                return default, base
+        return None, slug
+
+    def file_to_title(fname):
+        """ Returns a title-like version of a filename. """
+        parts = (p.replace('_', ' ').title() for p in fname.split('__'))
+        return ' / '.join(parts)
+
+    # base slug -> [(part number, book), ...]
+    merges = defaultdict(list)
+    for b in Book.objects.all():
+        n, ns = read_slug(b.slug)
+        if n is not None:
+            merges[ns].append((n, b))
+
+    for slug in sorted(merges):
+        # Order by part number only, so equal numbers keep query order
+        # rather than falling back to comparing Book instances.
+        merge_list = sorted(merges[slug], key=lambda pair: pair[0])
+        if len(merge_list) < 2:
+            continue
+
+        title = file_to_title(slug)
+        print "./manage.py merge_books %s--title=%s --slug=%s \\\n    %s\n" % (
+            '--dry-run ' if dry_run else '',
+            quote(title), slug,
+            " \\\n    ".join(b.slug for i, b in merge_list)
+            )
+
+
+class Command(BaseCommand):
+    """Management command that merges several books into the first one given.
+
+    The first book is renamed to the new title/slug and its chunks are
+    relabelled; the chunks of every following book are moved into it through
+    `Book.append`. With `--guess` nothing is merged: suggested command lines
+    are printed instead.
+    """
+    option_list = BaseCommand.option_list + (
+        make_option('-s', '--slug', dest='new_slug', metavar='SLUG',
+            help='New slug of the merged book (defaults to common part of all slugs).'),
+        make_option('-t', '--title', dest='new_title', metavar='TITLE',
+            help='New title of the merged book (defaults to common part of all titles).'),
+        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
+            help='Less output'),
+        make_option('-g', '--guess', action='store_true', dest='guess', default=False,
+            help='Try to guess what merges are needed (but do not apply them).'),
+        make_option('-d', '--dry-run', action='store_true', dest='dry_run', default=False,
+            help='Dry run: do not actually change anything.'),
+    )
+    help = 'Merges multiple books into one.'
+    args = '[slug]...'
+
+    def handle(self, *slugs, **options):
+        """Validate the arguments and run the merge inside one transaction.
+
+        `slugs` are the slugs of the books to merge, in order; `options`
+        carries the parsed command-line flags. Any exception raised while
+        merging rolls the transaction back and is re-raised.
+        """
+        self.style = color_style()
+
+        guess = options.get('guess')
+        dry_run = options.get('dry_run')
+        new_slug = options.get('new_slug')
+        new_title = options.get('new_title')
+        verbose = options.get('verbose')
+
+        if guess:
+            if slugs:
+                print "Please specify either slugs, or --guess."
+            else:
+                print_guess(dry_run)
+            return
+        if not slugs:
+            print "Please specify some book slugs"
+            return
+
+        # Start transaction management.
+        transaction.commit_unless_managed()
+        transaction.enter_transaction_management()
+        transaction.managed(True)
+
+        try:
+            self._merge(slugs, new_slug, new_title, dry_run, verbose)
+        except BaseException:
+            # Undo partial changes and restore the previous transaction
+            # state before letting the error propagate.
+            transaction.rollback()
+            transaction.leave_transaction_management()
+            raise
+
+        transaction.commit()
+        transaction.leave_transaction_management()
+
+    def _merge(self, slugs, new_slug, new_title, dry_run, verbose):
+        """Compute new chunk titles/slugs for every book and apply the merge.
+
+        Raises whatever `Book.objects.get` raises when a slug is unknown.
+        Nothing is written when `dry_run` is true.
+        """
+        books = [Book.objects.get(slug=slug) for slug in slugs]
+        common_slug = common_prefix(slugs)
+        common_title = common_prefix([b.title for b in books])
+
+        # An explicit title/slug that is a prefix of the common part also
+        # becomes the part stripped from each book below.
+        if not new_title:
+            new_title = common_title
+        elif common_title.startswith(new_title):
+            common_title = new_title
+
+        if not new_slug:
+            new_slug = common_slug
+        elif common_slug.startswith(new_slug):
+            common_slug = new_slug
+
+        if dry_run and verbose:
+            print self.style.NOTICE('DRY RUN: nothing will be changed.')
+            print
+
+        if verbose:
+            print "New title:", self.style.NOTICE(new_title)
+            print "New slug:", self.style.NOTICE(new_slug)
+            print
+
+        for i, book in enumerate(books):
+            chunk_titles = []
+            chunk_slugs = []
+
+            # What remains of this book's title/slug after the common part.
+            book_title = book.title[len(common_title):].replace(' / ', ' ').lstrip()
+            book_slug = book.slug[len(common_slug):].replace('__', '_').lstrip('-_')
+            for j, chunk in enumerate(book):
+                # The first chunk takes the bare name; later ones get `_<j>`.
+                if j:
+                    new_chunk_title = book_title + '_%d' % j
+                    new_chunk_slug = book_slug + '_%d' % j
+                else:
+                    new_chunk_title, new_chunk_slug = book_title, book_slug
+
+                chunk_titles.append(new_chunk_title)
+                chunk_slugs.append(new_chunk_slug)
+
+                if verbose:
+                    print "title: %s // %s  -->\n       %s // %s\nslug: %s / %s  -->\n      %s / %s" % (
+                        book.title, chunk.comment,
+                        new_title, new_chunk_title,
+                        book.slug, chunk.slug,
+                        new_slug, new_chunk_slug)
+                    print
+
+            if not dry_run:
+                if i:
+                    # Later books: move their chunks into the first book.
+                    books[0].append(books[i], slugs=chunk_slugs, titles=chunk_titles)
+                else:
+                    # First book: rename it and relabel its own chunks.
+                    book.title = new_title
+                    book.slug = new_slug
+                    book.save()
+                    for j, chunk in enumerate(book):
+                        chunk.comment = chunk_titles[j]
+                        chunk.slug = chunk_slugs[j]
+                        chunk.save()
+
index ebeb9ae..69d0a0d 100644 (file)
@@ -159,37 +159,51 @@ class Book(models.Model):
         i = 1
         new_slug = proposed
         while new_slug in slugs:
-            new_slug = "%s-%d" % (proposed, i)
+            new_slug = "%s_%d" % (proposed, i)
             i += 1
         return new_slug
 
-    def append(self, other):
+    def append(self, other, slugs=None, titles=None):
         """Add all chunks of another book to self."""
         number = self[len(self) - 1].number + 1
-        single = len(other) == 1
-        for chunk in other:
+        len_other = len(other)
+        single = len_other == 1
+
+        if slugs is not None:
+            assert len(slugs) == len_other
+        if titles is not None:
+            assert len(titles) == len_other
+            if slugs is None:
+                slugs = [slughifi(t) for t in titles]
+
+        for i, chunk in enumerate(other):
             # move chunk to new book
             chunk.book = self
             chunk.number = number
 
-            # try some title guessing
-            if other.title.startswith(self.title):
-                other_title_part = other.title[len(self.title):].lstrip(' /')
-            else:
-                other_title_part = other.title
-
-            if single:
-                # special treatment for appending one-parters:
-                # just use the guessed title and original book slug
-                chunk.comment = other_title_part
-                if other.slug.startswith(self.slug):
-                    chunk_slug = other.slug[len(self.slug):].lstrip('-_')
+            if titles is None:
+                # try some title guessing
+                if other.title.startswith(self.title):
+                    other_title_part = other.title[len(self.title):].lstrip(' /')
                 else:
-                    chunk_slug = other.slug
-                chunk.slug = self.make_chunk_slug(chunk_slug)
+                    other_title_part = other.title
+
+                if single:
+                    # special treatment for appending one-parters:
+                    # just use the guessed title and original book slug
+                    chunk.comment = other_title_part
+                    if other.slug.startswith(self.slug):
+                        chunk_slug = other.slug[len(self.slug):].lstrip('-_')
+                    else:
+                        chunk_slug = other.slug
+                    chunk.slug = self.make_chunk_slug(chunk_slug)
+                else:
+                    chunk.comment = "%s, %s" % (other_title_part, chunk.comment)
             else:
-                chunk.comment = "%s, %s" % (other_title_part, chunk.comment)
-                chunk.slug = self.make_chunk_slug(chunk.slug)
+                chunk.slug = slugs[i]
+                chunk.comment = titles[i]
+
+            chunk.slug = self.make_chunk_slug(chunk.slug)
             chunk.save()
             number += 1
         other.delete()
@@ -243,18 +257,14 @@ class Chunk(dvcs_models.Document):
         """ Create an empty chunk after this one """
         self.book.chunk_set.filter(number__gt=self.number).update(
                 number=models.F('number')+1)
-        tries = 1
-        new_slug = slug
         new_chunk = None
         while not new_chunk:
+            new_slug = self.book.make_chunk_slug(slug)
             try:
                 new_chunk = self.book.chunk_set.create(number=self.number+1,
                     creator=creator, slug=new_slug, comment=comment)
             except IntegrityError:
-                if not adjust_slug:
-                    raise
-                new_slug = "%s_%d" % (slug, tries)
-                tries += 1
+                pass
         return new_chunk
 
     @staticmethod