# encoding: utf-8
"""South data migration: import document history from a Mercurial repository.

For every file selected from the repository tip, a Book with a single Chunk
is created, and each file revision becomes a ChunkChange whose
zlib-compressed content is written under ``settings.DVCS_REPO_PATH``.
"""
import datetime
from zlib import compress
import os
import os.path
import re
import urllib

from django.db import models
from mercurial import hg, ui
from south.db import db
from south.v2 import DataMigration

from django.conf import settings
from slughifi import slughifi

# Leading XML comment holding "key: value" metadata lines.  gallery() reads
# group(1), so the pattern must have a capture group.
# NOTE(review): the comment delimiters were reconstructed from how the
# pattern is used below -- confirm against the upstream repository.
META_REGEX = re.compile(r'\s*<!--\s(.*?)-->', re.DOTALL | re.MULTILINE)
# "#stage-finished: <slug>" lines embedded in commit descriptions.
STAGE_TAGS_RE = re.compile(r'^#stage-finished: (.*)$', re.MULTILINE)
# "Name <email>" commit author strings.
AUTHOR_RE = re.compile(r'\s*(.*?)\s*<(.*)>\s*')


def urlunquote(url):
    """Percent-decode `url` and return it as unicode.

    The unquoted bytes are decoded as UTF-8; undecodable bytes are dropped
    ('ignore' error handler).
    """
    return unicode(urllib.unquote(url), 'utf-8', 'ignore')


def split_name(name):
    """Split `name` on the '__' separator and return the list of parts."""
    return name.split('__')


def file_to_title(fname):
    """Return a title-like version of a filename.

    Each '__'-separated part has underscores replaced by spaces and is
    title-cased; the parts are then joined with ' / '.
    """
    parts = (p.replace('_', ' ').title() for p in fname.split('__'))
    return ' / '.join(parts)


def plain_text(text):
    """Return `text` with the first metadata comment (META_REGEX) removed."""
    return META_REGEX.sub('', text, 1)


def gallery(slug, text):
    """Return the gallery name for a document.

    The name is read from the ``gallery`` key of the metadata comment at the
    very start of `text`; when there is no such key, ``slughifi(slug)`` is
    used.  A value starting with '/' is reduced to its basename.
    """
    meta = {}

    m = META_REGEX.match(text)
    if m:
        for line in m.group(1).split('\n'):
            try:
                k, v = line.split(':', 1)
            except ValueError:
                # not a "key: value" line
                continue
            meta[k.strip()] = v.strip()

    gallery_name = meta.get('gallery', slughifi(slug))

    if gallery_name.startswith('/'):
        gallery_name = os.path.basename(gallery_name)

    return gallery_name


def _first_stage(queryset):
    """Return the lowest-`ordering` object of `queryset`, or None if empty."""
    try:
        return queryset.order_by('ordering')[0]
    except IndexError:
        return None


def _resolve_author(orm, author_desc):
    """Return an (author, author_name, author_email) triple for a commit user.

    A "Name <email>" string is looked up as an auth.User with that username
    and e-mail; if no such user exists the name and e-mail are returned as
    plain text instead.  Any other string is returned as the name only.
    """
    m = AUTHOR_RE.match(author_desc)
    if not m:
        return None, author_desc, None

    name, email = m.group(1), m.group(2)
    user_model = orm['auth.User']
    try:
        return user_model.objects.get(username=name, email=email), None, None
    except user_model.DoesNotExist:
        return None, name, email


def _store_revision(path, data):
    """Write zlib-compressed `data` to `path` under settings.DVCS_REPO_PATH."""
    f = open(os.path.join(settings.DVCS_REPO_PATH, path), 'wb')
    try:
        f.write(compress(data))
    finally:
        # close the handle even if the write fails
        f.close()


def migrate_file_from_hg(orm, fname, entry):
    """Import one Mercurial file, with its whole history, as a Book.

    orm   -- the South frozen ORM
    fname -- URL-quoted file name as stored in the repository
    entry -- Mercurial file context of the file at the tip revision

    Creates a Book and its single Chunk, then one ChunkChange per file
    revision (numbered from 1), storing each revision's text, stripped of
    the metadata comment, in a compressed file.  The chunk's stage is set to
    the first tag ordered after the highest stage tag mentioned in the most
    recent commit description that mentions any.
    """
    fname = urlunquote(fname)
    print(fname)
    if fname.endswith('.xml'):
        fname = fname[:-4]
    title = file_to_title(fname)
    fname = slughifi(fname)

    # create all the needed objects
    # what if it already exists?
    book = orm.Book.objects.create(title=title, slug=fname)
    chunk = orm.Chunk.objects.create(book=book, number=1, slug='1')
    chunk.stage = _first_stage(orm.ChunkTag.objects)

    maxrev = entry.filerev()
    gallery_link = None

    # makedirs raises if the directory is already there (e.g. on a re-run)
    chunk_dir = os.path.join(settings.DVCS_REPO_PATH, str(chunk.pk))
    if not os.path.isdir(chunk_dir):
        os.makedirs(chunk_dir)

    for rev in xrange(maxrev + 1):
        fctx = entry.filectx(rev)
        data = fctx.data()
        gallery_link = gallery(fname, data)
        data = plain_text(data)

        # get tags from description
        description = fctx.description().decode("utf-8", 'replace')
        tags = [orm.ChunkTag.objects.get(slug=slug.strip())
                for slug in STAGE_TAGS_RE.findall(description)]

        if tags:
            max_ordering = max(tag.ordering for tag in tags)
            chunk.stage = _first_stage(
                orm.ChunkTag.objects.filter(ordering__gt=max_ordering))

        description = STAGE_TAGS_RE.sub('', description)

        author, author_name, author_email = _resolve_author(
            orm, fctx.user().decode("utf-8", 'replace'))

        head = orm.ChunkChange.objects.create(
            tree=chunk,
            revision=rev + 1,
            created_at=datetime.datetime.fromtimestamp(fctx.date()[0]),
            description=description,
            author=author,
            author_name=author_name,
            author_email=author_email,
            parent=chunk.head
            )

        # the data file is named after the change's primary key, so it can
        # only be written once the row exists
        path = "%d/%d" % (chunk.pk, head.pk)
        _store_revision(path, data)
        head.data = path

        head.tags = tags
        head.save()

        chunk.head = head

    chunk.save()
    if gallery_link:
        book.gallery = gallery_link
        book.save()


class Migration(DataMigration):
    """Imports documents from settings.WIKI_REPOSITORY_PATH into catalogue."""

    def forwards(self, orm):
        """Migrate the selected files from the repository tip.

        Does nothing (apart from printing a notice) when
        WIKI_REPOSITORY_PATH is not configured.
        """
        hg_path = getattr(settings, 'WIKI_REPOSITORY_PATH', None)
        if hg_path is None:
            print('WIKI_REPOSITORY_PATH is not set, nothing to migrate')
            return

        print('migrate from %s' % hg_path)
        repo = hg.repository(ui.ui(), hg_path)
        tip = repo['tip']
        for fname in tip:
            # NOTE(review): the second test restricts the import to files
            # whose name starts with 'a' and makes the first test redundant;
            # this looks like a debugging leftover -- confirm before relying
            # on this migration for a full import.
            if fname.startswith('.') or not fname.startswith('a'):
                continue
            migrate_file_from_hg(orm, fname, tip[fname])

    def backwards(self, orm):
        """No-op: imported objects and data files are left in place."""
        pass

    # Frozen model definitions generated by South.
    models = {
        'auth.group': {
            'Meta': {'object_name': 'Group'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'Meta': {'object_name': 'User'},
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
        },
        'catalogue.book': {
            'Meta': {'ordering': "['parent_number', 'title']", 'object_name': 'Book'},
            'gallery': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'parent': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'children'", 'null': 'True', 'to': "orm['catalogue.Book']"}),
            'parent_number': ('django.db.models.fields.IntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
            'slug': ('django.db.models.fields.SlugField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'}),
            'title': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'})
        },
        'catalogue.bookpublishrecord': {
            'Meta': {'ordering': "['-timestamp']", 'object_name': 'BookPublishRecord'},
            'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"})
        },
        'catalogue.chunk': {
            'Meta': {'ordering': "['number']", 'unique_together': "[['book', 'number'], ['book', 'slug']]", 'object_name': 'Chunk'},
            'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}),
            'comment': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}),
            'creator': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'created_documents'", 'null': 'True', 'to': "orm['auth.User']"}),
            'head': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'to': "orm['catalogue.ChunkChange']", 'null': 'True', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'number': ('django.db.models.fields.IntegerField', [], {}),
            'slug': ('django.db.models.fields.SlugField', [], {'max_length': '50', 'db_index': 'True'}),
            'stage': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkTag']", 'null': 'True', 'blank': 'True'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'})
        },
        'catalogue.chunkchange': {
            'Meta': {'ordering': "('created_at',)", 'unique_together': "(['tree', 'revision'],)", 'object_name': 'ChunkChange'},
            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}),
            'author_email': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}),
            'author_name': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'data': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}),
            'description': ('django.db.models.fields.TextField', [], {'default': "''", 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'merge_parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'merge_children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}),
            'parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}),
            'publishable': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'revision': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'change_set'", 'symmetrical': 'False', 'to': "orm['catalogue.ChunkTag']"}),
            'tree': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'change_set'", 'to': "orm['catalogue.Chunk']"})
        },
        'catalogue.chunkpublishrecord': {
            'Meta': {'object_name': 'ChunkPublishRecord'},
            'book_record': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.BookPublishRecord']"}),
            'change': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkChange']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'catalogue.chunktag': {
            'Meta': {'ordering': "['ordering']", 'object_name': 'ChunkTag'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '64'}),
            'ordering': ('django.db.models.fields.IntegerField', [], {}),
            'slug': ('django.db.models.fields.SlugField', [], {'db_index': 'True', 'max_length': '64', 'unique': 'True', 'null': 'True', 'blank': 'True'})
        },
        'contenttypes.contenttype': {
            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        }
    }

    complete_apps = ['catalogue']