From: Radek Czajka Date: Wed, 21 Sep 2011 14:46:36 +0000 (+0200) Subject: simplify dvcs storage X-Git-Url: https://git.mdrn.pl/redakcja.git/commitdiff_plain/391b7a7b21da0ae7aae68826cfee514c8ea7eca9?hp=fdd62169ba22c4c1be2f2306b5339eadd74ffb6d simplify dvcs storage --- diff --git a/apps/catalogue/migrations/0001_initial.py b/apps/catalogue/migrations/0001_initial.py index 4ebde478..2ffc684c 100644 --- a/apps/catalogue/migrations/0001_initial.py +++ b/apps/catalogue/migrations/0001_initial.py @@ -1,174 +1,8 @@ # encoding: utf-8 import datetime -import os.path -import cPickle -import re -import urllib - -from django.conf import settings -from django.db import models -from mercurial import mdiff, hg, ui from south.db import db from south.v2 import SchemaMigration - -from slughifi import slughifi - -META_REGEX = re.compile(r'\s*', re.DOTALL | re.MULTILINE) -STAGE_TAGS_RE = re.compile(r'^#stage-finished: (.*)$', re.MULTILINE) -AUTHOR_RE = re.compile(r'\s*(.*?)\s*<(.*)>\s*') - - -def urlunquote(url): - """Unqotes URL - - # >>> urlunquote('Za%C5%BC%C3%B3%C5%82%C4%87_g%C4%99%C5%9Bl%C4%85_ja%C5%BA%C5%84') - # u'Za\u017c\xf3\u0142\u0107_g\u0119\u015bl\u0105 ja\u017a\u0144' - """ - return unicode(urllib.unquote(url), 'utf-8', 'ignore') - - -def split_name(name): - parts = name.split('__') - return parts - - -def file_to_title(fname): - """ Returns a title-like version of a filename. """ - parts = (p.replace('_', ' ').title() for p in fname.split('__')) - return ' / '.join(parts) - - -def make_patch(src, dst): - if isinstance(src, unicode): - src = src.encode('utf-8') - if isinstance(dst, unicode): - dst = dst.encode('utf-8') - return cPickle.dumps(mdiff.textdiff(src, dst)) - - -def plain_text(text): - return re.sub(META_REGEX, '', text, 1) - - -def gallery(slug, text): - result = {} - - m = re.match(META_REGEX, text) - if m: - for line in m.group(1).split('\n'): - try: - k, v = line.split(':', 1) - result[k.strip()] = v.strip() - except ValueError: - continue - - gallery = result.get('gallery', slughifi(slug)) - - if gallery.startswith('/'): - gallery = os.path.basename(gallery) - - return gallery - - -def migrate_file_from_hg(orm, fname, entry): - fname = urlunquote(fname) - print fname - if fname.endswith('.xml'): - fname = fname[:-4] - title = file_to_title(fname) - fname = slughifi(fname) - # create all the needed objects - # what if it already exists? - book = orm.Book.objects.create( - title=title, - slug=fname) - chunk = orm.Chunk.objects.create( - book=book, - number=1, - slug='1') - head = orm.ChunkChange.objects.create( - tree=chunk, - revision=-1, - patch=make_patch('', ''), - created_at=datetime.datetime.fromtimestamp(entry.filectx(0).date()[0]), - description='' - ) - chunk.head = head - try: - chunk.stage = orm.ChunkTag.objects.order_by('ordering')[0] - except IndexError: - chunk.stage = None - old_data = '' - - maxrev = entry.filerev() - gallery_link = None - - for rev in xrange(maxrev + 1): - fctx = entry.filectx(rev) - data = fctx.data() - gallery_link = gallery(fname, data) - data = plain_text(data) - - # get tags from description - description = fctx.description().decode("utf-8", 'replace') - tags = STAGE_TAGS_RE.findall(description) - tags = [orm.ChunkTag.objects.get(slug=slug.strip()) for slug in tags] - - if tags: - max_ordering = max(tags, key=lambda x: x.ordering).ordering - try: - chunk.stage = orm.ChunkTag.objects.filter(ordering__gt=max_ordering).order_by('ordering')[0] - except IndexError: - chunk.stage = None - - description = STAGE_TAGS_RE.sub('', description) - - author = author_name = author_email = None - author_desc = fctx.user().decode("utf-8", 'replace') - m = AUTHOR_RE.match(author_desc) - if m: - try: - author = orm['auth.User'].objects.get(username=m.group(1), email=m.group(2)) - except orm['auth.User'].DoesNotExist: - author_name = m.group(1) - author_email = m.group(2) - else: - author_name = author_desc - - head = orm.ChunkChange.objects.create( - tree=chunk, - revision=rev + 1, - patch=make_patch(old_data, data), - created_at=datetime.datetime.fromtimestamp(fctx.date()[0]), - description=description, - author=author, - author_name=author_name, - author_email=author_email, - parent=chunk.head - ) - head.tags = tags - chunk.head = head - old_data = data - - chunk.save() - if gallery_link: - book.gallery = gallery_link - book.save() - - -def migrate_from_hg(orm): - try: - hg_path = settings.WIKI_REPOSITORY_PATH - except: - pass - - print 'migrate from', hg_path - repo = hg.repository(ui.ui(), hg_path) - tip = repo['tip'] - for fname in tip: - if fname.startswith('.'): - continue - migrate_file_from_hg(orm, fname, tip[fname]) +from django.db import models class Migration(SchemaMigration): @@ -183,7 +17,6 @@ class Migration(SchemaMigration): ('gallery', self.gf('django.db.models.fields.CharField')(max_length=255, blank=True)), ('parent', self.gf('django.db.models.fields.related.ForeignKey')(blank=True, related_name='children', null=True, to=orm['catalogue.Book'])), ('parent_number', self.gf('django.db.models.fields.IntegerField')(db_index=True, null=True, blank=True)), - ('last_published', self.gf('django.db.models.fields.DateTimeField')(null=True, db_index=True)), )) db.send_create_signal('catalogue', ['Book']) @@ -222,7 +55,7 @@ class Migration(SchemaMigration): ('author', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'], null=True, blank=True)), ('author_name', self.gf('django.db.models.fields.CharField')(max_length=128, null=True, blank=True)), ('author_email', self.gf('django.db.models.fields.CharField')(max_length=128, null=True, blank=True)), - ('patch', self.gf('django.db.models.fields.TextField')(blank=True)), + ('data', self.gf('django.db.models.fields.files.FileField')(max_length=100)), ('revision', self.gf('django.db.models.fields.IntegerField')(db_index=True)), ('parent', self.gf('django.db.models.fields.related.ForeignKey')(default=None, related_name='children', null=True, blank=True, to=orm['catalogue.ChunkChange'])), ('merge_parent', self.gf('django.db.models.fields.related.ForeignKey')(default=None, related_name='merge_children', null=True, blank=True, to=orm['catalogue.ChunkChange'])), @@ -244,12 +77,27 @@ class Migration(SchemaMigration): )) db.create_unique('catalogue_chunkchange_tags', ['chunkchange_id', 'chunktag_id']) + # Adding model 'BookPublishRecord' + db.create_table('catalogue_bookpublishrecord', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('book', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['catalogue.Book'])), + ('timestamp', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)), + ('user', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])), + )) + db.send_create_signal('catalogue', ['BookPublishRecord']) + + # Adding model 'ChunkPublishRecord' + db.create_table('catalogue_chunkpublishrecord', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('book_record', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['catalogue.BookPublishRecord'])), + ('change', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['catalogue.ChunkChange'])), + )) + db.send_create_signal('catalogue', ['ChunkPublishRecord']) + if not db.dry_run: from django.core.management import call_command call_command("loaddata", "stages.json") - migrate_from_hg(orm) - def backwards(self, orm): @@ -277,6 +125,12 @@ class Migration(SchemaMigration): # Removing M2M table for field tags on 'ChunkChange' db.delete_table('catalogue_chunkchange_tags') + # Deleting model 'BookPublishRecord' + db.delete_table('catalogue_bookpublishrecord') + + # Deleting model 'ChunkPublishRecord' + db.delete_table('catalogue_chunkpublishrecord') + models = { 'auth.group': { @@ -312,12 +166,18 @@ class Migration(SchemaMigration): 'Meta': {'ordering': "['parent_number', 'title']", 'object_name': 'Book'}, 'gallery': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}), 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'last_published': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'db_index': 'True'}), 'parent': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'children'", 'null': 'True', 'to': "orm['catalogue.Book']"}), 'parent_number': ('django.db.models.fields.IntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}), 'slug': ('django.db.models.fields.SlugField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'}), 'title': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}) }, + 'catalogue.bookpublishrecord': { + 'Meta': {'ordering': "['-timestamp']", 'object_name': 'BookPublishRecord'}, + 'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}) + }, 'catalogue.chunk': { 'Meta': {'ordering': "['number']", 'unique_together': "[['book', 'number'], ['book', 'slug']]", 'object_name': 'Chunk'}, 'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}), @@ -336,16 +196,22 @@ class Migration(SchemaMigration): 'author_email': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), 'author_name': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), 'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}), + 'data': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}), 'description': ('django.db.models.fields.TextField', [], {'default': "''", 'blank': 'True'}), 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 'merge_parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'merge_children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}), 'parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}), - 'patch': ('django.db.models.fields.TextField', [], {'blank': 'True'}), 'publishable': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), 'revision': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 'tags': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'change_set'", 'symmetrical': 'False', 'to': "orm['catalogue.ChunkTag']"}), 'tree': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'change_set'", 'to': "orm['catalogue.Chunk']"}) }, + 'catalogue.chunkpublishrecord': { + 'Meta': {'object_name': 'ChunkPublishRecord'}, + 'book_record': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.BookPublishRecord']"}), + 'change': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkChange']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}) + }, 'catalogue.chunktag': { 'Meta': {'ordering': "['ordering']", 'object_name': 'ChunkTag'}, 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), diff --git a/apps/catalogue/migrations/0002_auto__add_bookpublishrecord__add_chunkpublishrecord__del_field_book_la.py b/apps/catalogue/migrations/0002_auto__add_bookpublishrecord__add_chunkpublishrecord__del_field_book_la.py deleted file mode 100644 index 0c8efc08..00000000 --- a/apps/catalogue/migrations/0002_auto__add_bookpublishrecord__add_chunkpublishrecord__del_field_book_la.py +++ /dev/null @@ -1,140 +0,0 @@ -# encoding: utf-8 -import datetime -from south.db import db -from south.v2 import SchemaMigration -from django.db import models - -class Migration(SchemaMigration): - - def forwards(self, orm): - - # Adding model 'BookPublishRecord' - db.create_table('catalogue_bookpublishrecord', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('book', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['catalogue.Book'])), - ('timestamp', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)), - ('user', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])), - )) - db.send_create_signal('catalogue', ['BookPublishRecord']) - - # Adding model 'ChunkPublishRecord' - db.create_table('catalogue_chunkpublishrecord', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('book_record', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['catalogue.BookPublishRecord'])), - ('change', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['catalogue.ChunkChange'])), - )) - db.send_create_signal('catalogue', ['ChunkPublishRecord']) - - # Deleting field 'Book.last_published' - db.delete_column('catalogue_book', 'last_published') - - - def backwards(self, orm): - - # Deleting model 'BookPublishRecord' - db.delete_table('catalogue_bookpublishrecord') - - # Deleting model 'ChunkPublishRecord' - db.delete_table('catalogue_chunkpublishrecord') - - # Adding field 'Book.last_published' - db.add_column('catalogue_book', 'last_published', self.gf('django.db.models.fields.DateTimeField')(null=True, db_index=True), keep_default=False) - - - models = { - 'auth.group': { - 'Meta': {'object_name': 'Group'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), - 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) - }, - 'auth.permission': { - 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, - 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) - }, - 'auth.user': { - 'Meta': {'object_name': 'User'}, - 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), - 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), - 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), - 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), - 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), - 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), - 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), - 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), - 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) - }, - 'catalogue.book': { - 'Meta': {'ordering': "['parent_number', 'title']", 'object_name': 'Book'}, - 'gallery': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'parent': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'children'", 'null': 'True', 'to': "orm['catalogue.Book']"}), - 'parent_number': ('django.db.models.fields.IntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}), - 'slug': ('django.db.models.fields.SlugField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'}), - 'title': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}) - }, - 'catalogue.bookpublishrecord': { - 'Meta': {'ordering': "['-timestamp']", 'object_name': 'BookPublishRecord'}, - 'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), - 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}) - }, - 'catalogue.chunk': { - 'Meta': {'ordering': "['number']", 'unique_together': "[['book', 'number'], ['book', 'slug']]", 'object_name': 'Chunk'}, - 'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}), - 'comment': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}), - 'creator': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'created_documents'", 'null': 'True', 'to': "orm['auth.User']"}), - 'head': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'to': "orm['catalogue.ChunkChange']", 'null': 'True', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'number': ('django.db.models.fields.IntegerField', [], {}), - 'slug': ('django.db.models.fields.SlugField', [], {'max_length': '50', 'db_index': 'True'}), - 'stage': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkTag']", 'null': 'True', 'blank': 'True'}), - 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}) - }, - 'catalogue.chunkchange': { - 'Meta': {'ordering': "('created_at',)", 'unique_together': "(['tree', 'revision'],)", 'object_name': 'ChunkChange'}, - 'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}), - 'author_email': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), - 'author_name': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), - 'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}), - 'description': ('django.db.models.fields.TextField', [], {'default': "''", 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'merge_parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'merge_children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}), - 'parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}), - 'patch': ('django.db.models.fields.TextField', [], {'blank': 'True'}), - 'publishable': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'revision': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), - 'tags': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'change_set'", 'symmetrical': 'False', 'to': "orm['catalogue.ChunkTag']"}), - 'tree': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'change_set'", 'to': "orm['catalogue.Chunk']"}) - }, - 'catalogue.chunkpublishrecord': { - 'Meta': {'object_name': 'ChunkPublishRecord'}, - 'book_record': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.BookPublishRecord']"}), - 'change': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkChange']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}) - }, - 'catalogue.chunktag': { - 'Meta': {'ordering': "['ordering']", 'object_name': 'ChunkTag'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '64'}), - 'ordering': ('django.db.models.fields.IntegerField', [], {}), - 'slug': ('django.db.models.fields.SlugField', [], {'db_index': 'True', 'max_length': '64', 'unique': 'True', 'null': 'True', 'blank': 'True'}) - }, - 'contenttypes.contenttype': { - 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, - 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) - } - } - - complete_apps = ['catalogue'] diff --git a/apps/catalogue/migrations/0002_from_hg.py b/apps/catalogue/migrations/0002_from_hg.py new file mode 100644 index 00000000..51b12963 --- /dev/null +++ b/apps/catalogue/migrations/0002_from_hg.py @@ -0,0 +1,273 @@ +# encoding: utf-8 +import datetime +from zlib import compress +import os +import os.path +import re +import urllib + +from django.db import models +from mercurial import hg, ui +from south.db import db +from south.v2 import DataMigration + +from django.conf import settings +from slughifi import slughifi + +META_REGEX = re.compile(r'\s*', re.DOTALL | re.MULTILINE) +STAGE_TAGS_RE = re.compile(r'^#stage-finished: (.*)$', re.MULTILINE) +AUTHOR_RE = re.compile(r'\s*(.*?)\s*<(.*)>\s*') + + +def urlunquote(url): + """Unqotes URL + + # >>> urlunquote('Za%C5%BC%C3%B3%C5%82%C4%87_g%C4%99%C5%9Bl%C4%85_ja%C5%BA%C5%84') + # u'Za\u017c\xf3\u0142\u0107_g\u0119\u015bl\u0105 ja\u017a\u0144' + """ + return unicode(urllib.unquote(url), 'utf-8', 'ignore') + + +def split_name(name): + parts = name.split('__') + return parts + + +def file_to_title(fname): + """ Returns a title-like version of a filename. """ + parts = (p.replace('_', ' ').title() for p in fname.split('__')) + return ' / '.join(parts) + + +def plain_text(text): + return re.sub(META_REGEX, '', text, 1) + + +def gallery(slug, text): + result = {} + + m = re.match(META_REGEX, text) + if m: + for line in m.group(1).split('\n'): + try: + k, v = line.split(':', 1) + result[k.strip()] = v.strip() + except ValueError: + continue + + gallery = result.get('gallery', slughifi(slug)) + + if gallery.startswith('/'): + gallery = os.path.basename(gallery) + + return gallery + + +def migrate_file_from_hg(orm, fname, entry): + fname = urlunquote(fname) + print fname + if fname.endswith('.xml'): + fname = fname[:-4] + title = file_to_title(fname) + fname = slughifi(fname) + + # create all the needed objects + # what if it already exists? + book = orm.Book.objects.create( + title=title, + slug=fname) + chunk = orm.Chunk.objects.create( + book=book, + number=1, + slug='1') + try: + chunk.stage = orm.ChunkTag.objects.order_by('ordering')[0] + except IndexError: + chunk.stage = None + + maxrev = entry.filerev() + gallery_link = None + + # this will fail if directory exists + os.makedirs(os.path.join(settings.DVCS_REPO_PATH, str(chunk.pk))) + + for rev in xrange(maxrev + 1): + fctx = entry.filectx(rev) + data = fctx.data() + gallery_link = gallery(fname, data) + data = plain_text(data) + + # get tags from description + description = fctx.description().decode("utf-8", 'replace') + tags = STAGE_TAGS_RE.findall(description) + tags = [orm.ChunkTag.objects.get(slug=slug.strip()) for slug in tags] + + if tags: + max_ordering = max(tags, key=lambda x: x.ordering).ordering + try: + chunk.stage = orm.ChunkTag.objects.filter(ordering__gt=max_ordering).order_by('ordering')[0] + except IndexError: + chunk.stage = None + + description = STAGE_TAGS_RE.sub('', description) + + author = author_name = author_email = None + author_desc = fctx.user().decode("utf-8", 'replace') + m = AUTHOR_RE.match(author_desc) + if m: + try: + author = orm['auth.User'].objects.get(username=m.group(1), email=m.group(2)) + except orm['auth.User'].DoesNotExist: + author_name = m.group(1) + author_email = m.group(2) + else: + author_name = author_desc + + head = orm.ChunkChange.objects.create( + tree=chunk, + revision=rev + 1, + created_at=datetime.datetime.fromtimestamp(fctx.date()[0]), + description=description, + author=author, + author_name=author_name, + author_email=author_email, + parent=chunk.head + ) + + path = "%d/%d" % (chunk.pk, head.pk) + abs_path = os.path.join(settings.DVCS_REPO_PATH, path) + f = open(abs_path, 'wb') + f.write(compress(data)) + f.close() + head.data = path + + head.tags = tags + head.save() + + chunk.head = head + + chunk.save() + if gallery_link: + book.gallery = gallery_link + book.save() + + +class Migration(DataMigration): + + def forwards(self, orm): + try: + hg_path = settings.WIKI_REPOSITORY_PATH + except: + pass + + print 'migrate from', hg_path + repo = hg.repository(ui.ui(), hg_path) + tip = repo['tip'] + for fname in tip: + if fname.startswith('.') or not fname.startswith('a'): + continue + migrate_file_from_hg(orm, fname, tip[fname]) + + + def backwards(self, orm): + "Write your backwards methods here." + pass + + + models = { + 'auth.group': { + 'Meta': {'object_name': 'Group'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), + 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) + }, + 'auth.permission': { + 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, + 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) + }, + 'auth.user': { + 'Meta': {'object_name': 'User'}, + 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), + 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), + 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) + }, + 'catalogue.book': { + 'Meta': {'ordering': "['parent_number', 'title']", 'object_name': 'Book'}, + 'gallery': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'parent': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'children'", 'null': 'True', 'to': "orm['catalogue.Book']"}), + 'parent_number': ('django.db.models.fields.IntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}), + 'slug': ('django.db.models.fields.SlugField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'}), + 'title': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}) + }, + 'catalogue.bookpublishrecord': { + 'Meta': {'ordering': "['-timestamp']", 'object_name': 'BookPublishRecord'}, + 'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}) + }, + 'catalogue.chunk': { + 'Meta': {'ordering': "['number']", 'unique_together': "[['book', 'number'], ['book', 'slug']]", 'object_name': 'Chunk'}, + 'book': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.Book']"}), + 'comment': ('django.db.models.fields.CharField', [], {'max_length': '255', 'blank': 'True'}), + 'creator': ('django.db.models.fields.related.ForeignKey', [], {'blank': 'True', 'related_name': "'created_documents'", 'null': 'True', 'to': "orm['auth.User']"}), + 'head': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'to': "orm['catalogue.ChunkChange']", 'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'number': ('django.db.models.fields.IntegerField', [], {}), + 'slug': ('django.db.models.fields.SlugField', [], {'max_length': '50', 'db_index': 'True'}), + 'stage': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkTag']", 'null': 'True', 'blank': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}) + }, + 'catalogue.chunkchange': { + 'Meta': {'ordering': "('created_at',)", 'unique_together': "(['tree', 'revision'],)", 'object_name': 'ChunkChange'}, + 'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}), + 'author_email': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), + 'author_name': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), + 'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}), + 'data': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}), + 'description': ('django.db.models.fields.TextField', [], {'default': "''", 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'merge_parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'merge_children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}), + 'parent': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'children'", 'null': 'True', 'blank': 'True', 'to': "orm['catalogue.ChunkChange']"}), + 'publishable': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'revision': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), + 'tags': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'change_set'", 'symmetrical': 'False', 'to': "orm['catalogue.ChunkTag']"}), + 'tree': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'change_set'", 'to': "orm['catalogue.Chunk']"}) + }, + 'catalogue.chunkpublishrecord': { + 'Meta': {'object_name': 'ChunkPublishRecord'}, + 'book_record': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.BookPublishRecord']"}), + 'change': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['catalogue.ChunkChange']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}) + }, + 'catalogue.chunktag': { + 'Meta': {'ordering': "['ordering']", 'object_name': 'ChunkTag'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '64'}), + 'ordering': ('django.db.models.fields.IntegerField', [], {}), + 'slug': ('django.db.models.fields.SlugField', [], {'db_index': 'True', 'max_length': '64', 'unique': 'True', 'null': 'True', 'blank': 'True'}) + }, + 'contenttypes.contenttype': { + 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, + 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) + } + } + + complete_apps = ['catalogue'] diff --git a/apps/catalogue/templatetags/catalogue.py b/apps/catalogue/templatetags/catalogue.py index 850a2e80..7d138ffa 100644 --- a/apps/catalogue/templatetags/catalogue.py +++ b/apps/catalogue/templatetags/catalogue.py @@ -63,7 +63,6 @@ class WallItem(object): def changes_wall(user, max_len): qs = Chunk.change_model.objects.filter(revision__gt=-1).order_by('-created_at') - qs = qs.defer('patch') qs = qs.select_related('author', 'tree', 'tree__book__title') if user: qs = qs.filter(Q(author=user) | Q(tree__user=user)) diff --git a/apps/dvcs/models.py b/apps/dvcs/models.py index 177b0d30..1668dee8 100644 --- a/apps/dvcs/models.py +++ b/apps/dvcs/models.py @@ -1,11 +1,15 @@ from datetime import datetime +from django.core.files.base import ContentFile +from django.core.files.storage import FileSystemStorage from django.db import models from django.db.models.base import ModelBase from django.contrib.auth.models import User from django.utils.translation import ugettext_lazy as _ from mercurial import mdiff, simplemerge -import pickle + +from dvcs.fields import GzipFileSystemStorage +from dvcs.settings import REPO_PATH class Tag(models.Model): @@ -53,18 +57,23 @@ class Tag(models.Model): models.signals.pre_save.connect(Tag.listener_changed, sender=Tag) +repo = GzipFileSystemStorage(location=REPO_PATH) + +def data_upload_to(instance, filename): + return "%d/%d" % (instance.tree.pk, instance.pk) + class Change(models.Model): """ Single document change related to previous change. The "parent" argument points to the version against which this change has been recorded. Initial text will have a null parent. - Data contains a pickled diff needed to reproduce the initial document. + Data file contains a gzipped text of the document. """ author = models.ForeignKey(User, null=True, blank=True) author_name = models.CharField(max_length=128, null=True, blank=True) author_email = models.CharField(max_length=128, null=True, blank=True) - patch = models.TextField(blank=True) + data = models.FileField(upload_to=data_upload_to, storage=repo) revision = models.IntegerField(db_index=True) parent = models.ForeignKey('self', @@ -86,7 +95,7 @@ class Change(models.Model): unique_together = ['tree', 'revision'] def __unicode__(self): - return u"Id: %r, Tree %r, Parent %r, Patch '''\n%s'''" % (self.id, self.tree_id, self.parent_id, self.patch) + return u"Id: %r, Tree %r, Parent %r, Data: %s" % (self.id, self.tree_id, self.parent_id, self.data) def author_str(self): if self.author: @@ -106,76 +115,42 @@ class Change(models.Model): take the next available revision number if none yet """ if self.revision is None: - self.revision = self.tree.revision() + 1 + tree_rev = self.tree.revision() + if tree_rev is None: + self.revision = 0 + else: + self.revision = tree_rev + 1 return super(Change, self).save(*args, **kwargs) - @staticmethod - def make_patch(src, dst): - if isinstance(src, unicode): - src = src.encode('utf-8') - if isinstance(dst, unicode): - dst = dst.encode('utf-8') - return pickle.dumps(mdiff.textdiff(src, dst)) - def materialize(self): - # special care for merged nodes - if self.parent is None and self.merge_parent is not None: - return self.apply_to(self.merge_parent.materialize()) - - changes = self.tree.change_set.exclude(parent=None).filter( - revision__lte=self.revision).order_by('revision') - text = '' - for change in changes: - text = change.apply_to(text) - return text.decode('utf-8') - - def make_child(self, patch, description, author=None, - author_name=None, author_email=None, tags=None): - ch = self.children.create(patch=patch, - tree=self.tree, author=author, - author_name=author_name, - author_email=author_email, - description=description) - if tags is not None: - ch.tags = tags - return ch - - def make_merge_child(self, patch, description, author=None, - author_name=None, author_email=None, tags=None): - ch = self.merge_children.create(patch=patch, - tree=self.tree, author=author, - author_name=author_name, - author_email=author_email, - description=description, - tags=tags) - if tags is not None: - ch.tags = tags - return ch - - def apply_to(self, text): - return mdiff.patch(text, pickle.loads(self.patch.encode('ascii'))) + f = self.data.storage.open(self.data) + text = f.read() + f.close() + return unicode(text, 'utf-8') def merge_with(self, other, author=None, author_name=None, author_email=None, description=u"Automatic merge."): + """Performs an automatic merge after straying commits.""" assert self.tree_id == other.tree_id # same tree if other.parent_id == self.pk: - # immediate child + # immediate child - fast forward return other - local = self.materialize() - base = other.merge_parent.materialize() - remote = other.apply_to(base) + local = self.materialize().encode('utf-8') + base = other.parent.materialize().encode('utf-8') + remote = other.materialize().encode('utf-8') merge = simplemerge.Merge3Text(base, local, remote) result = ''.join(merge.merge_lines()) - patch = self.make_patch(local, result) - return self.children.create( - patch=patch, merge_parent=other, tree=self.tree, + merge_node = self.children.create( + merge_parent=other, tree=self.tree, author=author, author_name=author_name, author_email=author_email, description=description) + merge_node.data.save('', ContentFile(result)) + return merge_node def revert(self, **kwargs): """ commit this version of a doc as new head """ @@ -256,23 +231,14 @@ class Document(models.Model): change = self.change_set.get(pk=change) return change.materialize() - def commit(self, **kwargs): + def commit(self, text, **kwargs): if 'parent' not in kwargs: parent = self.head else: parent = kwargs['parent'] - if not isinstance(parent, Change): + if parent is not None and not isinstance(parent, Change): parent = self.change_set.objects.get(pk=kwargs['parent']) - if 'patch' not in kwargs: - if 'text' not in kwargs: - raise ValueError("You must provide either patch or target document.") - patch = Change.make_patch(self.materialize(change=parent), kwargs['text']) - else: - if 'text' in kwargs: - raise ValueError("You can provide only text or patch - not both") - patch = kwargs['patch'] - author = kwargs.get('author', None) author_name = kwargs.get('author_name', None) author_email = kwargs.get('author_email', None) @@ -281,24 +247,23 @@ class Document(models.Model): # set stage to next tag after the commited one self.stage = max(tags, key=lambda t: t.ordering).next() - old_head = self.head - if parent != old_head: - change = parent.make_merge_child(patch, author=author, + change = self.change_set.create(author=author, author_name=author_name, author_email=author_email, description=kwargs.get('description', ''), - tags=tags) - # not Fast-Forward - perform a merge - self.head = old_head.merge_with(change, author=author, + parent=parent) + + change.tags = tags + change.data.save('', ContentFile(text.encode('utf-8'))) + change.save() + + if self.head: + # merge new change as new head + self.head = self.head.merge_with(change, author=author, author_name=author_name, author_email=author_email) else: - self.head = parent.make_child(patch, author=author, - author_name=author_name, - author_email=author_email, - description=kwargs.get('description', ''), - tags=tags) - + self.head = change self.save() return self.head @@ -308,13 +273,11 @@ class Document(models.Model): def revision(self): rev = self.change_set.aggregate( models.Max('revision'))['revision__max'] - return rev if rev is not None else -1 + return rev def at_revision(self, rev): - if rev is not None: - return self.change_set.get(revision=rev) - else: - return self.head + """Returns a Change with given revision number.""" + return self.change_set.get(revision=rev) def publishable(self): changes = self.change_set.filter(publishable=True).order_by('-created_at')[:1] @@ -322,18 +285,3 @@ class Document(models.Model): return changes[0] else: return None - - @staticmethod - def listener_initial_commit(sender, instance, created, **kwargs): - # run for Document and its subclasses - if not isinstance(instance, Document): - return - if created: - instance.head = instance.change_model.objects.create( - revision=-1, - author=instance.creator, - patch=Change.make_patch('', ''), - tree=instance) - instance.save() - -models.signals.post_save.connect(Document.listener_initial_commit) diff --git a/apps/dvcs/settings.py b/apps/dvcs/settings.py new file mode 100755 index 00000000..d7863bfa --- /dev/null +++ b/apps/dvcs/settings.py @@ -0,0 +1,3 @@ +from django.conf import settings + +REPO_PATH = settings.DVCS_REPO_PATH diff --git a/apps/dvcs/storage.py b/apps/dvcs/storage.py new file mode 100755 index 00000000..6bb5b595 --- /dev/null +++ b/apps/dvcs/storage.py @@ -0,0 +1,18 @@ +from zlib import compress, decompress + +from django.core.files.base import ContentFile, File +from django.core.files.storage import FileSystemStorage + + +class GzipFileSystemStorage(FileSystemStorage): + def _open(self, name, mode='rb'): + """TODO: This is good for reading; what about writing?""" + f = open(self.path(name), 'rb') + text = f.read() + f.close() + return ContentFile(decompress(text)) + + def _save(self, name, content): + content = ContentFile(compress(content.read())) + + return super(GzipFileSystemStorage, self)._save(name, content) diff --git a/apps/wiki/forms.py b/apps/wiki/forms.py index a8a57c88..1d4d8549 100644 --- a/apps/wiki/forms.py +++ b/apps/wiki/forms.py @@ -30,7 +30,7 @@ class DocumentTextSaveForm(forms.Form): """ - parent_revision = forms.IntegerField(widget=forms.HiddenInput) + parent_revision = forms.IntegerField(widget=forms.HiddenInput, required=False) text = forms.CharField(widget=forms.HiddenInput) author_name = forms.CharField( diff --git a/apps/wiki/views.py b/apps/wiki/views.py index 75650e09..adffcb76 100644 --- a/apps/wiki/views.py +++ b/apps/wiki/views.py @@ -114,7 +114,10 @@ def text(request, slug, chunk=None): author = None text = form.cleaned_data['text'] parent_revision = form.cleaned_data['parent_revision'] - parent = doc.at_revision(parent_revision) + if parent_revision is not None: + parent = doc.at_revision(parent_revision) + else: + parent = None stage = form.cleaned_data['stage_completed'] tags = [stage] if stage else [] doc.commit(author=author, @@ -137,12 +140,17 @@ def text(request, slug, chunk=None): try: revision = int(revision) except (ValueError, TypeError): - revision = None + revision = doc.revision() + + if revision is not None: + text = doc.at_revision(revision).materialize() + else: + text = '' return JSONResponse({ - 'text': doc.at_revision(revision).materialize(), + 'text': text, 'meta': {}, - 'revision': revision if revision else doc.revision(), + 'revision': revision, })