update librarian
[redakcja.git] / apps / dvcs / models.py
index 177b0d3..6ecb97c 100644 (file)
@@ -1,92 +1,72 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of MIL/PEER, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from __future__ import unicode_literals, print_function
+
 from datetime import datetime
+import os
+import re
+from subprocess import PIPE, Popen
+from tempfile import NamedTemporaryFile
 
+from django.conf import settings
+from django.core.files.base import ContentFile
 from django.db import models
-from django.db.models.base import ModelBase
-from django.contrib.auth.models import User
+from django.utils.encoding import python_2_unicode_compatible
 from django.utils.translation import ugettext_lazy as _
-from mercurial import mdiff, simplemerge
-import pickle
-
-
-class Tag(models.Model):
-    """
-        a tag (e.g. document stage) which can be applied to a change
-    """
-
-    name = models.CharField(_('name'), max_length=64)
-    slug = models.SlugField(_('slug'), unique=True, max_length=64, 
-            null=True, blank=True)
-    ordering = models.IntegerField(_('ordering'))
-
-    _object_cache = {}
-
-    class Meta:
-        abstract = True
-        ordering = ['ordering']
-
-    def __unicode__(self):
-        return self.name
-
-    @classmethod
-    def get(cls, slug):
-        if slug in cls._object_cache:
-            return cls._object_cache[slug]
-        else:
-            obj = cls.objects.get(slug=slug)
-            cls._object_cache[slug] = obj
-            return obj
-
-    @staticmethod
-    def listener_changed(sender, instance, **kwargs):
-        sender._object_cache = {}
 
-    def next(self):
-        """
-            Returns the next tag - stage to work on.
-            Returns None for the last stage.
-        """
-        try:
-            return Tag.objects.filter(ordering__gt=self.ordering)[0]
-        except IndexError:
-            return None
+from dvcs.signals import post_commit, post_merge
+from dvcs.storage import GzipFileSystemStorage
 
-models.signals.pre_save.connect(Tag.listener_changed, sender=Tag)
+# default repository path; make a setting for it
+REPO_PATH = os.path.join(settings.MEDIA_ROOT, 'dvcs')
+repo = GzipFileSystemStorage(location=REPO_PATH)
 
 
-class Change(models.Model):
+@python_2_unicode_compatible
+class Revision(models.Model):
     """
-        Single document change related to previous change. The "parent"
-        argument points to the version against which this change has been 
-        recorded. Initial text will have a null parent.
-        
-        Data contains a pickled diff needed to reproduce the initial document.
+    A document revision. The "parent"
+    argument points to the version against which this change has been 
+    recorded. Initial text will have a null parent.
+
+    Gzipped text of the document is stored in a file.
     """
-    author = models.ForeignKey(User, null=True, blank=True)
-    author_name = models.CharField(max_length=128, null=True, blank=True)
-    author_email = models.CharField(max_length=128, null=True, blank=True)
-    patch = models.TextField(blank=True)
-    revision = models.IntegerField(db_index=True)
+    author = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, blank=True, verbose_name=_('author'))
+    author_name = models.CharField(
+        _('author name'), max_length=128, null=True, blank=True, help_text=_("Used if author is not set."))
+    author_email = models.CharField(
+        _('author email'), max_length=128, null=True, blank=True, help_text=_("Used if author is not set."))
+    # Any other author data?
+    # How do we identify an author?
 
-    parent = models.ForeignKey('self',
-                        null=True, blank=True, default=None,
-                        related_name="children")
+    parent = models.ForeignKey(
+        'self', null=True, blank=True, default=None, verbose_name=_('parent'), related_name="children")
 
-    merge_parent = models.ForeignKey('self',
-                        null=True, blank=True, default=None,
-                        related_name="merge_children")
+    merge_parent = models.ForeignKey(
+        'self', null=True, blank=True, default=None, verbose_name=_('merge parent'), related_name="merge_children")
 
-    description = models.TextField(blank=True, default='')
-    created_at = models.DateTimeField(editable=False, db_index=True, 
-                        default=datetime.now)
-    publishable = models.BooleanField(default=False)
+    description = models.TextField(_('description'), blank=True, default='')
+    created_at = models.DateTimeField(editable=False, db_index=True, default=datetime.now)
 
     class Meta:
-        abstract = True
         ordering = ('created_at',)
-        unique_together = ['tree', 'revision']
+        verbose_name = _("revision")
+        verbose_name_plural = _("revisions")
+
+    def __str__(self):
+        """Return a short debugging summary: id, parent id and text file path."""
+        summary_fields = (self.id, self.parent_id, self.get_text_path())
+        return "Id: %r, Parent %r, Data: %s" % summary_fields
+
+    def get_text_path(self):
+        """Return the storage path of this revision's text file.
+
+        Returns None when the primary key is not set (falsy).
+
+        The path is the primary key in lowercase hex plus ``.gz``, with a
+        ``/`` inserted after each matched pair of hex digits that is followed
+        by a character other than ``.``.  Each match also consumes that
+        following character, so e.g. pk 0x12345 maps to ``12/345.gz`` and
+        pk 0xff maps to ``ff.gz``.
+        """
+        if not self.pk:
+            return None
+        flat_name = '%x.gz' % self.pk
+        return re.sub(r'([0-9a-f]{2})([^.])', r'\1/\2', flat_name)
 
-    def __unicode__(self):
-        return u"Id: %r, Tree %r, Parent %r, Patch '''\n%s'''" % (self.id, self.tree_id, self.parent_id, self.patch)
+    def save_text(self, content):
+        """Store ``content`` (a text string), UTF-8 encoded, under this revision's path.
+
+        Returns whatever ``repo.save`` returns.
+        """
+        target_path = self.get_text_path()
+        payload = ContentFile(content.encode('utf-8'))
+        return repo.save(target_path, payload)
 
     def author_str(self):
         if self.author:
@@ -100,240 +80,182 @@ class Change(models.Model):
                 self.author_email
                 )
 
-
-    def save(self, *args, **kwargs):
-        """
-            take the next available revision number if none yet
-        """
-        if self.revision is None:
-            self.revision = self.tree.revision() + 1
-        return super(Change, self).save(*args, **kwargs)
-
-    @staticmethod
-    def make_patch(src, dst):
-        if isinstance(src, unicode):
-            src = src.encode('utf-8')
-        if isinstance(dst, unicode):
-            dst = dst.encode('utf-8')
-        return pickle.dumps(mdiff.textdiff(src, dst))
+    @classmethod
+    def create(cls, text, parent=None, merge_parent=None, author=None, author_name=None, author_email=None,
+               description=''):
+        """Create a revision row and store ``text`` as its content.
+
+        Non-empty text is cleaned first: ``<dc:></dc:>`` is removed and
+        ``<div class="img">`` is replaced with ``<div>``.
+
+        The database row is created before the text is written, because the
+        file path is derived from the new primary key.
+
+        :returns: the newly created revision
+        """
+        if text:
+            cleanups = (
+                ('<dc:></dc:>', ''),
+                ('<div class="img">', '<div>'),
+            )
+            for unwanted, replacement in cleanups:
+                text = text.replace(unwanted, replacement)
+
+        fields = dict(
+            parent=parent,
+            merge_parent=merge_parent,
+            author=author,
+            author_name=author_name,
+            author_email=author_email,
+            description=description,
+        )
+        new_revision = cls.objects.create(**fields)
+        new_revision.save_text(text)
+        return new_revision
 
     def materialize(self):
-        # special care for merged nodes
-        if self.parent is None and self.merge_parent is not None:
-            return self.apply_to(self.merge_parent.materialize())
-
-        changes = self.tree.change_set.exclude(parent=None).filter(
-                        revision__lte=self.revision).order_by('revision')
-        text = ''
-        for change in changes:
-            text = change.apply_to(text)
-        return text.decode('utf-8')
-
-    def make_child(self, patch, description, author=None,
-            author_name=None, author_email=None, tags=None):
-        ch = self.children.create(patch=patch,
-                        tree=self.tree, author=author,
-                        author_name=author_name,
-                        author_email=author_email,
-                        description=description)
-        if tags is not None:
-            ch.tags = tags
-        return ch
-
-    def make_merge_child(self, patch, description, author=None, 
-            author_name=None, author_email=None, tags=None):
-        ch = self.merge_children.create(patch=patch,
-                        tree=self.tree, author=author,
-                        author_name=author_name,
-                        author_email=author_email,
-                        description=description,
-                        tags=tags)
-        if tags is not None:
-            ch.tags = tags
-        return ch
-
-    def apply_to(self, text):
-        return mdiff.patch(text, pickle.loads(self.patch.encode('ascii')))
-
-    def merge_with(self, other, author=None, 
-            author_name=None, author_email=None, 
-            description=u"Automatic merge."):
-        assert self.tree_id == other.tree_id  # same tree
-        if other.parent_id == self.pk:
-            # immediate child 
+        """Return the stored text of this revision, decoded from UTF-8.
+
+        Non-empty text is cleaned on the way out: ``<dc:></dc:>`` is removed
+        and ``<div class="img">`` is replaced with ``<div>``.
+        """
+        f = repo.open(self.get_text_path())
+        try:
+            # Close the file even if reading or decoding fails.
+            text = f.read().decode('utf-8')
+        finally:
+            f.close()
+        if text:
+            text = text.replace(
+                '<dc:></dc:>', '').replace(
+                '<div class="img">', '<div>')
+        return text
+
+    def is_descendant_of(self, other):
+        """Tell whether ``other`` is reachable from this revision's parents.
+
+        Follows both ``parent`` and ``merge_parent`` links.  A revision is
+        not considered a descendant of itself.
+
+        :returns: True if ``other`` is an ancestor of this revision
+        """
+        target_pk = other.pk
+        visited = set()
+        pending = [self]
+        # Iterative walk that remembers visited revisions, so history shared
+        # by both sides of a merge is only traversed once.
+        while pending:
+            rev = pending.pop()
+            links = (
+                (rev.parent_id, 'parent'),
+                (rev.merge_parent_id, 'merge_parent'),
+            )
+            for ancestor_pk, link_name in links:
+                if ancestor_pk is None or ancestor_pk in visited:
+                    continue
+                if ancestor_pk == target_pk:
+                    return True
+                visited.add(ancestor_pk)
+                # The related revision is only accessed once it has to be
+                # walked; the comparison above needs just the stored id.
+                pending.append(getattr(rev, link_name))
+        return False
+
+    def get_common_ancestor_with(self, other):
+        # VERY naive approach.
+        if self.pk == other.pk:
+            return self
+        if self.is_descendant_of(other):
             return other
+        if other.is_descendant_of(self):
+            return self
 
-        local = self.materialize()
-        base = other.merge_parent.materialize()
-        remote = other.apply_to(base)
-
-        merge = simplemerge.Merge3Text(base, local, remote)
-        result = ''.join(merge.merge_lines())
-        patch = self.make_patch(local, result)
-        return self.children.create(
-                    patch=patch, merge_parent=other, tree=self.tree,
-                    author=author,
-                    author_name=author_name,
-                    author_email=author_email,
-                    description=description)
-
-    def revert(self, **kwargs):
-        """ commit this version of a doc as new head """
-        self.tree.commit(text=self.materialize(), **kwargs)
-
-
-def create_tag_model(model):
-    name = model.__name__ + 'Tag'
-    attrs = {
-        '__module__': model.__module__,
-    }
-    return type(name, (Tag,), attrs)
-
-
-def create_change_model(model):
-    name = model.__name__ + 'Change'
-
-    attrs = {
-        '__module__': model.__module__,
-        'tree': models.ForeignKey(model, related_name='change_set'),
-        'tags': models.ManyToManyField(model.tag_model, related_name='change_set'),
-    }
-    return type(name, (Change,), attrs)
-
-
-
-class DocumentMeta(ModelBase):
-    "Metaclass for Document models."
-    def __new__(cls, name, bases, attrs):
-        model = super(DocumentMeta, cls).__new__(cls, name, bases, attrs)
-        if not model._meta.abstract:
-            # create a real Tag object and `stage' fk
-            model.tag_model = create_tag_model(model)
-            models.ForeignKey(model.tag_model, 
-                null=True, blank=True).contribute_to_class(model, 'stage')
-
-            # create real Change model and `head' fk
-            model.change_model = create_change_model(model)
-            models.ForeignKey(model.change_model,
-                    null=True, blank=True, default=None,
-                    help_text=_("This document's current head."),
-                    editable=False).contribute_to_class(model, 'head')
-
-        return model
-
-
-
-class Document(models.Model):
-    """
-        File in repository.        
-    """
-    __metaclass__ = DocumentMeta
-
-    creator = models.ForeignKey(User, null=True, blank=True, editable=False,
-                related_name="created_documents")
-
-    user = models.ForeignKey(User, null=True, blank=True)
-
-    class Meta:
-        abstract = True
-
-    def __unicode__(self):
-        return u"{0}, HEAD: {1}".format(self.id, self.head_id)
-
-    @models.permalink
-    def get_absolute_url(self):
-        return ('dvcs.views.document_data', (), {
-                        'document_id': self.id,
-                        'version': self.head_id,
-        })
-
-    def materialize(self, change=None):
-        if self.head is None:
-            return u''
-        if change is None:
-            change = self.head
-        elif not isinstance(change, Change):
-            change = self.change_set.get(pk=change)
-        return change.materialize()
-
-    def commit(self, **kwargs):
-        if 'parent' not in kwargs:
-            parent = self.head
+        if self.parent is not None:
+            parent_ca = self.parent.get_common_ancestor_with(other)
         else:
-            parent = kwargs['parent']
-            if not isinstance(parent, Change):
-                parent = self.change_set.objects.get(pk=kwargs['parent'])
-
-        if 'patch' not in kwargs:
-            if 'text' not in kwargs:
-                raise ValueError("You must provide either patch or target document.")
-            patch = Change.make_patch(self.materialize(change=parent), kwargs['text'])
+            parent_ca = None
+
+        if self.merge_parent is not None:
+            merge_parent_ca = self.merge_parent.get_common_ancestor_with(other)
         else:
-            if 'text' in kwargs:
-                raise ValueError("You can provide only text or patch - not both")
-            patch = kwargs['patch']
-
-        author = kwargs.get('author', None)
-        author_name = kwargs.get('author_name', None)
-        author_email = kwargs.get('author_email', None)
-        tags = kwargs.get('tags', [])
-        if tags:
-            # set stage to next tag after the commited one
-            self.stage = max(tags, key=lambda t: t.ordering).next()
-
-        old_head = self.head
-        if parent != old_head:
-            change = parent.make_merge_child(patch, author=author, 
-                    author_name=author_name,
-                    author_email=author_email,
-                    description=kwargs.get('description', ''),
-                    tags=tags)
-            # not Fast-Forward - perform a merge
-            self.head = old_head.merge_with(change, author=author,
-                    author_name=author_name,
-                    author_email=author_email)
+            return parent_ca
+
+        # The merge-parent branch may share no history with ``other``; then
+        # only the parent's result (possibly None) is left to return.
+        if merge_parent_ca is None:
+            return parent_ca
+        # Otherwise prefer the more recently created candidate.
+        if parent_ca is None or parent_ca.created_at < merge_parent_ca.created_at:
+            return merge_parent_ca
+
+        return parent_ca
+
+    def get_ancestors(self):
+        """Return the set of all revisions reachable through parent links.
+
+        Both ``parent`` and ``merge_parent`` are followed; the revision
+        itself is not included.
+        """
+        revs = set()
+        pending = [self]
+        # Walk iteratively and skip revisions already collected, so shared
+        # history behind a merge is not traversed repeatedly.
+        # NOTE(review): relies on revisions hashing and comparing by primary
+        # key, as the original set-based version did.
+        while pending:
+            rev = pending.pop()
+            for ancestor in (rev.parent, rev.merge_parent):
+                if ancestor is not None and ancestor not in revs:
+                    revs.add(ancestor)
+                    pending.append(ancestor)
+        return revs
+
+
+@python_2_unicode_compatible
+class Ref(models.Model):
+    """A reference pointing to a specific revision."""
+
+    revision = models.ForeignKey(
+        Revision, null=True, blank=True, default=None, verbose_name=_('revision'),
+        help_text=_("The document's revision."), editable=False)
+
+    def __str__(self):
+        """Return ``ref:<ref id>->rev:<revision id>``."""
+        template = "ref:{0}->rev:{1}"
+        return template.format(self.id, self.revision_id)
+
+    def merge_text(self, base, local, remote):
+        """Three-way merge of byte strings using the external ``diff3`` tool.
+
+        Override in subclass to have different kinds of merges.
+
+        :param base: content of the common ancestor, as bytes
+        :param local: content of this ref's side, as bytes
+        :param remote: content of the revision being merged in, as bytes
+        :returns: diff3's standard output decoded from UTF-8
+        """
+        files = []
+        try:
+            # diff3 expects the files in the order: mine, older, yours.
+            for f in local, base, remote:
+                temp = NamedTemporaryFile(delete=False)
+                # Remember the name before writing, so the cleanup below
+                # also covers a failed write.
+                files.append(temp.name)
+                try:
+                    temp.write(f)
+                finally:
+                    temp.close()
+            p = Popen(['/usr/bin/diff3', '-mE', '-L', 'old', '-L', '', '-L', 'new'] + files, stdout=PIPE)
+            # NOTE(review): diff3's exit status is not inspected here.
+            result, _ = p.communicate()
+        finally:
+            # Temporary files were created with delete=False; remove them
+            # whether or not the merge succeeded.
+            for name in files:
+                os.unlink(name)
+        return result.decode('utf-8')
+
+    def merge_with(self, revision, author=None, author_name=None, author_email=None, description="Automatic merge."):
+        """Merges a given revision into the ref.
+
+        Four cases are handled:
+
+        * the ref points at no revision yet: it is set to ``revision``;
+        * ``revision`` is the current revision or one of its ancestors:
+          nothing is saved and no signal is sent;
+        * ``revision`` descends from the current revision: the ref is
+          moved forward to it;
+        * otherwise a merge revision is created from the texts of both
+          sides and their common ancestor, and the ref is set to it.
+
+        Except in the "already merged" case, the ref is saved and
+        ``post_merge`` is sent with the ``fast_forward`` flag.
+
+        The author arguments and ``description`` are only used for a newly
+        created merge revision.
+        """
+        if self.revision is None:
+            # Empty ref: adopting the revision counts as a fast forward.
+            fast_forward = True
+            self.revision = revision
+        elif self.revision.pk == revision.pk or self.revision.is_descendant_of(revision):
+            # Already merged.
+            return
+        elif revision.is_descendant_of(self.revision):
+            # Fast forward.
+            fast_forward = True
+            self.revision = revision
         else:
-            self.head = parent.make_child(patch, author=author, 
-                    author_name=author_name,
-                    author_email=author_email,
-                    description=kwargs.get('description', ''),
-                    tags=tags)
-
+            # Need to create a merge revision.
+            fast_forward = False
+            base = self.revision.get_common_ancestor_with(revision)
+
+            local_text = self.materialize().encode('utf-8')
+            base_text = base.materialize().encode('utf-8')
+            other_text = revision.materialize().encode('utf-8')
+
+            merge_text = self.merge_text(base_text, local_text, other_text)
+
+            merge_revision = Revision.create(
+                text=merge_text,
+                parent=self.revision,
+                merge_parent=revision,
+                author=author,
+                author_name=author_name,
+                author_email=author_email,
+                description=description
+            )
+            self.revision = merge_revision
         self.save()
-        return self.head
+        post_merge.send(sender=type(self), instance=self, fast_forward=fast_forward)
 
-    def history(self):
-        return self.change_set.filter(revision__gt=-1)
+    def materialize(self):
+        """Return the text of the referenced revision, or '' when none is set."""
+        if self.revision is None:
+            return ''
+        return self.revision.materialize()
 
-    def revision(self):
-        rev = self.change_set.aggregate(
-                models.Max('revision'))['revision__max']
-        return rev if rev is not None else -1
+    def commit(self, text, parent=False, author=None, author_name=None, author_email=None, description=''):
+        """Creates a new revision and sets it as the ref.
 
-    def at_revision(self, rev):
-        if rev is not None:
-            return self.change_set.get(revision=rev)
-        else:
-            return self.head
+        This will automatically merge the commit into the main branch,
+        if parent is not document's head.
 
-    def publishable(self):
-        changes = self.change_set.filter(publishable=True).order_by('-created_at')[:1]
-        if changes.count():
-            return changes[0]
-        else:
-            return None
+        :param unicode text: new version of the document
+        :param parent: revision to commit against; ``False`` (the default)
+            means the ref's current revision
+        :param User author: the committer
+        :param unicode author_name: committer name (if ``author`` not specified)
+        :param unicode author_email: committer e-mail (if ``author`` not specified)
+        :param unicode description: description stored on the new revision
+        :returns: new head
+        """
+        if parent is False:
+            # If parent revision not set explicitly, use your head.
+            parent = self.revision
+
+        # Warning: this will silently leave revs unreferenced.
+        rev = Revision.create(
+            text=text,
+            author=author,
+            author_name=author_name,
+            author_email=author_email,
+            description=description,
+            parent=parent
+        )
+        self.merge_with(rev, author=author, author_name=author_name, author_email=author_email)
+
+        post_commit.send(sender=type(self), instance=self)
+        # Hand back the head, as the docstring promises.
+        return self.revision
 
-    @staticmethod
-    def listener_initial_commit(sender, instance, created, **kwargs):
-        # run for Document and its subclasses
-        if not isinstance(instance, Document):
-            return
-        if created:
-            instance.head = instance.change_model.objects.create(
-                    revision=-1,
-                    author=instance.creator,
-                    patch=Change.make_patch('', ''),
-                    tree=instance)
-            instance.save()
-
-models.signals.post_save.connect(Document.listener_initial_commit)
+    def history(self):
+        """Return this ref's revision and all its ancestors, oldest first.
+
+        A ref that points at no revision has an empty history.
+        """
+        if self.revision is None:
+            return []
+        revs = self.revision.get_ancestors()
+        revs.add(self.revision)
+        return sorted(revs, key=lambda x: x.created_at)