X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/351730405960130bae699f5459d440a28c072169..852ff1985a45be4a885e7b9f96ce7ef0b5d50c96:/lib/vstorage/__init__.py?ds=inline diff --git a/lib/vstorage/__init__.py b/lib/vstorage/__init__.py index 1060bc25..843bb6b7 100644 --- a/lib/vstorage/__init__.py +++ b/lib/vstorage/__init__.py @@ -1,36 +1,45 @@ # -*- coding: utf-8 -*- +# +# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# import os import tempfile import datetime import mimetypes import urllib +import functools + +import logging +logger = logging.getLogger('fnp.hazlenut.vstorage') # Note: we have to set these before importing Mercurial os.environ['HGENCODING'] = 'utf-8' os.environ['HGMERGE'] = "internal:merge" import mercurial.hg -import mercurial.ui import mercurial.revlog import mercurial.util +from vstorage.hgui import SilentUI + def urlquote(url, safe='/'): - """Quotes URL - + """Quotes URL + >>> urlquote(u'Za\u017c\xf3\u0142\u0107 g\u0119\u015bl\u0105 ja\u017a\u0144') - 'Za%C5%BC%C3%B3%C5%82%C4%87_g%C4%99%C5%9Bl%C4%85_ja%C5%BA%C5%84' + 'Za%C5%BC%C3%B3%C5%82%C4%87%20g%C4%99%C5%9Bl%C4%85%20ja%C5%BA%C5%84' """ - return urllib.quote(url.replace(' ', '_').encode('utf-8', 'ignore'), safe) + return urllib.quote(url.encode('utf-8', 'ignore'), safe) def urlunquote(url): - """Unqotes URL - + """Unqotes URL + # >>> urlunquote('Za%C5%BC%C3%B3%C5%82%C4%87_g%C4%99%C5%9Bl%C4%85_ja%C5%BA%C5%84') - # u'Za\u017c\xf3\u0142\u0107 g\u0119\u015bl\u0105 ja\u017a\u0144' + # u'Za\u017c\xf3\u0142\u0107_g\u0119\u015bl\u0105 ja\u017a\u0144' """ - return unicode(urllib.unquote(url), 'utf-8', 'ignore').replace('_', ' ') + return unicode(urllib.unquote(url), 'utf-8', 'ignore') def find_repo_path(path): @@ -42,27 +51,38 @@ def find_repo_path(path): return path -def locked_repo(func): +def with_working_copy_locked(func): """A decorator for locking the repository when calling a method.""" - def new_func(self, *args, **kwargs): + @functools.wraps(func) + def wrapped(self, *args, **kwargs): """Wrap the original function in locks.""" - wlock = self.repo.wlock() - lock = self.repo.lock() try: - func(self, *args, **kwargs) + return func(self, *args, **kwargs) finally: - lock.release() wlock.release() + return wrapped + + +def with_storage_locked(func): + """A decorator for locking the repository when calling a method.""" - return new_func + @functools.wraps(func) + def wrapped(self, *args, **kwargs): + """Wrap the original function in locks.""" + lock = self.repo.lock() + try: + return func(self, *args, **kwargs) + finally: + lock.release() + return wrapped def guess_mime(file_name): """ Guess file's mime type based on extension. - Default ot text/x-wiki for files without an extension. + Default of text/x-wiki for files without an extension. >>> guess_mime('something.txt') 'text/plain' @@ -109,42 +129,36 @@ class VersionedStorage(object): if not os.path.exists(self.path): os.makedirs(self.path) self.repo_path = find_repo_path(self.path) - try: - self.ui = mercurial.ui.ui(report_untrusted=False, - interactive=False, quiet=True) - except TypeError: - # Mercurial 1.3 changed the way we setup the ui object. - self.ui = mercurial.ui.ui() - self.ui.quiet = True - self.ui._report_untrusted = False - self.ui.setconfig('ui', 'interactive', False) + + self.ui = SilentUI() + if self.repo_path is None: self.repo_path = self.path create = True else: create = False + self.repo_prefix = self.path[len(self.repo_path):].strip('/') self.repo = mercurial.hg.repository(self.ui, self.repo_path, create=create) def reopen(self): """Close and reopen the repo, to make sure we are up to date.""" - self.repo = mercurial.hg.repository(self.ui, self.repo_path) - def _file_path(self, title): - return os.path.join(self.path, urlquote(title, safe='')) + def _file_path(self, title, type='.xml'): + return os.path.join(self.path, urlquote(title, safe='')) + type - def _title_to_file(self, title): - return os.path.join(self.repo_prefix, urlquote(title, safe='')) + def _title_to_file(self, title, type=".xml"): + return os.path.join(self.repo_prefix, urlquote(title, safe='')) + type def _file_to_title(self, filename): assert filename.startswith(self.repo_prefix) - name = filename[len(self.repo_prefix):].strip('/') + name = filename[len(self.repo_prefix):].strip('/').split('.', 1)[0] return urlunquote(name) def __contains__(self, title): - return os.path.exists(self._file_path(title)) + return self._title_to_file(title) in self.repo['tip'] def __iter__(self): return self.all_pages() @@ -157,16 +171,19 @@ class VersionedStorage(object): self.repo.dirstate.setparents(parent_node) node = self._commit([repo_file], text, user) - + partial = lambda filename: repo_file == filename - + # If p1 is equal to p2, there is no work to do. Even the dirstate is correct. p1, p2 = self.repo[None].parents()[0], self.repo[tip_node] if p1 == p2: return text - - # TODO: Check if merge was successful - mercurial.merge.update(self.repo, tip_node, True, False, partial) + + try: + mercurial.merge.update(self.repo, tip_node, True, False, partial) + msg = 'merge of edit conflict' + except mercurial.util.Abort: + msg = 'failed merge of edit conflict' self.repo.dirstate.setparents(tip_node, node) # Mercurial 1.1 and later need updating the merge state @@ -174,52 +191,48 @@ class VersionedStorage(object): mercurial.merge.mergestate(self.repo).mark(repo_file, "r") except (AttributeError, KeyError): pass - return u'merge of edit conflict' + return msg - @locked_repo - def save_file(self, title, file_name, author=u'', comment=u'', parent=None): + @with_working_copy_locked + @with_storage_locked + def save_file(self, title, file_name, **kwargs): """Save an existing file as specified page.""" - user = author.encode('utf-8') or u'anon'.encode('utf-8') - text = comment.encode('utf-8') or u'comment'.encode('utf-8') + author = kwargs.get('author', u'anonymous').encode('utf-8') + comment = kwargs.get('comment', u'Empty comment.').encode('utf-8') + parent = kwargs.get('parent', None) + repo_file = self._title_to_file(title) file_path = self._file_path(title) mercurial.util.rename(file_name, file_path) changectx = self._changectx() + try: filectx_tip = changectx[repo_file] current_page_rev = filectx_tip.filerev() except mercurial.revlog.LookupError: self.repo.add([repo_file]) current_page_rev = -1 - if parent is not None and current_page_rev != parent: - msg = self.merge_changes(changectx, repo_file, text, user, parent) - user = '' - text = msg.encode('utf-8') - self._commit([repo_file], text, user) + if parent is not None and current_page_rev != parent: + msg = self.merge_changes(changectx, repo_file, comment, author, parent) + author = '' + comment = msg.encode('utf-8') - def _commit(self, files, text, user): - try: - return self.repo.commit(files=files, text=text, user=user, - force=True, empty_ok=True) - except TypeError: - # Mercurial 1.3 doesn't accept empty_ok or files parameter - match = mercurial.match.exact(self.repo_path, '', list(files)) - return self.repo.commit(match=match, text=text, user=user, - force=True) + logger.debug("Commiting %r", repo_file) + self._commit([repo_file], comment, author) - def save_data(self, title, data, author=u'', comment=u'', parent=None): + def save_data(self, title, data, **kwargs): """Save data as specified page.""" - try: temp_path = tempfile.mkdtemp(dir=self.path) file_path = os.path.join(temp_path, 'saved') f = open(file_path, "wb") f.write(data) f.close() - self.save_file(title, file_path, author, comment, parent) + + return self.save_file(title=title, file_name=file_path, **kwargs) finally: try: os.unlink(file_path) @@ -230,24 +243,17 @@ class VersionedStorage(object): except OSError: pass - def save_text(self, title, text, author=u'', comment=u'', parent=None): + def save_text(self, **kwargs): """Save text as specified page, encoded to charset.""" + text = kwargs.pop('text') + return self.save_data(data=text.encode(self.charset), **kwargs) - data = text.encode(self.charset) - self.save_data(title, data, author, comment, parent) - - def page_text(self, title): - """Read unicode text of a page.""" - - data = self.open_page(title).read() - text = unicode(data, self.charset, 'replace') - return text + def _commit(self, files, comment, user): + match = mercurial.match.exact(self.repo_path, '', list(files)) + return self.repo.commit(match=match, text=comment, user=user, force=True) - def page_lines(self, page): - for data in page: - yield unicode(data, self.charset, 'replace') - - @locked_repo + @with_working_copy_locked + @with_storage_locked def delete_page(self, title, author=u'', comment=u''): user = author.encode('utf-8') or 'anon' text = comment.encode('utf-8') or 'deleted' @@ -260,103 +266,137 @@ class VersionedStorage(object): self.repo.remove([repo_file]) self._commit([repo_file], text, user) - def open_page(self, title): + def page_text(self, title, revision=None): + """Read unicode text of a page.""" + ctx = self._find_filectx(title, revision) + + if ctx is None: + raise DocumentNotFound(title) + + return ctx.data().decode(self.charset, 'replace'), ctx.filerev() + + def page_text_by_tag(self, title, tag): + """Read unicode text of a taged page.""" + fname = self._title_to_file(title) + tag = u"{fname}#{tag}".format(**locals()).encode('utf-8') + try: - return open(self._file_path(title), "rb") - except IOError: - raise DocumentNotFound() + ctx = self.repo[tag][fname] + return ctx.data().decode(self.charset, 'replace'), ctx.filerev() + except IndexError: + raise DocumentNotFound(fname) + @with_working_copy_locked def page_file_meta(self, title): """Get page's inode number, size and last modification time.""" - try: - (st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, - st_atime, st_mtime, st_ctime) = os.stat(self._file_path(title)) + (_st_mode, st_ino, _st_dev, _st_nlink, _st_uid, _st_gid, st_size, + _st_atime, st_mtime, _st_ctime) = os.stat(self._file_path(title)) except OSError: return 0, 0, 0 return st_ino, st_size, st_mtime - def page_meta(self, title): + @with_working_copy_locked + def page_meta(self, title, revision=None): """Get page's revision, date, last editor and his edit comment.""" + fctx = self._find_filectx(title, revision) - filectx_tip = self._find_filectx(title) - if filectx_tip is None: - raise DocumentNotFound() - #return -1, None, u'', u'' - rev = filectx_tip.filerev() - filectx = filectx_tip.filectx(rev) - date = datetime.datetime.fromtimestamp(filectx.date()[0]) - author = unicode(filectx.user(), "utf-8", - 'replace').split('<')[0].strip() - comment = unicode(filectx.description(), "utf-8", 'replace') - return rev, date, author, comment + if fctx is None: + raise DocumentNotFound(title) + + return { + "revision": fctx.filerev(), + "date": datetime.datetime.fromtimestamp(fctx.date()[0]), + "author": fctx.user().decode("utf-8", 'replace'), + "comment": fctx.description().decode("utf-8", 'replace'), + } def repo_revision(self): - return self._changectx().rev() + return self.repo['tip'].rev() + + def _changectx(self): + return self.repo['tip'] def page_mime(self, title): """ Guess page's mime type based on corresponding file name. Default ot text/x-wiki for files without an extension. """ - return guess_type(self._file_path(title)) - - def _changectx(self): - """Get the changectx of the tip.""" - try: - # This is for Mercurial 1.0 - return self.repo.changectx() - except TypeError: - # Mercurial 1.3 (and possibly earlier) needs an argument - return self.repo.changectx('tip') + return guess_mime(self._file_path(title)) - def _find_filectx(self, title): + def _find_filectx(self, title, rev=None): """Find the last revision in which the file existed.""" + tip = self._changectx() # start with tip - repo_file = self._title_to_file(title) - changectx = self._changectx() - stack = [changectx] - while repo_file not in changectx: - if not stack: - return None - changectx = stack.pop() - for parent in changectx.parents(): - if parent != changectx: - stack.append(parent) - return changectx[repo_file] + def tree_search(tip, repo_file): + logging.info("Searching for %r", repo_file) + current = tip + visited = set() + + stack = [current] + visited.add(current) + + while repo_file not in current: + if not stack: + raise LookupError + + current = stack.pop() + for parent in current.parents(): + if parent not in visited: + stack.append(parent) + visited.add(parent) + + fctx = current[repo_file] + if rev is not None: + fctx = fctx.filectx(rev) + fctx.filerev() + return fctx + + try: + return tree_search(tip, self._title_to_file(title)) + except (IndexError, LookupError): + logging.info("XML file not found, trying plain") + try: + return tree_search(tip, self._title_to_file(title, type='')) + except (IndexError, LookupError): + raise DocumentNotFound(title) def page_history(self, title): """Iterate over the page's history.""" filectx_tip = self._find_filectx(title) - if filectx_tip is None: - return + maxrev = filectx_tip.filerev() minrev = 0 - for rev in range(maxrev, minrev-1, -1): + for rev in range(maxrev, minrev - 1, -1): filectx = filectx_tip.filectx(rev) date = datetime.datetime.fromtimestamp(filectx.date()[0]) - author = unicode(filectx.user(), "utf-8", - 'replace').split('<')[0].strip() - comment = unicode(filectx.description(), "utf-8", 'replace') - yield rev, date, author, comment + author = filectx.user().decode('utf-8', 'replace') + comment = filectx.description().decode("utf-8", 'replace') + tags = [t.rsplit('#', 1)[-1] for t in filectx.changectx().tags() if '#' in t] - def page_revision(self, title, rev): - """Get unicode contents of specified revision of the page.""" + yield { + "version": rev, + "date": date, + "author": author, + "description": comment, + "tag": tags, + } - filectx_tip = self._find_filectx(title) - if filectx_tip is None: - raise DocumentNotFound() - try: - data = filectx_tip.filectx(rev).data() - except IndexError: - raise DocumentNotFound() - return data + @with_working_copy_locked + def add_page_tag(self, title, rev, tag, user, doctag=True): + ctitle = self._title_to_file(title) + + if doctag: + tag = u"{ctitle}#{tag}".format(**locals()).encode('utf-8') + + message = u"Assigned tag {tag!r} to version {rev!r} of {ctitle!r}".format(**locals()).encode('utf-8') - def revision_text(self, title, rev): - data = self.page_revision(title, rev) - text = unicode(data, self.charset, 'replace') - return text + fctx = self._find_filectx(title, rev) + self.repo.tag( + names=tag, node=fctx.node(), local=False, + user=user, message=message, date=None, + ) def history(self): """Iterate over the history of entire wiki.""" @@ -364,12 +404,11 @@ class VersionedStorage(object): changectx = self._changectx() maxrev = changectx.rev() minrev = 0 - for wiki_rev in range(maxrev, minrev-1, -1): + for wiki_rev in range(maxrev, minrev - 1, -1): change = self.repo.changectx(wiki_rev) date = datetime.datetime.fromtimestamp(change.date()[0]) - author = unicode(change.user(), "utf-8", - 'replace').split('<')[0].strip() - comment = unicode(change.description(), "utf-8", 'replace') + author = change.user().decode('utf-8', 'replace') + comment = change.description().decode("utf-8", 'replace') for repo_file in change.files(): if repo_file.startswith(self.repo_prefix): title = self._file_to_title(repo_file) @@ -379,13 +418,12 @@ class VersionedStorage(object): rev = -1 yield title, rev, date, author, comment - def all_pages(self): + def all_pages(self, type=''): + tip = self.repo['tip'] """Iterate over the titles of all pages in the wiki.""" - - for filename in os.listdir(self.path): - if (os.path.isfile(os.path.join(self.path, filename)) - and not filename.startswith('.')): - yield urlunquote(filename) + return [self._file_to_title(filename) for filename in tip + if not filename.startswith('.') + and filename.endswith(type) ] def changed_since(self, rev): """Return all pages that changed since specified repository revision.""" @@ -399,6 +437,15 @@ class VersionedStorage(object): current = self.repo.lookup('tip') status = self.repo.status(current, last) modified, added, removed, deleted, unknown, ignored, clean = status - for filename in modified+added+removed+deleted: + for filename in modified + added + removed + deleted: if filename.startswith(self.repo_prefix): yield self._file_to_title(filename) + + def revert(self, pageid, rev, **commit_args): + """ Make the given version of page the current version (reverting changes). """ + + # Find the old version + fctx = self._find_filectx(pageid, rev) + + # Restore the contents + self.save_data(pageid, fctx.data(), **commit_args)