lib/vstorage/__init__.py

   1 # -*- coding: utf-8 -*-
   2 import os
   3 import tempfile
   4 import datetime
   5 import mimetypes
   6 import urllib
   7
   8 # Note: we have to set these before importing Mercurial
   9 os.environ['HGENCODING'] = 'utf-8'
  10 os.environ['HGMERGE'] = "internal:merge"
  11
  12 import mercurial.hg
  13 import mercurial.ui
  14 import mercurial.revlog
  15 import mercurial.util
  16
  17
  18 def urlquote(url, safe='/'):
  19     """Quotes URL
  20
  21     >>> urlquote(u'Za\u017c\xf3\u0142\u0107 g\u0119\u015bl\u0105 ja\u017a\u0144')
  22     'Za%C5%BC%C3%B3%C5%82%C4%87_g%C4%99%C5%9Bl%C4%85_ja%C5%BA%C5%84'
  23     """
  24     return urllib.quote(url.replace(' ', '_').encode('utf-8', 'ignore'), safe)
  25
  26 def urlunquote(url):
  27     """Unqotes URL
  28
  29     # >>> urlunquote('Za%C5%BC%C3%B3%C5%82%C4%87_g%C4%99%C5%9Bl%C4%85_ja%C5%BA%C5%84')
  30     # u'Za\u017c\xf3\u0142\u0107 g\u0119\u015bl\u0105 ja\u017a\u0144'
  31     """
  32     return unicode(urllib.unquote(url), 'utf-8', 'ignore').replace('_', ' ')
  33
  34 def find_repo_path(path):
  35     """Go up the directory tree looking for a Mercurial repository (a directory containing a .hg subdirectory)."""
  36     while not os.path.isdir(os.path.join(path, ".hg")):
  37         old_path, path = path, os.path.dirname(path)
  38         if path == old_path:
  39             return None
  40     return path
  41
  42 def locked_repo(func):
  43     """A decorator for locking the repository when calling a method."""
  44
  45     def new_func(self, *args, **kwargs):
  46         """Wrap the original function in locks."""
  47
  48         wlock = self.repo.wlock()
  49         lock = self.repo.lock()
  50         try:
  51             func(self, *args, **kwargs)
  52         finally:
  53             lock.release()
  54             wlock.release()
  55
  56     return new_func
  57
  58
  59 class DocumentNotFound(Exception):
  60     pass
  61
  62
  63 class VersionedStorage(object):
  64     """
  65     Provides means of storing text pages and keeping track of their
  66     change history, using Mercurial repository as the storage method.
  67     """
  68
  69     def __init__(self, path, charset=None):
  70         """
  71         Takes the path to the directory where the pages are to be kept.
  72         If the directory doen't exist, it will be created. If it's inside
  73         a Mercurial repository, that repository will be used, otherwise
  74         a new repository will be created in it.
  75         """
  76
  77         self.charset = charset or 'utf-8'
  78         self.path = path
  79         if not os.path.exists(self.path):
  80             os.makedirs(self.path)
  81         self.repo_path = find_repo_path(self.path)
  82         try:
  83             self.ui = mercurial.ui.ui(report_untrusted=False,
  84                                       interactive=False, quiet=True)
  85         except TypeError:
  86             # Mercurial 1.3 changed the way we setup the ui object.
  87             self.ui = mercurial.ui.ui()
  88             self.ui.quiet = True
  89             self.ui._report_untrusted = False
  90             self.ui.setconfig('ui', 'interactive', False)
  91         if self.repo_path is None:
  92             self.repo_path = self.path
  93             create = True
  94         else:
  95             create = False
  96         self.repo_prefix = self.path[len(self.repo_path):].strip('/')
  97         self.repo = mercurial.hg.repository(self.ui, self.repo_path,
  98                                             create=create)
  99
 100     def reopen(self):
 101         """Close and reopen the repo, to make sure we are up to date."""
 102
 103         self.repo = mercurial.hg.repository(self.ui, self.repo_path)
 104
 105     def _file_path(self, title):
 106         return os.path.join(self.path, urlquote(title, safe=''))
 107
 108     def _title_to_file(self, title):
 109         return os.path.join(self.repo_prefix, urlquote(title, safe=''))
 110
 111     def _file_to_title(self, filename):
 112         assert filename.startswith(self.repo_prefix)
 113         name = filename[len(self.repo_prefix):].strip('/')
 114         return urlunquote(name)
 115
 116     def __contains__(self, title):
 117         return os.path.exists(self._file_path(title))
 118
 119     def __iter__(self):
 120         return self.all_pages()
 121
 122     def merge_changes(self, changectx, repo_file, text, user, parent):
 123         """Commits and merges conflicting changes in the repository."""
 124         tip_node = changectx.node()
 125         filectx = changectx[repo_file].filectx(parent)
 126         parent_node = filectx.changectx().node()
 127
 128         self.repo.dirstate.setparents(parent_node)
 129         node = self._commit([repo_file], text, user)
 130
 131         partial = lambda filename: repo_file == filename
 132
 133         # If p1 is equal to p2, there is no work to do. Even the dirstate is correct.
 134         p1, p2 = self.repo[None].parents()[0], self.repo[tip_node]
 135         if p1 == p2:
 136             return text
 137
 138         # TODO: Check if merge was successful
 139         mercurial.merge.update(self.repo, tip_node, True, False, partial)
 140
 141         self.repo.dirstate.setparents(tip_node, node)
 142         # Mercurial 1.1 and later need updating the merge state
 143         try:
 144             mercurial.merge.mergestate(self.repo).mark(repo_file, "r")
 145         except (AttributeError, KeyError):
 146             pass
 147         return u'merge of edit conflict'
 148
 149     @locked_repo
 150     def save_file(self, title, file_name, author=u'', comment=u'', parent=None):
 151         """Save an existing file as specified page."""
 152
 153         user = author.encode('utf-8') or u'anon'.encode('utf-8')
 154         text = comment.encode('utf-8') or u'comment'.encode('utf-8')
 155         repo_file = self._title_to_file(title)
 156         file_path = self._file_path(title)
 157         mercurial.util.rename(file_name, file_path)
 158         changectx = self._changectx()
 159         try:
 160             filectx_tip = changectx[repo_file]
 161             current_page_rev = filectx_tip.filerev()
 162         except mercurial.revlog.LookupError:
 163             self.repo.add([repo_file])
 164             current_page_rev = -1
 165         if parent is not None and current_page_rev != parent:
 166             msg = self.merge_changes(changectx, repo_file, text, user, parent)
 167             user = '<wiki>'
 168             text = msg.encode('utf-8')
 169         self._commit([repo_file], text, user)
 170
 171
 172     def _commit(self, files, text, user):
 173         try:
 174             return self.repo.commit(files=files, text=text, user=user,
 175                                     force=True, empty_ok=True)
 176         except TypeError:
 177             # Mercurial 1.3 doesn't accept empty_ok or files parameter
 178             match = mercurial.match.exact(self.repo_path, '', list(files))
 179             return self.repo.commit(match=match, text=text, user=user,
 180                                     force=True)
 181
 182
 183     def save_data(self, title, data, author=u'', comment=u'', parent=None):
 184         """Save data as specified page."""
 185
 186         try:
 187             temp_path = tempfile.mkdtemp(dir=self.path)
 188             file_path = os.path.join(temp_path, 'saved')
 189             f = open(file_path, "wb")
 190             f.write(data)
 191             f.close()
 192             self.save_file(title, file_path, author, comment, parent)
 193         finally:
 194             try:
 195                 os.unlink(file_path)
 196             except OSError:
 197                 pass
 198             try:
 199                 os.rmdir(temp_path)
 200             except OSError:
 201                 pass
 202
 203     def save_text(self, title, text, author=u'', comment=u'', parent=None):
 204         """Save text as specified page, encoded to charset."""
 205
 206         data = text.encode(self.charset)
 207         self.save_data(title, data, author, comment, parent)
 208
 209     def page_text(self, title):
 210         """Read unicode text of a page."""
 211
 212         data = self.open_page(title).read()
 213         text = unicode(data, self.charset, 'replace')
 214         return text
 215
 216     def page_lines(self, page):
 217         for data in page:
 218             yield unicode(data, self.charset, 'replace')
 219
 220     @locked_repo
 221     def delete_page(self, title, author=u'', comment=u''):
 222         user = author.encode('utf-8') or 'anon'
 223         text = comment.encode('utf-8') or 'deleted'
 224         repo_file = self._title_to_file(title)
 225         file_path = self._file_path(title)
 226         try:
 227             os.unlink(file_path)
 228         except OSError:
 229             pass
 230         self.repo.remove([repo_file])
 231         self._commit([repo_file], text, user)
 232
 233     def open_page(self, title):
 234         try:
 235             return open(self._file_path(title), "rb")
 236         except IOError:
 237             raise DocumentNotFound()
 238
 239     def page_file_meta(self, title):
 240         """Get page's inode number, size and last modification time."""
 241
 242         try:
 243             (st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid, st_size,
 244              st_atime, st_mtime, st_ctime) = os.stat(self._file_path(title))
 245         except OSError:
 246             return 0, 0, 0
 247         return st_ino, st_size, st_mtime
 248
 249     def page_meta(self, title):
 250         """Get page's revision, date, last editor and his edit comment."""
 251
 252         filectx_tip = self._find_filectx(title)
 253         if filectx_tip is None:
 254             raise DocumentNotFound()
 255             #return -1, None, u'', u''
 256         rev = filectx_tip.filerev()
 257         filectx = filectx_tip.filectx(rev)
 258         date = datetime.datetime.fromtimestamp(filectx.date()[0])
 259         author = unicode(filectx.user(), "utf-8",
 260                          'replace').split('<')[0].strip()
 261         comment = unicode(filectx.description(), "utf-8", 'replace')
 262         return rev, date, author, comment
 263
 264     def repo_revision(self):
 265         return self._changectx().rev()
 266
 267     def page_mime(self, title):
 268         """
 269         Guess page's mime type ased on corresponding file name.
 270         Default ot text/x-wiki for files without an extension.
 271
 272         # >>> page_mime('something.txt')
 273         # 'text/plain'
 274         # >>> page_mime('SomePage')
 275         # 'text/x-wiki'
 276         # >>> page_mime(u'ąęśUnicodePage')
 277         # 'text/x-wiki'
 278         # >>> page_mime('image.png')
 279         # 'image/png'
 280         # >>> page_mime('style.css')
 281         # 'text/css'
 282         # >>> page_mime('archive.tar.gz')
 283         # 'archive/gzip'
 284         """
 285
 286         addr = self._file_path(title)
 287         mime, encoding = mimetypes.guess_type(addr, strict=False)
 288         if encoding:
 289             mime = 'archive/%s' % encoding
 290         if mime is None:
 291             mime = 'text/x-wiki'
 292         return mime
 293
 294     def _changectx(self):
 295         """Get the changectx of the tip."""
 296         try:
 297             # This is for Mercurial 1.0
 298             return self.repo.changectx()
 299         except TypeError:
 300             # Mercurial 1.3 (and possibly earlier) needs an argument
 301             return self.repo.changectx('tip')
 302
 303     def _find_filectx(self, title):
 304         """Find the last revision in which the file existed."""
 305
 306         repo_file = self._title_to_file(title)
 307         changectx = self._changectx()
 308         stack = [changectx]
 309         while repo_file not in changectx:
 310             if not stack:
 311                 return None
 312             changectx = stack.pop()
 313             for parent in changectx.parents():
 314                 if parent != changectx:
 315                     stack.append(parent)
 316         return changectx[repo_file]
 317
 318     def page_history(self, title):
 319         """Iterate over the page's history."""
 320
 321         filectx_tip = self._find_filectx(title)
 322         if filectx_tip is None:
 323             return
 324         maxrev = filectx_tip.filerev()
 325         minrev = 0
 326         for rev in range(maxrev, minrev-1, -1):
 327             filectx = filectx_tip.filectx(rev)
 328             date = datetime.datetime.fromtimestamp(filectx.date()[0])
 329             author = unicode(filectx.user(), "utf-8",
 330                              'replace').split('<')[0].strip()
 331             comment = unicode(filectx.description(), "utf-8", 'replace')
 332             yield rev, date, author, comment
 333
 334     def page_revision(self, title, rev):
 335         """Get unicode contents of specified revision of the page."""
 336
 337         filectx_tip = self._find_filectx(title)
 338         if filectx_tip is None:
 339             raise DocumentNotFound()
 340         try:
 341             data = filectx_tip.filectx(rev).data()
 342         except IndexError:
 343             raise DocumentNotFound()
 344         return data
 345
 346     def revision_text(self, title, rev):
 347         data = self.page_revision(title, rev)
 348         text = unicode(data, self.charset, 'replace')
 349         return text
 350
 351     def history(self):
 352         """Iterate over the history of entire wiki."""
 353
 354         changectx = self._changectx()
 355         maxrev = changectx.rev()
 356         minrev = 0
 357         for wiki_rev in range(maxrev, minrev-1, -1):
 358             change = self.repo.changectx(wiki_rev)
 359             date = datetime.datetime.fromtimestamp(change.date()[0])
 360             author = unicode(change.user(), "utf-8",
 361                              'replace').split('<')[0].strip()
 362             comment = unicode(change.description(), "utf-8", 'replace')
 363             for repo_file in change.files():
 364                 if repo_file.startswith(self.repo_prefix):
 365                     title = self._file_to_title(repo_file)
 366                     try:
 367                         rev = change[repo_file].filerev()
 368                     except mercurial.revlog.LookupError:
 369                         rev = -1
 370                     yield title, rev, date, author, comment
 371
 372     def all_pages(self):
 373         """Iterate over the titles of all pages in the wiki."""
 374
 375         for filename in os.listdir(self.path):
 376             if (os.path.isfile(os.path.join(self.path, filename))
 377                 and not filename.startswith('.')):
 378                 yield urlunquote(filename)
 379
 380     def changed_since(self, rev):
 381         """Return all pages that changed since specified repository revision."""
 382
 383         try:
 384             last = self.repo.lookup(int(rev))
 385         except IndexError:
 386             for page in self.all_pages():
 387                 yield page
 388                 return
 389         current = self.repo.lookup('tip')
 390         status = self.repo.status(current, last)
 391         modified, added, removed, deleted, unknown, ignored, clean = status
 392         for filename in modified+added+removed+deleted:
 393             if filename.startswith(self.repo_prefix):
 394                 yield self._file_to_title(filename)