-import os.path
# -*- encoding: utf-8 -*-
+import os.path
+
+import logging
+log = logging.getLogger('platforma.api.library')
__author__= "Łukasz Rekucki"
__date__ = "$2009-09-25 15:49:50$"
from django.core.urlresolvers import reverse
from django.utils import simplejson as json
+from django.db import IntegrityError
import librarian
import librarian.html
-from librarian import dcparser
+from librarian import dcparser, parser
from wlrepo import *
-from explorer.models import PullRequest, GalleryForDocument
+from api.models import PullRequest
+from explorer.models import GalleryForDocument
# internal imports
import api.forms as forms
#
import settings
+
+def is_prq(username):
+    """Tell whether *username* denotes a pull request rather than a person.
+
+    Such names are recognised purely by their '$prq-' prefix.
+    """
+    prq_prefix = '$prq-'
+    return username.startswith(prq_prefix)
+
+def check_user(request, user):
+    """Yield an access-denied response when the requester may not view *user*.
+
+    This is a generator. It yields at most one ``AccessDenied`` django
+    response and yields nothing when access is allowed, so callers write::
+
+        for error in check_user(request, user):
+            return error
+
+    request -- the current request; only ``request.user`` is consulted.
+    user -- name of the branch owner being accessed; names recognised by
+            ``is_prq`` are treated as pull requests.
+    """
+    # The permission set is only needed for the log line, so skip the
+    # lookup entirely unless INFO messages are actually emitted.
+    if log.isEnabledFor(logging.INFO):
+        log.info("user: %r, perm: %r", request.user,
+                 request.user.get_all_permissions())
+
+    if is_prq(user):
+        # pull request
+        if not request.user.has_perm('api.view_prq'):
+            yield response.AccessDenied().django_response({
+                'reason': 'access-denied',
+                'message': "You don't have enough priviliges to view pull requests."
+            })
+    elif request.user.username != user:
+        # other users
+        if not request.user.has_perm('api.view_other_document'):
+            yield response.AccessDenied().django_response({
+                'reason': 'access-denied',
+                'message': "You don't have enough priviliges to view other people's document."
+            })
+
#
# Document List Handlers
#
+# TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
allowed_methods = ('GET',)
document_list = [{
'url': reverse('document_view', args=[docid]),
'name': docid } for docid in lib.documents() ]
-
return {'documents' : document_list}
-
+#
+# This handler controls the document collection
+#
class LibraryHandler(BaseHandler):
allowed_methods = ('GET', 'POST')
anonymous = BasicLibraryHandler
documents = {}
- for docid in lib.documents():
- docid = docid.decode('utf-8')
+ for docid in lib.documents():
documents[docid] = {
'url': reverse('document_view', args=[docid]),
'name': docid,
for part, docid in parts:
# this way, we won't display broken links
if not documents.has_key(part):
- print "NOT FOUND:", part
+ log.info("NOT FOUND: %s", part)
continue
parent = documents[docid]
return {'documents': sorted(document_tree.itervalues(),
key=natural_order(lambda d: d['name']) ) }
+
@validate_form(forms.DocumentUploadForm, 'POST')
@hglibrary
def create(self, request, form, lib):
else:
data = request.FILES['ocr_file'].read().decode('utf-8')
+ if data is None:
+ return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
+
if form.cleaned_data['generate_dc']:
data = librarian.wrap_text(data, unicode(date.today()))
try:
lock = lib.lock()
try:
- print "DOCID", docid
+ log.info("DOCID %s", docid)
doc = lib.document_create(docid)
# document created, but no content yet
doc = doc.quickwrite('xml', data.encode('utf-8'),
'$AUTO$ XML data uploaded.', user=request.user.username)
except Exception,e:
+ import traceback
# rollback branch creation
lib._rollback()
- raise LibraryException("Exception occured:" + repr(e))
+ raise LibraryException(traceback.format_exc())
url = reverse('document_view', args=[doc.id])
finally:
lock.release()
except LibraryException, e:
- return response.InternalError().django_response(\
- {'exception': repr(e) })
+ import traceback
+ return response.InternalError().django_response({
+ "reason": traceback.format_exc()
+ })
except DocumentAlreadyExists:
# Document is already there
- return response.EntityConflict().django_response(\
- {"reason": "Document %s already exists." % docid})
+ # The message needs a %s placeholder: formatting a placeholder-less
+ # string with docid raises TypeError instead of reporting the conflict.
+ return response.EntityConflict().django_response({
+ "reason": "already-exists",
+ "message": "Document %s already exists." % docid
+ })
#
# Document Handlers
allowed_methods = ('GET', 'PUT')
anonymous = BasicDocumentHandler
+ @validate_form(forms.DocumentRetrieveForm, 'GET')
@hglibrary
- def read(self, request, docid, lib):
+ def read(self, request, form, docid, lib):
"""Read document's meta data"""
- try:
- doc = lib.document(docid)
- udoc = doc.take(request.user.username)
- except RevisionNotFound, e:
- return response.EntityNotFound().django_response({
- 'exception': type(e), 'message': e.message})
+ log.info(u"User '%s' wants to %s(%s) as %s" % \
+ (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
- # is_shared = udoc.ancestorof(doc)
- # is_uptodate = is_shared or shared.ancestorof(document)
+ user = form.cleaned_data['user'] or request.user.username
+ rev = form.cleaned_data['revision'] or 'latest'
- result = {
- 'name': udoc.id,
- 'html_url': reverse('dochtml_view', args=[udoc.id]),
- 'text_url': reverse('doctext_view', args=[udoc.id]),
- 'dc_url': reverse('docdc_view', args=[udoc.id]),
- 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
- 'merge_url': reverse('docmerge_view', args=[udoc.id]),
- 'user_revision': udoc.revision,
- 'user_timestamp': udoc.revision.timestamp,
- 'public_revision': doc.revision,
- 'public_timestamp': doc.revision.timestamp,
- }
+ for error in check_user(request, user):
+ return error
+
+ try:
+ doc = lib.document(docid, user, rev=rev)
+ except RevisionMismatch, e:
+ # the document exists, but the revision is bad
+ return response.EntityNotFound().django_response({
+ 'reason': 'revision-mismatch',
+ 'message': e.message,
+ 'docid': docid,
+ 'user': user,
+ })
+ except RevisionNotFound, e:
+ # the user doesn't have this document checked out
+ # or some other weird error occured
+ # try to do the checkout
+ if is_prq(user) or (user == request.user.username):
+ try:
+ mdoc = lib.document(docid)
+ doc = mdoc.take(user)
+
+ if is_prq(user):
+ # source revision, should probably change
+ # but there are no changes yet, so...
+ pass
+
+ except RevisionNotFound, e:
+ return response.EntityNotFound().django_response({
+ 'reason': 'document-not-found',
+ 'message': e.message,
+ 'docid': docid
+ })
+ else:
+ return response.EntityNotFound().django_response({
+ 'reason': 'document-not-found',
+ 'message': e.message,
+ 'docid': docid,
+ 'user': user,
+ })
- return result
+ return {
+ 'name': doc.id,
+ 'user': user,
+ 'html_url': reverse('dochtml_view', args=[doc.id]),
+ 'text_url': reverse('doctext_view', args=[doc.id]),
+ # 'dc_url': reverse('docdc_view', args=[doc.id]),
+ 'gallery_url': reverse('docgallery_view', args=[doc.id]),
+ 'merge_url': reverse('docmerge_view', args=[doc.id]),
+ 'revision': doc.revision,
+ 'timestamp': doc.revision.timestamp,
+ # 'public_revision': doc.revision,
+ # 'public_timestamp': doc.revision.timestamp,
+ }
- @hglibrary
- def update(self, request, docid, lib):
- """Update information about the document, like display not"""
- return
+
+# @hglibrary
+# def update(self, request, docid, lib):
+# """Update information about the document, like display not"""
+# return
#
#
#
class DocumentHTMLHandler(BaseHandler):
allowed_methods = ('GET')
+ @validate_form(forms.DocumentRetrieveForm, 'GET')
@hglibrary
- def read(self, request, docid, lib):
+ def read(self, request, form, docid, lib, stylesheet='partial'):
"""Read document as html text"""
try:
- revision = request.GET.get('revision', 'latest')
-
- if revision == 'latest':
- document = lib.document(docid)
- else:
- document = lib.document_for_rev(revision)
+ revision = form.cleaned_data['revision']
+ user = form.cleaned_data['user'] or request.user.username
+ document = lib.document_for_rev(revision)
if document.id != docid:
- return response.BadRequest().django_response({'reason': 'name-mismatch',
- 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
+ return response.BadRequest().django_response({
+ 'reason': 'name-mismatch',
+ 'message': 'Provided revision is not valid for this document'
+ })
+
+ if document.owner != user:
+ return response.BadRequest().django_response({
+ 'reason': 'user-mismatch',
+ 'message': "Provided revision doesn't belong to user %s" % user
+ })
+
+ for error in check_user(request, user):
+ return error
- return librarian.html.transform(document.data('xml'), is_file=False)
+ return librarian.html.transform(document.data('xml'), is_file=False, \
+ parse_dublincore=False, stylesheet=stylesheet,\
+ options={
+ "with-paths": 'boolean(1)',
+ })
+
except (EntryNotFound, RevisionNotFound), e:
return response.EntityNotFound().django_response({
- 'exception': type(e), 'message': e.message})
-
+ 'reason': 'not-found', 'message': e.message})
+ except librarian.ParseError, e:
+ return response.InternalError().django_response({
+ 'reason': 'xml-parse-error', 'message': e.message })
#
# Image Gallery
#
-from django.core.files.storage import FileSystemStorage
class DocumentGalleryHandler(BaseHandler):
allowed_methods = ('GET')
+
def read(self, request, docid):
"""Read meta-data about scans for gallery of this document."""
galleries = []
+ from urllib import quote
for assoc in GalleryForDocument.objects.filter(document=docid):
dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
if not os.path.isdir(dirpath):
- print u"[WARNING]: missing gallery %s" % dirpath
+ log.warn(u"[WARNING]: missing gallery %s", dirpath)
continue
gallery = {'name': assoc.name, 'pages': []}
- for file in sorted(os.listdir(dirpath), key=natural_order()):
- print file
- name, ext = os.path.splitext(os.path.basename(file))
-
- if ext.lower() not in ['.png', '.jpeg', '.jpg']:
- print "Ignoring:", name, ext
- continue
-
- url = settings.MEDIA_URL + assoc.subpath + u'/' + file.decode('utf-8');
- gallery['pages'].append(url)
+            # Build one page URL per directory entry.  Entries that cannot be
+            # used (undecodable name, or not a supported image type) are
+            # represented by the 'missing.png' placeholder.
+            # NOTE(review): the placeholder for non-image files follows the
+            # original `url = None` assignment, which used to be overwritten
+            # unconditionally -- confirm this is the intended behaviour
+            # (the removed code skipped such files instead).
+            for filename in sorted(os.listdir(dirpath)):
+                if not isinstance(filename, unicode):
+                    try:
+                        filename = filename.decode('utf-8')
+                    except UnicodeDecodeError:
+                        log.warn(u"File %r in gallery %r is not unicode. Ommiting.",
+                            filename, dirpath)
+                        filename = None
+
+                # Reset for every entry so the URL of a previous page can
+                # never be reused by accident.
+                url = None
+
+                if filename is not None:
+                    name, ext = os.path.splitext(os.path.basename(filename))
+
+                    if ext.lower() in [u'.png', u'.jpeg', u'.jpg']:
+                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
+                    else:
+                        log.warn(u"Ignoring: %s %s", name, ext)
+
+                if url is None:
+                    url = settings.MEDIA_URL + u'/missing.png'
+
+                gallery['pages'].append( quote(url.encode('utf-8')) )
+
+# gallery['pages'].sort()
galleries.append(gallery)
return galleries
#
#
#
+
class DocumentTextHandler(BaseHandler):
allowed_methods = ('GET', 'POST')
+ @validate_form(forms.TextRetrieveForm, 'GET')
@hglibrary
- def read(self, request, docid, lib):
- """Read document as raw text"""
- revision = request.GET.get('revision', 'latest')
+ def read(self, request, form, docid, lib):
+ """Read document as raw text"""
try:
- if revision == 'latest':
- document = lib.document(docid)
- else:
- document = lib.document_for_rev(revision)
-
+ revision = form.cleaned_data['revision']
+ part = form.cleaned_data['part']
+ user = form.cleaned_data['user'] or request.user.username
+
+ document = lib.document_for_rev(revision)
+
if document.id != docid:
- return response.BadRequest().django_response({'reason': 'name-mismatch',
- 'message': 'Provided revision is not valid for this document'})
+ return response.BadRequest().django_response({
+ 'reason': 'name-mismatch',
+ 'message': 'Provided revision is not valid for this document'
+ })
+
+ if document.owner != user:
+ return response.BadRequest().django_response({
+ 'reason': 'user-mismatch',
+ 'message': "Provided revision doesn't belong to user %s" % user
+ })
+
+ for error in check_user(request, user):
+ return error
- # TODO: some finer-grained access control
- return document.data('xml')
+ if not part:
+ return document.data('xml')
+
+ xdoc = parser.WLDocument.from_string(document.data('xml'),\
+ parse_dublincore=False)
+ ptext = xdoc.part_as_text(part)
+
+ if ptext is None:
+ return response.EntityNotFound().django_response({
+ 'reason': 'no-part-in-document'
+ })
+
+ return ptext
+ except librarian.ParseError, e:
+ return response.EntityNotFound().django_response({
+ 'reason': 'invalid-document-state',
+ 'exception': type(e),
+ 'message': e.message
+ })
except (EntryNotFound, RevisionNotFound), e:
return response.EntityNotFound().django_response({
- 'exception': type(e), 'message': e.message})
+ 'reason': 'not-found',
+ 'exception': type(e), 'message': e.message
+ })
+ @validate_form(forms.TextUpdateForm, 'POST')
@hglibrary
- def create(self, request, docid, lib):
+ def create(self, request, form, docid, lib):
try:
- data = request.POST['contents']
- revision = request.POST['revision']
-
- if request.POST.has_key('message'):
- msg = u"$USER$ " + request.POST['message']
- else:
- msg = u"$AUTO$ XML content update."
+ revision = form.cleaned_data['revision']
+ msg = form.cleaned_data['message']
+ user = form.cleaned_data['user'] or request.user.username
- current = lib.document(docid, request.user.username)
+ # do not allow changing not owned documents
+ # (for now... )
+
+
+ if user != request.user.username:
+ return response.AccessDenied().django_response({
+ 'reason': 'insufficient-priviliges',
+ })
+
+ current = lib.document(docid, user)
orig = lib.document_for_rev(revision)
if current != orig:
"reason": "out-of-date",
"provided_revision": orig.revision,
"latest_revision": current.revision })
+
+ if form.cleaned_data.has_key('contents'):
+ data = form.cleaned_data['contents']
+ else:
+ chunks = form.cleaned_data['chunks']
+ xdoc = parser.WLDocument.from_string(current.data('xml'))
+ errors = xdoc.merge_chunks(chunks)
+
+ if len(errors):
+ return response.EntityConflict().django_response({
+ "reason": "invalid-chunks",
+ "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
+ })
+
+ data = xdoc.serialize()
# try to find any Xinclude tags
includes = [m.groupdict()['link'] for m in (re.finditer(\
XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
- print "INCLUDES: ", includes
+ log.info("INCLUDES: %s", includes)
# TODO: provide useful routines to make this simpler
def xml_update_action(lib, resolve):
ndoc = None
ndoc = current.invoke_and_commit(\
- xml_update_action, lambda d: (msg, current.owner) )
+ xml_update_action, lambda d: (msg, user) )
try:
# return the new revision number
return response.SuccessAllOk().django_response({
"document": ndoc.id,
+ "user": user,
"subview": "xml",
"previous_revision": current.revision,
"revision": ndoc.revision,
#
# @requires librarian
#
-class DocumentDublinCoreHandler(BaseHandler):
- allowed_methods = ('GET', 'POST')
-
- @hglibrary
- def read(self, request, docid, lib):
- """Read document as raw text"""
- try:
- revision = request.GET.get('revision', 'latest')
-
- if revision == 'latest':
- doc = lib.document(docid)
- else:
- doc = lib.document_for_rev(revision)
-
-
- if document.id != docid:
- return response.BadRequest().django_response({'reason': 'name-mismatch',
- 'message': 'Provided revision is not valid for this document'})
-
- bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
- return bookinfo.serialize()
- except (EntryNotFound, RevisionNotFound), e:
- return response.EntityNotFound().django_response({
- 'exception': type(e), 'message': e.message})
-
- @hglibrary
- def create(self, request, docid, lib):
- try:
- bi_json = request.POST['contents']
- revision = request.POST['revision']
-
- if request.POST.has_key('message'):
- msg = u"$USER$ " + request.PUT['message']
- else:
- msg = u"$AUTO$ Dublin core update."
-
- current = lib.document(docid, request.user.username)
- orig = lib.document_for_rev(revision)
-
- if current != orig:
- return response.EntityConflict().django_response({
- "reason": "out-of-date",
- "provided": orig.revision,
- "latest": current.revision })
-
- xmldoc = parser.WLDocument.from_string(current.data('xml'))
- document.book_info = dcparser.BookInfo.from_json(bi_json)
-
- # zapisz
- ndoc = current.quickwrite('xml', \
- document.serialize().encode('utf-8'),\
- message=msg, user=request.user.username)
-
- try:
- # return the new revision number
- return {
- "document": ndoc.id,
- "subview": "dc",
- "previous_revision": current.revision,
- "revision": ndoc.revision,
- 'timestamp': ndoc.revision.timestamp,
- "url": reverse("docdc_view", args=[ndoc.id])
- }
- except Exception, e:
- if ndoc: lib._rollback()
- raise e
- except RevisionNotFound:
- return response.EntityNotFound().django_response()
+#class DocumentDublinCoreHandler(BaseHandler):
+# allowed_methods = ('GET', 'POST')
+#
+# @hglibrary
+# def read(self, request, docid, lib):
+# """Read document as raw text"""
+# try:
+# revision = request.GET.get('revision', 'latest')
+#
+# if revision == 'latest':
+# doc = lib.document(docid)
+# else:
+# doc = lib.document_for_rev(revision)
+#
+#
+# if document.id != docid:
+# return response.BadRequest().django_response({'reason': 'name-mismatch',
+# 'message': 'Provided revision is not valid for this document'})
+#
+# bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
+# return bookinfo.serialize()
+# except (EntryNotFound, RevisionNotFound), e:
+# return response.EntityNotFound().django_response({
+# 'exception': type(e), 'message': e.message})
+#
+# @hglibrary
+# def create(self, request, docid, lib):
+# try:
+# bi_json = request.POST['contents']
+# revision = request.POST['revision']
+#
+# if request.POST.has_key('message'):
+# msg = u"$USER$ " + request.PUT['message']
+# else:
+# msg = u"$AUTO$ Dublin core update."
+#
+# current = lib.document(docid, request.user.username)
+# orig = lib.document_for_rev(revision)
+#
+# if current != orig:
+# return response.EntityConflict().django_response({
+# "reason": "out-of-date",
+# "provided": orig.revision,
+# "latest": current.revision })
+#
+# xmldoc = parser.WLDocument.from_string(current.data('xml'))
+# document.book_info = dcparser.BookInfo.from_json(bi_json)
+#
+# # zapisz
+# ndoc = current.quickwrite('xml', \
+# document.serialize().encode('utf-8'),\
+# message=msg, user=request.user.username)
+#
+# try:
+# # return the new revision number
+# return {
+# "document": ndoc.id,
+# "subview": "dc",
+# "previous_revision": current.revision,
+# "revision": ndoc.revision,
+# 'timestamp': ndoc.revision.timestamp,
+# "url": reverse("docdc_view", args=[ndoc.id])
+# }
+# except Exception, e:
+# if ndoc: lib._rollback()
+# raise e
+# except RevisionNotFound:
+# return response.EntityNotFound().django_response()
class MergeHandler(BaseHandler):
allowed_methods = ('POST',)
@hglibrary
def create(self, request, form, docid, lib):
"""Create a new document revision from the information provided by user"""
+ revision = form.cleaned_data['revision']
- target_rev = form.cleaned_data['target_revision']
-
+ # fetch the main branch document
doc = lib.document(docid)
- udoc = doc.take(request.user.username)
- if target_rev == 'latest':
- target_rev = udoc.revision
+ # fetch the base document
+ user_doc = lib.document_for_rev(revision)
+ base_doc = user_doc.latest()
- if str(udoc.revision) != target_rev:
- # user think doesn't know he has an old version
- # of his own branch.
-
- # Updating is teorericly ok, but we need would
- # have to force a refresh. Sharing may be not safe,
- # 'cause it doesn't always result in update.
-
- # In other words, we can't lie about the resource's state
- # So we should just yield and 'out-of-date' conflict
- # and let the client ask again with updated info.
-
- # NOTE: this could result in a race condition, when there
- # are 2 instances of the same user editing the same document.
- # Instance "A" trying to update, and instance "B" always changing
- # the document right before "A". The anwser to this problem is
- # for the "A" to request a merge from 'latest' and then
- # check the parent revisions in response, if he actually
- # merge from where he thinks he should. If not, the client SHOULD
- # update his internal state.
+ if base_doc != user_doc:
return response.EntityConflict().django_response({
- "reason": "out-of-date",
- "provided": target_rev,
- "latest": udoc.revision })
-
- if not request.user.has_perm('explorer.book.can_share'):
- # User is not permitted to make a merge, right away
- # So we instead create a pull request in the database
- prq = PullRequest(
- comitter=request.user,
- document=docid,
- source_revision = str(udoc.revision),
- status="N",
- comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
- )
-
- prq.save()
- return response.RequestAccepted().django_response(\
- ticket_status=prq.status, \
- ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
+ "reason": "out-of-date",
+ "provided": str(user_doc.revision),
+ "latest": str(base_doc.revision)
+ })
if form.cleaned_data['type'] == 'update':
# update is always performed from the file branch
# to the user branch
- success, changed = udoc.update(request.user.username)
+ changed, clean = base_doc.update(request.user.username)
+
+ # update user document
+ if changed:
+ user_doc_new = user_doc.latest()
+ else:
+ user_doc_new = user_doc
+
+ # shared document is the same
+ doc_new = doc
if form.cleaned_data['type'] == 'share':
- success, changed = udoc.share(form.cleaned_data['message'])
+ if not base_doc.up_to_date():
+ return response.BadRequest().django_response({
+ "reason": "not-fast-forward",
+ "message": "You must first update yout branch to the latest version."
+ })
- if not success:
- return response.EntityConflict().django_response({
- 'reason': 'merge-failure',
- })
+ # check for unresolved conflicts
+ if base_doc.has_conflict_marks():
+ return response.BadRequest().django_response({
+ "reason": "unresolved-conflicts",
+ "message": "There are unresolved conflicts in your file. Fix them, and try again."
+ })
- if not changed:
- return response.SuccessNoContent().django_response()
+ if not request.user.has_perm('api.share_document'):
+ # User is not permitted to make a merge, right away
+ # So we instead create a pull request in the database
+ try:
+ prq, created = PullRequest.objects.get_or_create(
+ comitter = request.user,
+ document = docid,
+ status = "N",
+ defaults = {
+ 'source_revision': str(base_doc.revision),
+ 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
+ }
+ )
+
+ # there can't be 2 pending request from same user
+ # for the same document
+ if not created:
+ prq.source_revision = str(base_doc.revision)
+ # separate the appended message with a blank line (unicode literal)
+ prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
+ prq.save()
+
+ return response.RequestAccepted().django_response(\
+ ticket_status=prq.status, \
+ ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
+ except IntegrityError:
+ return response.EntityConflict().django_response({
+ 'reason': 'request-already-exist'
+ })
+
+ changed = base_doc.share(form.cleaned_data['message'])
+
+ # update shared version if needed
+ if changed:
+ doc_new = doc.latest()
+ else:
+ doc_new = doc
- new_udoc = udoc.latest()
+ # the user wersion is the same
+ user_doc_new = base_doc
+ # The client can compare parent_revision to revision
+ # to see if he needs to update user's view
+ # Same goes for shared view
+
return response.SuccessAllOk().django_response({
- "name": udoc.id,
- "parent_user_resivion": udoc.revision,
- "parent_revision": doc.revision,
- "revision": ndoc.revision,
- 'timestamp': ndoc.revision.timestamp,
- })
+ "name": user_doc_new.id,
+ "user": user_doc_new.owner,
+
+ "revision": user_doc_new.revision,
+ 'timestamp': user_doc_new.revision.timestamp,
+
+ # parent_* describe the user's document as submitted (before this
+ # request), mirroring how shared_parent_* use the original `doc`.
+ "parent_revision": user_doc.revision,
+ "parent_timestamp": user_doc.revision.timestamp,
+
+ "shared_revision": doc_new.revision,
+ "shared_timestamp": doc_new.revision.timestamp,
+
+ "shared_parent_revision": doc.revision,
+ "shared_parent_timestamp": doc.revision.timestamp,
+ })
\ No newline at end of file