1 # -*- encoding: utf-8 -*-
5 log = logging.getLogger('platforma.api.library')
7 __author__= "Łukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
11 from piston.handler import BaseHandler, AnonymousBaseHandler
14 from datetime import date
16 from django.core.urlresolvers import reverse
17 from django.utils import simplejson as json
18 from django.db import IntegrityError
22 from librarian import dcparser, parser
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache, PullRequest
# NOTE(review): the `def` line that owns this statement is not visible in this listing.
# Evaluates to True when `username` begins with the literal marker '$prq-'.
39 return username.startswith('$prq-')
# Fetch the PullRequest addressed by a username-like string.
# The first five characters are discarded and the remainder is parsed as the integer id
# passed to PullRequest.objects.get(); presumably those five characters are the '$prq-'
# marker tested just above -- confirm against the full file.
# NOTE(review): int() raises ValueError on a non-numeric remainder and get() raises when no
# row matches; no handling for either is visible in this listing.
41 def prq_for_user(username):
43 return PullRequest.objects.get(id=int(username[5:]))
# Generator yielding an AccessDenied response for each permission that `request.user`
# lacks when acting on documents belonging to `user`.
# Handlers in this file consume it as `for error in check_user(request, user):`.
# NOTE(review): the `elif` below has no visible opening `if` -- the condition that selects
# the pull-request branch is missing from this listing; confirm it before editing.
47 def check_user(request, user):
# Logged on every call; the permission set is computed eagerly for the message.
48 log.info("user: %r, perm: %r" % (request.user, request.user.get_all_permissions()) )
# Pull-request branch: the requester needs the 'api.view_prq' permission.
51 if not request.user.has_perm('api.view_prq'):
52 yield response.AccessDenied().django_response({
53 'reason': 'access-denied',
54 'message': "You don't have enough priviliges to view pull requests."
# Someone else's document: the requester needs 'api.view_other_document'.
57 elif request.user.username != user:
58 if not request.user.has_perm('api.view_other_document'):
59 yield response.AccessDenied().django_response({
60 'reason': 'access-denied',
61 'message': "You don't have enough priviliges to view other people's document."
66 # Document List Handlers
68 # TODO: security check
# Read-only (GET) document-list handler derived from piston's AnonymousBaseHandler.
# LibraryHandler references it through its `anonymous` attribute.
69 class BasicLibraryHandler(AnonymousBaseHandler):
70 allowed_methods = ('GET',)
# Returns {'documents': [...]} with one {'url', 'name'} entry per id from lib.documents().
# NOTE(review): how `lib` is supplied and the line opening the `document_list` literal are
# not visible in this listing.
73 def read(self, request, lib):
74 """Return the list of documents."""
# 'url' is the reversed 'document_view' route for the document id.
76 'url': reverse('document_view', args=[docid]),
77 'name': docid } for docid in lib.documents() ]
78 return {'documents' : document_list}
81 # This handler controls the document collection
83 class LibraryHandler(BaseHandler):
84 allowed_methods = ('GET', 'POST')
85 anonymous = BasicLibraryHandler
88 def read(self, request, lib):
89 """Return the list of documents."""
93 for docid in lib.documents():
95 'url': reverse('document_view', args=[docid]),
100 parts = PartCache.objects.defer('part_id')\
101 .values_list('part_id', 'document_id').distinct()
103 document_tree = dict(documents)
105 for part, docid in parts:
106 # this way, we won't display broken links
107 if not documents.has_key(part):
108 log.info("NOT FOUND: %s", part)
111 parent = documents[docid]
112 child = documents[part]
114 # not top-level anymore
115 document_tree.pop(part)
116 parent['parts'].append(child)
118 for doc in documents.itervalues():
119 doc['parts'].sort(key=natural_order(lambda d: d['name']))
121 return {'documents': sorted(document_tree.itervalues(),
122 key=natural_order(lambda d: d['name']) ) }
125 @validate_form(forms.DocumentUploadForm, 'POST')
127 def create(self, request, form, lib):
128 """Create a new document."""
130 if form.cleaned_data['ocr_data']:
131 data = form.cleaned_data['ocr_data']
133 data = request.FILES['ocr_file'].read().decode('utf-8')
136 return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
138 if form.cleaned_data['generate_dc']:
139 data = librarian.wrap_text(data, unicode(date.today()))
141 docid = form.cleaned_data['bookname']
146 log.info("DOCID %s", docid)
147 doc = lib.document_create(docid)
148 # document created, but no content yet
150 doc = doc.quickwrite('xml', data.encode('utf-8'),
151 '$AUTO$ XML data uploaded.', user=request.user.username)
154 # rollback branch creation
156 raise LibraryException(traceback.format_exc())
158 url = reverse('document_view', args=[doc.id])
160 return response.EntityCreated().django_response(\
164 'revision': doc.revision },
168 except LibraryException, e:
170 return response.InternalError().django_response({
171 "reason": traceback.format_exc()
173 except DocumentAlreadyExists:
174 # Document is already there
175 return response.EntityConflict().django_response({
176 "reason": "already-exists",
177 "message": "Document already exists." % docid
# Read-only (GET) per-document handler derived from piston's AnonymousBaseHandler.
# DocumentHandler references it through its `anonymous` attribute.
183 class BasicDocumentHandler(AnonymousBaseHandler):
184 allowed_methods = ('GET',)
# Looks the document up with lib.document(docid) and reports its view URLs and revision.
# NOTE(review): the response built when RevisionNotFound is caught, and the opening of
# the result mapping, are not visible in this listing.
187 def read(self, request, docid, lib):
189 doc = lib.document(docid)
190 except RevisionNotFound:
# URLs are reversed from the id of the document that was actually loaded.
195 'html_url': reverse('dochtml_view', args=[doc.id]),
196 'text_url': reverse('doctext_view', args=[doc.id]),
197 'dc_url': reverse('docdc_view', args=[doc.id]),
198 'public_revision': doc.revision,
# Per-document handler for authenticated users; `anonymous` names BasicDocumentHandler.
# NOTE(review): 'PUT' is listed in allowed_methods, but the only `update` method in this
# listing is the commented-out stub at the end of the class.
206 class DocumentHandler(BaseHandler):
207 allowed_methods = ('GET', 'PUT')
208 anonymous = BasicDocumentHandler
# GET: `form` is the DocumentRetrieveForm instance supplied by validate_form.
# Resolves the document for (docid, user, revision) and returns its URLs and revision;
# when the user's copy does not exist yet, a checkout via take() is attempted.
210 @validate_form(forms.DocumentRetrieveForm, 'GET')
212 def read(self, request, form, docid, lib):
213 """Read document's meta data"""
214 log.info(u"User '%s' wants to %s(%s) as %s" % \
215 (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
# Defaults: act as the requesting user, on the 'latest' revision.
217 user = form.cleaned_data['user'] or request.user.username
218 rev = form.cleaned_data['revision'] or 'latest'
# Permission check; the loop body is not visible in this listing.
220 for error in check_user(request, user):
224 doc = lib.document(docid, user, rev=rev)
225 except RevisionMismatch, e:
226 # the document exists, but the revision is bad
227 return response.EntityNotFound().django_response({
228 'reason': 'revision-mismatch',
229 'message': e.message,
233 except RevisionNotFound, e:
234 # the user doesn't have this document checked out
235 # or some other weird error occured
236 # try to do the checkout
# Own document: take a copy of the main document (lib.document(docid) with no user).
238 if user == request.user.username:
239 mdoc = lib.document(docid)
240 doc = mdoc.take(user)
# Pull-request pseudo-user: take a copy of the revision the request was created from.
# NOTE(review): the condition guarding this branch is not visible in this listing.
242 prq = prq_for_user(user)
243 # commiter's document
244 prq_doc = lib.document_for_rev(prq.source_revision)
245 doc = prq_doc.take(user)
247 return response.EntityNotFound().django_response({
248 'reason': 'document-not-found',
249 'message': e.message,
# A second RevisionNotFound, presumably raised by the checkout attempt above, is also
# reported as document-not-found.
253 except RevisionNotFound, e:
254 return response.EntityNotFound().django_response({
255 'reason': 'document-not-found',
256 'message': e.message,
# Success payload: view URLs reversed from doc.id, plus the revision and its timestamp.
264 'html_url': reverse('dochtml_view', args=[doc.id]),
265 'text_url': reverse('doctext_view', args=[doc.id]),
266 # 'dc_url': reverse('docdc_view', args=[doc.id]),
267 'gallery_url': reverse('docgallery_view', args=[doc.id]),
268 'merge_url': reverse('docmerge_view', args=[doc.id]),
269 'revision': doc.revision,
270 'timestamp': doc.revision.timestamp,
271 # 'public_revision': doc.revision,
272 # 'public_timestamp': doc.revision.timestamp,
277 # def update(self, request, docid, lib):
278 # """Update information about the document, like display not"""
283 class DocumentHTMLHandler(BaseHandler):
284 allowed_methods = ('GET')
286 @validate_form(forms.DocumentRetrieveForm, 'GET')
288 def read(self, request, form, docid, lib, stylesheet='partial'):
289 """Read document as html text"""
291 revision = form.cleaned_data['revision']
292 user = form.cleaned_data['user'] or request.user.username
293 document = lib.document_for_rev(revision)
295 if document.id != docid:
296 return response.BadRequest().django_response({
297 'reason': 'name-mismatch',
298 'message': 'Provided revision is not valid for this document'
301 if document.owner != user:
302 return response.BadRequest().django_response({
303 'reason': 'user-mismatch',
304 'message': "Provided revision doesn't belong to user %s" % user
307 for error in check_user(request, user):
310 return librarian.html.transform(document.data('xml'), is_file=False, \
311 parse_dublincore=False, stylesheet=stylesheet,\
313 "with-paths": 'boolean(1)',
316 except (EntryNotFound, RevisionNotFound), e:
317 return response.EntityNotFound().django_response({
318 'reason': 'not-found', 'message': e.message})
319 except librarian.ParseError, e:
320 return response.InternalError().django_response({
321 'reason': 'xml-parse-error', 'message': e.message })
327 class DocumentGalleryHandler(BaseHandler):
328 allowed_methods = ('GET')
331 def read(self, request, docid):
332 """Read meta-data about scans for gallery of this document."""
334 from urllib import quote
336 for assoc in GalleryForDocument.objects.filter(document=docid):
337 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
339 if not os.path.isdir(dirpath):
340 log.warn(u"[WARNING]: missing gallery %s", dirpath)
343 gallery = {'name': assoc.name, 'pages': []}
345 for file in os.listdir(dirpath):
346 if not isinstance(file, unicode):
348 file = file.decode('utf-8')
350 log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
355 name, ext = os.path.splitext(os.path.basename(file))
357 if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
358 log.warn(u"Ignoring: %s %s", name, ext)
361 url = settings.MEDIA_URL + assoc.subpath + u'/' + file
364 url = settings.MEDIA_URL + u'/missing.png'
366 gallery['pages'].append( quote(url.encode('utf-8')) )
368 # gallery['pages'].sort()
369 galleries.append(gallery)
# Matches an opening `include` tag (optionally namespace-prefixed) whose href attribute,
# quoted with either " or ', starts with wlrepo://. The named group `link` captures the
# text after that scheme, up to the same quote character that opened the value (the \1
# after the group is a backreference to it).
# NOTE(review): inside the character class, `\1` is NOT a backreference -- numeric escapes
# in a class denote a character (here chr(1)), so `[^\1]` accepts practically anything;
# it is the lazy `+?` followed by the real backreference that stops at the closing quote.
377 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
382 class DocumentTextHandler(BaseHandler):
383 allowed_methods = ('GET', 'POST')
385 @validate_form(forms.TextRetrieveForm, 'GET')
387 def read(self, request, form, docid, lib):
388 """Read document as raw text"""
390 revision = form.cleaned_data['revision']
391 part = form.cleaned_data['part']
392 user = form.cleaned_data['user'] or request.user.username
394 document = lib.document_for_rev(revision)
396 if document.id != docid:
397 return response.BadRequest().django_response({
398 'reason': 'name-mismatch',
399 'message': 'Provided revision is not valid for this document'
402 if document.owner != user:
403 return response.BadRequest().django_response({
404 'reason': 'user-mismatch',
405 'message': "Provided revision doesn't belong to user %s" % user
408 for error in check_user(request, user):
412 return document.data('xml')
414 xdoc = parser.WLDocument.from_string(document.data('xml'),\
415 parse_dublincore=False)
416 ptext = xdoc.part_as_text(part)
419 return response.EntityNotFound().django_response({
420 'reason': 'no-part-in-document'
424 except librarian.ParseError, e:
425 return response.EntityNotFound().django_response({
426 'reason': 'invalid-document-state',
427 'exception': type(e),
430 except (EntryNotFound, RevisionNotFound), e:
431 return response.EntityNotFound().django_response({
432 'reason': 'not-found',
433 'exception': type(e), 'message': e.message
436 @validate_form(forms.TextUpdateForm, 'POST')
438 def create(self, request, form, docid, lib):
440 revision = form.cleaned_data['revision']
441 msg = form.cleaned_data['message']
442 user = form.cleaned_data['user'] or request.user.username
444 # do not allow changing not owned documents
448 if user != request.user.username:
449 return response.AccessDenied().django_response({
450 'reason': 'insufficient-priviliges',
453 current = lib.document(docid, user)
454 orig = lib.document_for_rev(revision)
457 return response.EntityConflict().django_response({
458 "reason": "out-of-date",
459 "provided_revision": orig.revision,
460 "latest_revision": current.revision })
462 if form.cleaned_data.has_key('contents'):
463 data = form.cleaned_data['contents']
465 chunks = form.cleaned_data['chunks']
466 xdoc = parser.WLDocument.from_string(current.data('xml'))
467 errors = xdoc.merge_chunks(chunks)
470 return response.EntityConflict().django_response({
471 "reason": "invalid-chunks",
472 "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
475 data = xdoc.serialize()
477 # try to find any Xinclude tags
478 includes = [m.groupdict()['link'] for m in (re.finditer(\
479 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
481 log.info("INCLUDES: %s", includes)
483 # TODO: provide useful routines to make this simpler
484 def xml_update_action(lib, resolve):
486 f = lib._fileopen(resolve('parts'), 'r')
487 stored_includes = json.loads(f.read())
492 if stored_includes != includes:
493 f = lib._fileopen(resolve('parts'), 'w+')
494 f.write(json.dumps(includes))
497 lib._fileadd(resolve('parts'))
499 # update the parts cache
500 PartCache.update_cache(docid, current.owner,\
501 stored_includes, includes)
503 # now that the parts are ok, write xml
504 f = lib._fileopen(resolve('xml'), 'w+')
505 f.write(data.encode('utf-8'))
509 ndoc = current.invoke_and_commit(\
510 xml_update_action, lambda d: (msg, user) )
513 # return the new revision number
514 return response.SuccessAllOk().django_response({
518 "previous_revision": current.revision,
519 "revision": ndoc.revision,
520 'timestamp': ndoc.revision.timestamp,
521 "url": reverse("doctext_view", args=[ndoc.id])
524 if ndoc: lib._rollback()
526 except RevisionNotFound, e:
527 return response.EntityNotFound(mimetype="text/plain").\
528 django_response(e.message)
532 # Dublin Core handlers
534 # @requires librarian
536 #class DocumentDublinCoreHandler(BaseHandler):
537 # allowed_methods = ('GET', 'POST')
540 # def read(self, request, docid, lib):
541 # """Read document as raw text"""
543 # revision = request.GET.get('revision', 'latest')
545 # if revision == 'latest':
546 # doc = lib.document(docid)
548 # doc = lib.document_for_rev(revision)
551 # if document.id != docid:
552 # return response.BadRequest().django_response({'reason': 'name-mismatch',
553 # 'message': 'Provided revision is not valid for this document'})
555 # bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
556 # return bookinfo.serialize()
557 # except (EntryNotFound, RevisionNotFound), e:
558 # return response.EntityNotFound().django_response({
559 # 'exception': type(e), 'message': e.message})
562 # def create(self, request, docid, lib):
564 # bi_json = request.POST['contents']
565 # revision = request.POST['revision']
567 # if request.POST.has_key('message'):
568 # msg = u"$USER$ " + request.PUT['message']
570 # msg = u"$AUTO$ Dublin core update."
572 # current = lib.document(docid, request.user.username)
573 # orig = lib.document_for_rev(revision)
575 # if current != orig:
576 # return response.EntityConflict().django_response({
577 # "reason": "out-of-date",
578 # "provided": orig.revision,
579 # "latest": current.revision })
581 # xmldoc = parser.WLDocument.from_string(current.data('xml'))
582 # document.book_info = dcparser.BookInfo.from_json(bi_json)
585 # ndoc = current.quickwrite('xml', \
586 # document.serialize().encode('utf-8'),\
587 # message=msg, user=request.user.username)
590 # # return the new revision number
592 # "document": ndoc.id,
594 # "previous_revision": current.revision,
595 # "revision": ndoc.revision,
596 # 'timestamp': ndoc.revision.timestamp,
597 # "url": reverse("docdc_view", args=[ndoc.id])
599 # except Exception, e:
600 # if ndoc: lib._rollback()
602 # except RevisionNotFound:
603 # return response.EntityNotFound().django_response()
605 class MergeHandler(BaseHandler):
606 allowed_methods = ('POST',)
608 @validate_form(forms.MergeRequestForm, 'POST')
610 def create(self, request, form, docid, lib):
611 """Create a new document revision from the information provided by user"""
612 revision = form.cleaned_data['revision']
614 # fetch the main branch document
615 doc = lib.document(docid)
617 # fetch the base document
618 user_doc = lib.document_for_rev(revision)
619 base_doc = user_doc.latest()
621 if base_doc != user_doc:
622 return response.EntityConflict().django_response({
623 "reason": "out-of-date",
624 "provided": str(user_doc.revision),
625 "latest": str(base_doc.revision)
628 if form.cleaned_data['type'] == 'update':
629 # update is always performed from the file branch
631 user_doc_new = base_doc.update(request.user.username)
633 # shared document is the same
636 if form.cleaned_data['type'] == 'share':
637 if not base_doc.up_to_date():
638 return response.BadRequest().django_response({
639 "reason": "not-fast-forward",
640 "message": "You must first update yout branch to the latest version."
643 # check for unresolved conflicts
644 if base_doc.has_conflict_marks():
645 return response.BadRequest().django_response({
646 "reason": "unresolved-conflicts",
647 "message": "There are unresolved conflicts in your file. Fix them, and try again."
650 if not request.user.has_perm('api.share_document'):
651 # User is not permitted to make a merge, right away
652 # So we instead create a pull request in the database
654 prq, created = PullRequest.objects.get_or_create(
655 comitter = request.user,
659 'source_revision': str(base_doc.revision),
660 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
664 # there can't be 2 pending request from same user
665 # for the same document
667 prq.source_revision = str(base_doc.revision)
668 prq.comment = prq.comment + 'u\n\n' + (form.cleaned_data['message'] or u'')
671 return response.RequestAccepted().django_response(\
672 ticket_status=prq.status, \
673 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
674 except IntegrityError:
675 return response.EntityConflict().django_response({
676 'reason': 'request-already-exist'
679 changed = base_doc.share(form.cleaned_data['message'])
681 # update shared version if needed
683 doc_new = doc.latest()
687 # the user wersion is the same
688 user_doc_new = base_doc
690 # The client can compare parent_revision to revision
691 # to see if he needs to update user's view
692 # Same goes for shared view
694 return response.SuccessAllOk().django_response({
695 "name": user_doc_new.id,
696 "user": user_doc_new.owner,
698 "revision": user_doc_new.revision,
699 'timestamp': user_doc_new.revision.timestamp,
701 "parent_revision": user_doc.revision,
702 "parent_timestamp": user_doc.revision.timestamp,
704 "shared_revision": doc_new.revision,
705 "shared_timestamp": doc_new.revision.timestamp,
707 "shared_parent_revision": doc.revision,
708 "shared_parent_timestamp": doc.revision.timestamp,