1 # -*- encoding: utf-8 -*-
5 log = logging.getLogger('platforma.api.library')
7 __author__= "Ćukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from django.http import HttpResponse
15 from datetime import date
17 from django.core.urlresolvers import reverse
18 from django.utils import simplejson as json
19 from django.db import IntegrityError
24 from librarian import dcparser, parser
27 from api.models import PullRequest
28 from explorer.models import GalleryForDocument
31 import api.forms as forms
32 import api.response as response
33 from api.utils import validate_form, hglibrary, natural_order
34 from api.models import PartCache, PullRequest
41 return username.startswith('$prq-')
def prq_for_user(username):
    """Look up the pull request encoded in a ``$prq-<id>`` pseudo-username.

    The id is parsed from everything after the first five characters of
    ``username`` (the length of the ``$prq-`` prefix).

    Returns the matching ``PullRequest``, or ``None`` when the suffix is not
    an integer or no pull request with that id exists.
    """
    try:
        return PullRequest.objects.get(id=int(username[5:]))
    except (ValueError, PullRequest.DoesNotExist):
        # Only the two expected "no such request" failures are treated as a
        # miss; anything else (e.g. a database error) should surface instead
        # of being silently reported as "not found".
        return None
def check_user(request, user):
    """Yield an AccessDenied response if ``request.user`` may not act as ``user``.

    This is a generator: it yields at most one response and yields nothing
    when access is allowed.  Two cases are checked:

    * ``user`` is a pull-request pseudo-user (see ``is_prq``): the requester
      needs the ``api.view_prq`` permission;
    * ``user`` is somebody else's username: the requester needs the
      ``api.view_other_document`` permission.
    """
    requester = request.user
    log.info("user: %r, perm: %r" % (requester, requester.get_all_permissions()) )

    if is_prq(user):
        # pull request
        allowed = requester.has_perm('api.view_prq')
        denial = "You don't have enough priviliges to view pull requests."
    elif requester.username != user:
        # other users
        allowed = requester.has_perm('api.view_other_document')
        denial = "You don't have enough priviliges to view other people's document."
    else:
        # the requester's own documents need no extra permission
        return

    if not allowed:
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': denial
        })
68 # Document List Handlers
70 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only listing of the document collection (anonymous handler)."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        The result is ``{'documents': [...]}`` with one ``{'url', 'name'}``
        entry per document id reported by ``lib.documents()``.
        """
        entries = []
        for name in lib.documents():
            entries.append({
                'url': reverse('document_view', args=[name]),
                'name': name,
            })
        return {'documents': entries}
# This handler controls the document collection
class LibraryHandler(BaseHandler):
    """Handler for the document collection.

    GET returns the documents arranged as a tree (parts nested under the
    documents that include them); POST creates a new document.  Anonymous
    requests are delegated to ``BasicLibraryHandler``.
    """
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Every document is reported as ``{'url', 'name', 'parts'}``.  A document
        recorded in ``PartCache`` as a part of another document is nested in
        its parent's ``parts`` list and removed from the top level.  Both the
        top level and every ``parts`` list are sorted by name using
        ``natural_order``.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': [],
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # a cache row may also point at a parent that is no longer in
            # the library; skip it instead of failing with KeyError
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            parent = documents[docid]
            child = documents[part]

            # not top-level anymore; a part included by several parents has
            # already been removed by an earlier row, hence the default
            document_tree.pop(part, None)
            parent['parts'].append(child)

        for doc in documents.itervalues():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.itervalues(),
            key=natural_order(lambda d: d['name']) ) }

    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The content comes from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8).  If the
        ``generate_dc`` field is set the text is passed through
        ``librarian.wrap_text`` together with today's date.

        Returns:
            * EntityCreated with the new document's url, name and revision;
            * BadRequest when neither ``ocr_data`` nor ``ocr_file`` is given;
            * EntityConflict (``already-exists``) when the document exists;
            * InternalError carrying a traceback on ``LibraryException``.
        """
        import traceback

        data = form.cleaned_data['ocr_data']
        if not data:
            # Look the upload up with .get(): indexing request.FILES would
            # raise before the BadRequest answer below could be returned.
            upload = request.FILES.get('ocr_file')
            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            data = upload.read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        except LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
        except DocumentAlreadyExists:
            # Document is already there.  The message needs a %s placeholder:
            # applying "% docid" to a string without one raises TypeError.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
185 class BasicDocumentHandler(AnonymousBaseHandler):
186 allowed_methods = ('GET',)
189 def read(self, request, docid, lib):
191 doc = lib.document(docid)
192 except RevisionNotFound:
197 'html_url': reverse('dochtml_view', args=[doc.id]),
198 'text_url': reverse('doctext_view', args=[doc.id]),
199 'dc_url': reverse('docdc_view', args=[doc.id]),
200 'public_revision': doc.revision,
206 class DiffHandler(BaseHandler):
207 allowed_methods = ('GET',)
210 def read(self, request, source_revision, target_revision, lib):
211 '''Return diff between source_revision and target_revision)'''
212 source_document = lib.document_for_rev(source_revision)
213 target_document = lib.document_for_rev(target_revision)
214 print source_document,
215 print target_document
216 diff = difflib.unified_diff(
217 source_document.data('xml').splitlines(True),
218 target_document.data('xml').splitlines(True),
222 return ''.join(list(diff))
228 class DocumentHandler(BaseHandler):
229 allowed_methods = ('GET', 'PUT')
230 anonymous = BasicDocumentHandler
232 @validate_form(forms.DocumentRetrieveForm, 'GET')
234 def read(self, request, form, docid, lib):
235 """Read document's meta data"""
236 log.info(u"User '%s' wants to %s(%s) as %s" % \
237 (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
239 user = form.cleaned_data['user'] or request.user.username
240 rev = form.cleaned_data['revision'] or 'latest'
242 for error in check_user(request, user):
246 doc = lib.document(docid, user, rev=rev)
247 except RevisionMismatch, e:
248 # the document exists, but the revision is bad
249 return response.EntityNotFound().django_response({
250 'reason': 'revision-mismatch',
251 'message': e.message,
255 except RevisionNotFound, e:
256 # the user doesn't have this document checked out
257 # or some other weird error occured
258 # try to do the checkout
260 if user == request.user.username:
261 mdoc = lib.document(docid)
262 doc = mdoc.take(user)
264 prq = prq_for_user(user)
265 # commiter's document
266 prq_doc = lib.document_for_rev(prq.source_revision)
267 doc = prq_doc.take(user)
269 return response.EntityNotFound().django_response({
270 'reason': 'document-not-found',
271 'message': e.message,
275 except RevisionNotFound, e:
276 return response.EntityNotFound().django_response({
277 'reason': 'document-not-found',
278 'message': e.message,
286 'html_url': reverse('dochtml_view', args=[doc.id]),
287 'text_url': reverse('doctext_view', args=[doc.id]),
288 # 'dc_url': reverse('docdc_view', args=[doc.id]),
289 'gallery_url': reverse('docgallery_view', args=[doc.id]),
290 'merge_url': reverse('docmerge_view', args=[doc.id]),
291 'revision': doc.revision,
292 'timestamp': doc.revision.timestamp,
293 # 'public_revision': doc.revision,
294 # 'public_timestamp': doc.revision.timestamp,
299 # def update(self, request, docid, lib):
300 # """Update information about the document, like display not"""
305 class DocumentHTMLHandler(BaseHandler):
306 allowed_methods = ('GET')
308 @validate_form(forms.DocumentRetrieveForm, 'GET')
310 def read(self, request, form, docid, lib, stylesheet='partial'):
311 """Read document as html text"""
313 revision = form.cleaned_data['revision']
314 user = form.cleaned_data['user'] or request.user.username
315 document = lib.document_for_rev(revision)
317 if document.id != docid:
318 return response.BadRequest().django_response({
319 'reason': 'name-mismatch',
320 'message': 'Provided revision is not valid for this document'
323 if document.owner != user:
324 return response.BadRequest().django_response({
325 'reason': 'user-mismatch',
326 'message': "Provided revision doesn't belong to user %s" % user
329 for error in check_user(request, user):
332 return librarian.html.transform(document.data('xml'), is_file=False, \
333 parse_dublincore=False, stylesheet=stylesheet,\
335 "with-paths": 'boolean(1)',
338 except (EntryNotFound, RevisionNotFound), e:
339 return response.EntityNotFound().django_response({
340 'reason': 'not-found', 'message': e.message})
341 except librarian.ParseError, e:
342 return response.InternalError().django_response({
343 'reason': 'xml-parse-error', 'message': e.message })
349 class DocumentGalleryHandler(BaseHandler):
350 allowed_methods = ('GET')
353 def read(self, request, docid):
354 """Read meta-data about scans for gallery of this document."""
356 from urllib import quote
358 for assoc in GalleryForDocument.objects.filter(document=docid):
359 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
361 if not os.path.isdir(dirpath):
362 log.warn(u"[WARNING]: missing gallery %s", dirpath)
365 gallery = {'name': assoc.name, 'pages': []}
367 for file in os.listdir(dirpath):
368 if not isinstance(file, unicode):
370 file = file.decode('utf-8')
372 log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
377 name, ext = os.path.splitext(os.path.basename(file))
379 if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
380 log.warn(u"Ignoring: %s %s", name, ext)
383 url = settings.MEDIA_URL + assoc.subpath + u'/' + file
386 url = settings.MEDIA_URL + u'/missing.png'
388 gallery['pages'].append( quote(url.encode('utf-8')) )
390 # gallery['pages'].sort()
391 galleries.append(gallery)
# Matches an ``<include ...>`` element (with an optional namespace prefix)
# whose ``href`` attribute is a single- or double-quoted ``wlrepo://`` URI.
# The named group ``link`` captures the text that follows ``wlrepo://`` up to
# the closing quote.
# NOTE(review): inside a character class ``\1`` is not a backreference -- the
# ``re`` module reads it as the octal escape for chr(1) -- so ``[^\1]`` also
# accepts quote characters.  The match still stops at the closing quote only
# because the quantifier is lazy and is followed by a real ``\1``.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
404 class DocumentTextHandler(BaseHandler):
405 allowed_methods = ('GET', 'POST')
407 @validate_form(forms.TextRetrieveForm, 'GET')
409 def read(self, request, form, docid, lib):
410 """Read document as raw text"""
412 revision = form.cleaned_data['revision']
413 part = form.cleaned_data['part']
414 user = form.cleaned_data['user'] or request.user.username
416 document = lib.document_for_rev(revision)
418 if document.id != docid:
419 return response.BadRequest().django_response({
420 'reason': 'name-mismatch',
421 'message': 'Provided revision is not valid for this document'
424 if document.owner != user:
425 return response.BadRequest().django_response({
426 'reason': 'user-mismatch',
427 'message': "Provided revision doesn't belong to user %s" % user
430 for error in check_user(request, user):
434 return document.data('xml')
436 xdoc = parser.WLDocument.from_string(document.data('xml'),\
437 parse_dublincore=False)
438 ptext = xdoc.part_as_text(part)
441 return response.EntityNotFound().django_response({
442 'reason': 'no-part-in-document'
446 except librarian.ParseError, e:
447 return response.EntityNotFound().django_response({
448 'reason': 'invalid-document-state',
449 'exception': type(e),
452 except (EntryNotFound, RevisionNotFound), e:
453 return response.EntityNotFound().django_response({
454 'reason': 'not-found',
455 'exception': type(e), 'message': e.message
458 @validate_form(forms.TextUpdateForm, 'POST')
460 def create(self, request, form, docid, lib):
462 revision = form.cleaned_data['revision']
463 msg = form.cleaned_data['message']
464 user = form.cleaned_data['user'] or request.user.username
466 # do not allow changing not owned documents
470 if user != request.user.username:
471 return response.AccessDenied().django_response({
472 'reason': 'insufficient-priviliges',
475 current = lib.document(docid, user)
476 orig = lib.document_for_rev(revision)
479 return response.EntityConflict().django_response({
480 "reason": "out-of-date",
481 "provided_revision": orig.revision,
482 "latest_revision": current.revision })
484 if form.cleaned_data.has_key('contents'):
485 data = form.cleaned_data['contents']
487 chunks = form.cleaned_data['chunks']
488 xdoc = parser.WLDocument.from_string(current.data('xml'))
489 errors = xdoc.merge_chunks(chunks)
492 return response.EntityConflict().django_response({
493 "reason": "invalid-chunks",
494 "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
497 data = xdoc.serialize()
499 # try to find any Xinclude tags
500 includes = [m.groupdict()['link'] for m in (re.finditer(\
501 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
503 log.info("INCLUDES: %s", includes)
505 # TODO: provide useful routines to make this simpler
506 def xml_update_action(lib, resolve):
508 f = lib._fileopen(resolve('parts'), 'r')
509 stored_includes = json.loads(f.read())
514 if stored_includes != includes:
515 f = lib._fileopen(resolve('parts'), 'w+')
516 f.write(json.dumps(includes))
519 lib._fileadd(resolve('parts'))
521 # update the parts cache
522 PartCache.update_cache(docid, current.owner,\
523 stored_includes, includes)
525 # now that the parts are ok, write xml
526 f = lib._fileopen(resolve('xml'), 'w+')
527 f.write(data.encode('utf-8'))
531 ndoc = current.invoke_and_commit(\
532 xml_update_action, lambda d: (msg, user) )
535 # return the new revision number
536 return response.SuccessAllOk().django_response({
540 "previous_revision": current.revision,
541 "revision": ndoc.revision,
542 'timestamp': ndoc.revision.timestamp,
543 "url": reverse("doctext_view", args=[ndoc.id])
546 if ndoc: lib._rollback()
548 except RevisionNotFound, e:
549 return response.EntityNotFound(mimetype="text/plain").\
550 django_response(e.message)
554 # Dublin Core handlers
556 # @requires librarian
558 #class DocumentDublinCoreHandler(BaseHandler):
559 # allowed_methods = ('GET', 'POST')
562 # def read(self, request, docid, lib):
563 # """Read document as raw text"""
565 # revision = request.GET.get('revision', 'latest')
567 # if revision == 'latest':
568 # doc = lib.document(docid)
570 # doc = lib.document_for_rev(revision)
573 # if document.id != docid:
574 # return response.BadRequest().django_response({'reason': 'name-mismatch',
575 # 'message': 'Provided revision is not valid for this document'})
577 # bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
578 # return bookinfo.serialize()
579 # except (EntryNotFound, RevisionNotFound), e:
580 # return response.EntityNotFound().django_response({
581 # 'exception': type(e), 'message': e.message})
584 # def create(self, request, docid, lib):
586 # bi_json = request.POST['contents']
587 # revision = request.POST['revision']
589 # if request.POST.has_key('message'):
590 # msg = u"$USER$ " + request.PUT['message']
592 # msg = u"$AUTO$ Dublin core update."
594 # current = lib.document(docid, request.user.username)
595 # orig = lib.document_for_rev(revision)
597 # if current != orig:
598 # return response.EntityConflict().django_response({
599 # "reason": "out-of-date",
600 # "provided": orig.revision,
601 # "latest": current.revision })
603 # xmldoc = parser.WLDocument.from_string(current.data('xml'))
604 # document.book_info = dcparser.BookInfo.from_json(bi_json)
607 # ndoc = current.quickwrite('xml', \
608 # document.serialize().encode('utf-8'),\
609 # message=msg, user=request.user.username)
612 # # return the new revision number
614 # "document": ndoc.id,
616 # "previous_revision": current.revision,
617 # "revision": ndoc.revision,
618 # 'timestamp': ndoc.revision.timestamp,
619 # "url": reverse("docdc_view", args=[ndoc.id])
621 # except Exception, e:
622 # if ndoc: lib._rollback()
624 # except RevisionNotFound:
625 # return response.EntityNotFound().django_response()
627 class MergeHandler(BaseHandler):
628 allowed_methods = ('POST',)
630 @validate_form(forms.MergeRequestForm, 'POST')
632 def create(self, request, form, docid, lib):
633 """Create a new document revision from the information provided by user"""
634 revision = form.cleaned_data['revision']
636 # fetch the main branch document
637 doc = lib.document(docid)
639 # fetch the base document
640 user_doc = lib.document_for_rev(revision)
641 base_doc = user_doc.latest()
643 if base_doc != user_doc:
644 return response.EntityConflict().django_response({
645 "reason": "out-of-date",
646 "provided": str(user_doc.revision),
647 "latest": str(base_doc.revision)
650 if form.cleaned_data['type'] == 'update':
651 # update is always performed from the file branch
653 user_doc_new = base_doc.update(request.user.username)
655 # shared document is the same
658 if form.cleaned_data['type'] == 'share':
659 if not base_doc.up_to_date():
660 return response.BadRequest().django_response({
661 "reason": "not-fast-forward",
662 "message": "You must first update yout branch to the latest version."
665 # check for unresolved conflicts
666 if base_doc.has_conflict_marks():
667 return response.BadRequest().django_response({
668 "reason": "unresolved-conflicts",
669 "message": "There are unresolved conflicts in your file. Fix them, and try again."
672 if not request.user.has_perm('api.share_document'):
673 # User is not permitted to make a merge, right away
674 # So we instead create a pull request in the database
676 prq, created = PullRequest.objects.get_or_create(
677 comitter = request.user,
681 'source_revision': str(base_doc.revision),
682 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
686 # there can't be 2 pending request from same user
687 # for the same document
689 prq.source_revision = str(base_doc.revision)
690 prq.comment = prq.comment + 'u\n\n' + (form.cleaned_data['message'] or u'')
693 return response.RequestAccepted().django_response(\
694 ticket_status=prq.status, \
695 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
696 except IntegrityError:
697 return response.EntityConflict().django_response({
698 'reason': 'request-already-exist'
701 changed = base_doc.share(form.cleaned_data['message'])
703 # update shared version if needed
705 doc_new = doc.latest()
709 # the user wersion is the same
710 user_doc_new = base_doc
712 # The client can compare parent_revision to revision
713 # to see if he needs to update user's view
714 # Same goes for shared view
716 return response.SuccessAllOk().django_response({
717 "name": user_doc_new.id,
718 "user": user_doc_new.owner,
720 "revision": user_doc_new.revision,
721 'timestamp': user_doc_new.revision.timestamp,
723 "parent_revision": user_doc.revision,
724 "parent_timestamp": user_doc.revision.timestamp,
726 "shared_revision": doc_new.revision,
727 "shared_timestamp": doc_new.revision.timestamp,
729 "shared_parent_revision": doc.revision,
730 "shared_parent_timestamp": doc.revision.timestamp,