1 # -*- encoding: utf-8 -*-
5 log = logging.getLogger('platforma.api.library')
7 __author__= "Ćukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
11 from piston.handler import BaseHandler, AnonymousBaseHandler
14 from datetime import date
16 from django.core.urlresolvers import reverse
17 from django.utils import simplejson as json
18 from django.db import IntegrityError
22 from librarian import dcparser, parser
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache
39 return username.startswith('$prq-')
41 def check_user(request, user):
# Generator used as an access check for viewing `user`'s data: each failed
# permission test below yields an AccessDenied response, and nothing is yielded
# when no test fails. Handlers in this file consume it with
# `for error in check_user(request, user):`.
# NOTE(review): the embedded numbering skips 43-44 and 49-50, so this listing
# does not show every statement of the function (e.g. whatever guards the first
# permission test, and the closing of each response dict) - check the full file.
42 log.info("user: %r, perm: %r" % (request.user, request.user.get_all_permissions()) )
# The requester lacks the 'api.pullrequest.can_view' permission.
45 if not request.user.has_perm('api.pullrequest.can_view'):
46 yield response.AccessDenied().django_response({
47 'reason': 'access-denied',
48 'message': "You don't have enough priviliges to view pull requests."
# `user` is somebody other than the requester: this additionally requires the
# 'api.document.can_view_other' permission.
51 elif request.user.username != user:
52 if not request.user.has_perm('api.document.can_view_other'):
53 yield response.AccessDenied().django_response({
54 'reason': 'access-denied',
55 'message': "You don't have enough priviliges to view other people's document."
60 # Document List Handlers
62 # TODO: security check
63 class BasicLibraryHandler(AnonymousBaseHandler):
# GET-only document list handler derived from AnonymousBaseHandler; LibraryHandler
# registers it as its `anonymous` handler.
64 allowed_methods = ('GET',)
# Builds one {'url', 'name'} entry per id returned by lib.documents() and returns
# them under the 'documents' key.
# NOTE(review): the line that opens the `document_list` literal (embedded line 69)
# is missing from this listing.
67 def read(self, request, lib):
68 """Return the list of documents."""
70 'url': reverse('document_view', args=[docid]),
71 'name': docid } for docid in lib.documents() ]
72 return {'documents' : document_list}
75 # This handler controls the document collection
77 class LibraryHandler(BaseHandler):
# Handler for the document collection: GET lists documents as a tree of parts,
# POST creates a new document. BasicLibraryHandler is registered as the
# `anonymous` handler.
78 allowed_methods = ('GET', 'POST')
79 anonymous = BasicLibraryHandler
# read(): returns {'documents': [...]} containing only top-level documents, each
# with its child documents nested under 'parts'; both levels are sorted with
# natural_order on 'name'.
# NOTE(review): `documents` is built on lines missing from this listing (embedded
# 84-93). From its use below it is a dict keyed by document id whose values
# carry at least 'name' and 'parts'.
82 def read(self, request, lib):
83 """Return the list of documents."""
87 for docid in lib.documents():
89 'url': reverse('document_view', args=[docid]),
# Distinct (part_id, document_id) pairs taken from the PartCache table.
94 parts = PartCache.objects.defer('part_id')\
95 .values_list('part_id', 'document_id').distinct()
# Shallow copy of `documents`; entries that turn out to be parts are removed
# from it below, so only top-level documents remain.
97 document_tree = dict(documents)
99 for part, docid in parts:
100 # this way, we won't display broken links
# NOTE(review): dict.has_key() is deprecated; `part not in documents` is the
# equivalent test.
101 if not documents.has_key(part):
102 log.info("NOT FOUND: %s", part)
105 parent = documents[docid]
106 child = documents[part]
108 # not top-level anymore
# NOTE(review): pop() is called without a default, so it raises KeyError if the
# same part shows up under more than one document - confirm that cannot happen.
109 document_tree.pop(part)
110 parent['parts'].append(child)
112 for doc in documents.itervalues():
113 doc['parts'].sort(key=natural_order(lambda d: d['name']))
115 return {'documents': sorted(document_tree.itervalues(),
116 key=natural_order(lambda d: d['name']) ) }
# create(): takes the text from form field 'ocr_data' or from the uploaded
# 'ocr_file' (decoded as UTF-8), optionally wraps it with librarian.wrap_text,
# creates the document named by form field 'bookname' and writes the text as
# its 'xml' content.
# NOTE(review): the `try:`/`else:` lines that connect the statements below are
# missing from this listing; the exact branch structure must be read from the
# full file.
119 @validate_form(forms.DocumentUploadForm, 'POST')
121 def create(self, request, form, lib):
122 """Create a new document."""
124 if form.cleaned_data['ocr_data']:
125 data = form.cleaned_data['ocr_data']
127 data = request.FILES['ocr_file'].read().decode('utf-8')
130 return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
# When 'generate_dc' is set, wrap the text via librarian.wrap_text, passing
# today's date as the second argument.
132 if form.cleaned_data['generate_dc']:
133 data = librarian.wrap_text(data, unicode(date.today()))
135 docid = form.cleaned_data['bookname']
140 log.info("DOCID %s", docid)
141 doc = lib.document_create(docid)
142 # document created, but no content yet
145 doc = doc.quickwrite('xml', data.encode('utf-8'),
146 '$AUTO$ XML data uploaded.', user=request.user.username)
149 # rollback branch creation
151 raise LibraryException(traceback.format_exc())
153 url = reverse('document_view', args=[doc.id])
155 return response.EntityCreated().django_response(\
159 'revision': doc.revision },
163 except LibraryException, e:
# NOTE(review): the full traceback text is sent back to the client as "reason".
165 return response.InternalError().django_response({
166 "reason": traceback.format_exc()
168 except DocumentAlreadyExists:
169 # Document is already there
170 return response.EntityConflict().django_response({
171 "reason": "already-exists",
# NOTE(review): the format string below has no %s placeholder, so `% docid`
# raises "TypeError: not all arguments converted during string formatting" when
# docid is a string, instead of returning this conflict response. Either add a
# placeholder ("Document %s already exists.") or drop `% docid`.
172 "message": "Document already exists." % docid
178 class BasicDocumentHandler(AnonymousBaseHandler):
# GET-only document handler derived from AnonymousBaseHandler; DocumentHandler
# registers it as its `anonymous` handler.
179 allowed_methods = ('GET',)
# read(): looks the document up with lib.document(docid) and handles
# RevisionNotFound. The visible response fields are the html/text/dc view URLs
# and the document revision (as 'public_revision').
# NOTE(review): the `try:` line, the RevisionNotFound handler body and the start
# and end of the result dict are missing from this listing.
182 def read(self, request, docid, lib):
184 doc = lib.document(docid)
185 except RevisionNotFound:
190 'html_url': reverse('dochtml_view', args=[doc.id]),
191 'text_url': reverse('doctext_view', args=[doc.id]),
192 'dc_url': reverse('docdc_view', args=[doc.id]),
193 'public_revision': doc.revision,
201 class DocumentHandler(BaseHandler):
# Handler for a single document's meta data; BasicDocumentHandler is registered
# as the `anonymous` handler.
# NOTE(review): 'PUT' is listed as allowed, but the only `update` method in view
# is commented out after this class - confirm PUT is really handled.
202 allowed_methods = ('GET', 'PUT')
203 anonymous = BasicDocumentHandler
# read(): resolves the target user (form field 'user', defaulting to the
# requester) and revision (form field 'revision', defaulting to 'latest'), runs
# check_user, then loads the document for that user and revision. The visible
# response fields are view URLs plus the user's revision and its timestamp.
# NOTE(review): several `try:`/`else:` lines, loop bodies and dict closers are
# missing from this listing.
205 @validate_form(forms.DocumentRetrieveForm, 'GET')
207 def read(self, request, form, docid, lib):
208 """Read document's meta data"""
209 log.info(u"User '%s' wants to %s(%s) as %s" % \
210 (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
212 user = form.cleaned_data['user'] or request.user.username
213 rev = form.cleaned_data['revision'] or 'latest'
215 for error in check_user(request, user):
219 doc = lib.document(docid, user, rev=rev)
220 except RevisionMismatch, e:
221 # the document exists, but the revision is bad
222 return response.EntityNotFound().django_response({
223 'reason': 'revision-mismatch',
224 'message': e.message,
228 except RevisionNotFound, e:
229 # the user doesn't have this document checked out
230 # or some other weird error occurred
231 # try to do the checkout
# The checkout is attempted only for pull-request users (is_prq) or when the
# requester asks for their own copy.
232 if is_prq(user) or (user == request.user.username):
# Load the document without a user argument, then take() a copy for `user`.
234 mdoc = lib.document(docid)
235 doc = mdoc.take(user)
238 # source revision, should probably change
239 # but there are no changes yet, so...
242 except RevisionNotFound, e:
243 return response.EntityNotFound().django_response({
244 'reason': 'document-not-found',
245 'message': e.message,
249 return response.EntityNotFound().django_response({
250 'reason': 'document-not-found',
251 'message': e.message,
259 'html_url': reverse('dochtml_view', args=[doc.id]),
260 'text_url': reverse('doctext_view', args=[doc.id]),
261 # 'dc_url': reverse('docdc_view', args=[doc.id]),
262 'gallery_url': reverse('docgallery_view', args=[doc.id]),
263 'merge_url': reverse('docmerge_view', args=[doc.id]),
264 'user_revision': doc.revision,
265 'user_timestamp': doc.revision.timestamp,
266 # 'public_revision': doc.revision,
267 # 'public_timestamp': doc.revision.timestamp,
272 # def update(self, request, docid, lib):
273 # """Update information about the document, like display not"""
278 class DocumentHTMLHandler(BaseHandler):
# Handler that renders a document revision as HTML.
# NOTE(review): ('GET') is a plain str, not a tuple - the parentheses need a
# trailing comma, as in `allowed_methods = ('GET',)` used by the Basic* handlers.
# A membership test against a str is a substring test - confirm how
# allowed_methods is consumed and fix.
279 allowed_methods = ('GET')
# read(): loads the document for form field 'revision', rejects the request with
# BadRequest when that revision belongs to a different document id or a
# different owner than requested, runs check_user, and returns the result of
# librarian.html.transform on the document's 'xml' data.
# `stylesheet` is forwarded to the transform unchanged (default 'partial').
# EntryNotFound/RevisionNotFound map to EntityNotFound; librarian.ParseError
# maps to InternalError with reason 'xml-parse-error'.
# NOTE(review): the `try:` line, the check_user loop body and part of the
# transform call are missing from this listing.
281 @validate_form(forms.DocumentRetrieveForm, 'GET')
283 def read(self, request, form, docid, lib, stylesheet='partial'):
284 """Read document as html text"""
286 revision = form.cleaned_data['revision']
287 user = form.cleaned_data['user'] or request.user.username
288 document = lib.document_for_rev(revision)
290 if document.id != docid:
291 return response.BadRequest().django_response({
292 'reason': 'name-mismatch',
293 'message': 'Provided revision is not valid for this document'
296 if document.owner != user:
297 return response.BadRequest().django_response({
298 'reason': 'user-mismatch',
299 'message': "Provided revision doesn't belong to user %s" % user
302 for error in check_user(request, user):
305 return librarian.html.transform(document.data('xml'), is_file=False, \
306 parse_dublincore=False, stylesheet=stylesheet,\
308 "with-paths": 'boolean(1)',
311 except (EntryNotFound, RevisionNotFound), e:
312 return response.EntityNotFound().django_response({
313 'reason': 'not-found', 'message': e.message})
314 except librarian.ParseError, e:
315 return response.InternalError().django_response({
316 'reason': 'xml-parse-error', 'message': e.message })
322 class DocumentGalleryHandler(BaseHandler):
# Handler that lists the scan images of the galleries attached to a document.
# NOTE(review): ('GET') is a plain str, not a tuple; the one-element tuple is
# spelled ('GET',) - confirm how allowed_methods is consumed and fix.
323 allowed_methods = ('GET')
# read(): for every GalleryForDocument row of `docid`, scans the directory
# MEDIA_ROOT/<subpath> and collects one URL per .png/.jpeg/.jpg file into
# {'name': ..., 'pages': [...]}, then appends that dict to `galleries`.
# NOTE(review): the initialisation of `galleries`, the `try:`/`except`/
# `continue`/`else:` lines and the final return are missing from this listing.
326 def read(self, request, docid):
327 """Read meta-data about scans for gallery of this document."""
329 from urllib import quote
331 for assoc in GalleryForDocument.objects.filter(document=docid):
332 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
334 if not os.path.isdir(dirpath):
335 log.warn(u"[WARNING]: missing gallery %s", dirpath)
338 gallery = {'name': assoc.name, 'pages': []}
# NOTE(review): the loop variable `file` shadows the Python 2 builtin `file`.
340 for file in sorted(os.listdir(dirpath)):
# Byte-string names are decoded as UTF-8 so the URL below is built from unicode.
341 if not isinstance(file, unicode):
343 file = file.decode('utf-8')
345 log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
350 name, ext = os.path.splitext(os.path.basename(file))
# Only these image extensions are accepted (compared case-insensitively).
352 if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
353 log.warn(u"Ignoring: %s %s", name, ext)
356 url = settings.MEDIA_URL + assoc.subpath + u'/' + file
359 url = settings.MEDIA_URL + u'/missing.png'
# The URL is UTF-8 encoded and percent-quoted before being stored.
361 gallery['pages'].append( quote(url.encode('utf-8')) )
363 # gallery['pages'].sort()
364 galleries.append(gallery)
# Pattern for an (optionally namespace-prefixed) <include ...> tag whose href
# attribute is a quoted wlrepo:// URL. Group 1 is the opening quote, the named
# group `link` captures the text after "wlrepo://", and the \1 that follows the
# group requires the matching closing quote.
# NOTE(review): inside the character class [^\1] the \1 is NOT a backreference -
# Python's re treats numeric escapes inside [...] as a character (here \x01) -
# so the class excludes only that character. The link still stops at the closing
# quote because +? is lazy and is followed by the real \1 backreference.
372 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
377 class DocumentTextHandler(BaseHandler):
# Handler for a document's raw XML text: GET returns the text (or one part of
# it), POST stores a new version.
378 allowed_methods = ('GET', 'POST')
# read(): loads the document for form field 'revision', rejects the request with
# BadRequest when that revision belongs to a different document id or owner,
# runs check_user, and returns either the whole 'xml' data or the text of the
# part named by form field 'part' (via WLDocument.part_as_text).
# NOTE(review): the `try:`/`if`/`else:` lines that choose between those two
# results, and several dict closers, are missing from this listing.
380 @validate_form(forms.TextRetrieveForm, 'GET')
382 def read(self, request, form, docid, lib):
383 """Read document as raw text"""
385 revision = form.cleaned_data['revision']
386 part = form.cleaned_data['part']
387 user = form.cleaned_data['user'] or request.user.username
389 document = lib.document_for_rev(revision)
391 if document.id != docid:
392 return response.BadRequest().django_response({
393 'reason': 'name-mismatch',
394 'message': 'Provided revision is not valid for this document'
397 if document.owner != user:
398 return response.BadRequest().django_response({
399 'reason': 'user-mismatch',
400 'message': "Provided revision doesn't belong to user %s" % user
403 for error in check_user(request, user):
407 return document.data('xml')
409 xdoc = parser.WLDocument.from_string(document.data('xml'),\
410 parse_dublincore=False)
411 ptext = xdoc.part_as_text(part)
414 return response.EntityNotFound().django_response({
415 'reason': 'no-part-in-document'
419 except librarian.ParseError, e:
420 return response.EntityNotFound().django_response({
421 'reason': 'invalid-document-state',
# NOTE(review): type(e) puts an exception class object into the response
# payload - confirm the response encoder can serialise it.
422 'exception': type(e),
425 except (EntryNotFound, RevisionNotFound), e:
426 return response.EntityNotFound().django_response({
427 'reason': 'not-found',
428 'exception': type(e), 'message': e.message
# create(): stores new text for the requester's copy of the document. The new
# text is either form field 'contents' or the current XML with form field
# 'chunks' merged in. The wlrepo:// include links found in the text are kept in
# the document's 'parts' file and in PartCache, and the XML is committed with
# the supplied message.
# NOTE(review): many `try:`/`if`/`else:`/`finally` lines and file close() calls
# are missing from this listing; read the control flow from the full file.
431 @validate_form(forms.TextUpdateForm, 'POST')
433 def create(self, request, form, docid, lib):
435 revision = form.cleaned_data['revision']
436 msg = form.cleaned_data['message']
437 user = form.cleaned_data['user'] or request.user.username
439 # do not allow changing not owned documents
443 if user != request.user.username:
444 return response.AccessDenied().django_response({
445 'reason': 'insufficient-priviliges',
# `current` is the user's copy of the document, `orig` is the document at the
# revision the client supplied; the 'out-of-date' conflict below reports both
# revisions.
448 current = lib.document(docid, user)
449 orig = lib.document_for_rev(revision)
452 return response.EntityConflict().django_response({
453 "reason": "out-of-date",
454 "provided_revision": orig.revision,
455 "latest_revision": current.revision })
# NOTE(review): dict.has_key() is deprecated; `'contents' in form.cleaned_data`
# is the equivalent test.
457 if form.cleaned_data.has_key('contents'):
458 data = form.cleaned_data['contents']
460 chunks = form.cleaned_data['chunks']
461 xdoc = parser.WLDocument.from_string(current.data('xml'))
# merge_chunks returns the items joined into the 'invalid-chunks' message below.
462 errors = xdoc.merge_chunks(chunks)
465 return response.EntityConflict().django_response({
466 "reason": "invalid-chunks",
467 "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
470 data = xdoc.serialize()
472 # try to find any Xinclude tags
# NOTE(review): re.finditer always returns an iterator object, which is truthy
# even when there are no matches, so the `or []` fallback below never applies.
473 includes = [m.groupdict()['link'] for m in (re.finditer(\
474 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
476 log.info("INCLUDES: %s", includes)
478 # TODO: provide useful routines to make this simpler
# Callback handed to current.invoke_and_commit below: it rewrites the 'parts'
# file when the include list changed, refreshes PartCache, then writes the XML.
479 def xml_update_action(lib, resolve):
# Include list previously stored for this document, as JSON.
481 f = lib._fileopen(resolve('parts'), 'r')
482 stored_includes = json.loads(f.read())
487 if stored_includes != includes:
488 f = lib._fileopen(resolve('parts'), 'w+')
489 f.write(json.dumps(includes))
492 lib._fileadd(resolve('parts'))
494 # update the parts cache
495 PartCache.update_cache(docid, current.owner,\
496 stored_includes, includes)
498 # now that the parts are ok, write xml
499 f = lib._fileopen(resolve('xml'), 'w+')
500 f.write(data.encode('utf-8'))
# The second argument supplies the (message, user) pair for the commit.
504 ndoc = current.invoke_and_commit(\
505 xml_update_action, lambda d: (msg, user) )
508 # return the new revision number
509 return response.SuccessAllOk().django_response({
513 "previous_revision": current.revision,
514 "revision": ndoc.revision,
515 'timestamp': ndoc.revision.timestamp,
516 "url": reverse("doctext_view", args=[ndoc.id])
# NOTE(review): if invoke_and_commit raised, `ndoc` is only bound here when it
# was initialised on a line missing from this listing - confirm, otherwise this
# test raises NameError instead of rolling back.
519 if ndoc: lib._rollback()
521 except RevisionNotFound, e:
522 return response.EntityNotFound(mimetype="text/plain").\
523 django_response(e.message)
527 # Dublin Core handlers
529 # @requires librarian
531 #class DocumentDublinCoreHandler(BaseHandler):
532 # allowed_methods = ('GET', 'POST')
535 # def read(self, request, docid, lib):
536 # """Read document as raw text"""
538 # revision = request.GET.get('revision', 'latest')
540 # if revision == 'latest':
541 # doc = lib.document(docid)
543 # doc = lib.document_for_rev(revision)
546 # if document.id != docid:
547 # return response.BadRequest().django_response({'reason': 'name-mismatch',
548 # 'message': 'Provided revision is not valid for this document'})
550 # bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
551 # return bookinfo.serialize()
552 # except (EntryNotFound, RevisionNotFound), e:
553 # return response.EntityNotFound().django_response({
554 # 'exception': type(e), 'message': e.message})
557 # def create(self, request, docid, lib):
559 # bi_json = request.POST['contents']
560 # revision = request.POST['revision']
562 # if request.POST.has_key('message'):
563 # msg = u"$USER$ " + request.PUT['message']
565 # msg = u"$AUTO$ Dublin core update."
567 # current = lib.document(docid, request.user.username)
568 # orig = lib.document_for_rev(revision)
570 # if current != orig:
571 # return response.EntityConflict().django_response({
572 # "reason": "out-of-date",
573 # "provided": orig.revision,
574 # "latest": current.revision })
576 # xmldoc = parser.WLDocument.from_string(current.data('xml'))
577 # document.book_info = dcparser.BookInfo.from_json(bi_json)
580 # ndoc = current.quickwrite('xml', \
581 # document.serialize().encode('utf-8'),\
582 # message=msg, user=request.user.username)
585 # # return the new revision number
587 # "document": ndoc.id,
589 # "previous_revision": current.revision,
590 # "revision": ndoc.revision,
591 # 'timestamp': ndoc.revision.timestamp,
592 # "url": reverse("docdc_view", args=[ndoc.id])
594 # except Exception, e:
595 # if ndoc: lib._rollback()
597 # except RevisionNotFound:
598 # return response.EntityNotFound().django_response()
600 class MergeHandler(BaseHandler):
601 allowed_methods = ('POST',)
603 @validate_form(forms.MergeRequestForm, 'POST')
605 def create(self, request, form, docid, lib):
606 """Create a new document revision from the information provided by user"""
607 revision = form.cleaned_data['revision']
609 # fetch the main branch document
610 doc = lib.document(docid)
612 # fetch the base document
613 user_doc = lib.document_for_rev(revision)
614 base_doc = user_doc.latest()
616 if base_doc != user_doc:
617 return response.EntityConflict().django_response({
618 "reason": "out-of-date",
619 "provided": str(user_doc.revision),
620 "latest": str(base_doc.revision)
623 if form.cleaned_data['type'] == 'update':
624 # update is always performed from the file branch
626 changed, clean = base_doc.update(request.user.username)
628 # update user document
630 user_doc_new = user_doc.latest()
632 # shared document is the same
635 if form.cleaned_data['type'] == 'share':
636 if not base_doc.up_to_date():
637 return response.BadRequest().django_response({
638 "reason": "not-fast-forward",
639 "message": "You must first update yout branch to the latest version."
642 # check for unresolved conflicts
643 if base_doc.has_conflict_marks():
644 return response.BadRequest().django_response({
645 "reason": "unresolved-conflicts",
646 "message": "There are unresolved conflicts in your file. Fix them, and try again."
649 if not request.user.has_perm('api.document.can_share'):
650 # User is not permitted to make a merge, right away
651 # So we instead create a pull request in the database
653 prq, created = PullRequest.objects.get_or_create(
654 comitter = request.user,
658 'source_revision': str(base_doc.revision),
659 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
663 # there can't be 2 pending requests from the same user
664 # for the same document
666 prq.source_revision = str(base_doc.revision)
667 prq.comment = prq.comment + 'u\n\n' + (form.cleaned_data['message'] or u'')
670 return response.RequestAccepted().django_response(\
671 ticket_status=prq.status, \
672 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
673 except IntegrityError:
674 return response.EntityConflict().django_response({
675 'reason': 'request-already-exist'
678 changed = base_doc.share(form.cleaned_data['message'])
680 # update shared version if needed
682 doc_new = doc.latest()
684 # the user version is the same
685 user_doc_new = base_doc
687 # The client can compare parent_revision to revision
688 # to see if he needs to update user's view
689 # Same goes for shared view
691 return response.SuccessAllOk().django_response({
692 "name": user_doc_new.id,
693 "user": user_doc_new.owner,
694 "parent_revision": user_doc_new.revision,
695 "parent_shared_revision": doc.revision,
696 "revision": user_doc_new.revision,
697 "shared_revision": doc_new.revision,
698 'timestamp': user_doc_new.revision.timestamp,