1 # -*- encoding: utf-8 -*-
5 __author__= "Łukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
16 from django.db import IntegrityError
20 from librarian import dcparser, parser
23 from explorer.models import PullRequest, GalleryForDocument
26 import api.forms as forms
27 import api.response as response
28 from api.utils import validate_form, hglibrary, natural_order
29 from api.models import PartCache
35 log = logging.getLogger('platforma.api')
39 # Document List Handlers
41 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only (GET) listing of the document collection."""
    allowed_methods = ('GET',)

    # NOTE(review): ``lib`` is presumably injected by the ``hglibrary``
    # decorator imported from ``api.utils`` -- confirm.
    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        The result is ``{'documents': [...]}`` with one entry per id yielded
        by ``lib.documents()``; each entry holds the ``document_view`` URL
        and the document id as ``name``.
        """
        document_list = []
        for docid in lib.documents():
            entry = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            }
            document_list.append(entry)

        return {'documents': document_list}
56 # This handler controls the document collection
58 class LibraryHandler(BaseHandler):
59 allowed_methods = ('GET', 'POST')
60 anonymous = BasicLibraryHandler
def read(self, request, lib):
    """Return the list of documents as a tree.

    Every id yielded by ``lib.documents()`` becomes an entry with ``url``,
    ``name`` and ``parts`` keys.  The (part, document) pairs read from
    ``PartCache`` are then used to append each part's entry to its
    parent's ``parts`` list and to drop the part from the top level.

    Returns ``{'documents': [...]}`` holding only the remaining top-level
    entries; that list and every ``parts`` list are sorted with
    ``natural_order`` on ``name``.
    """
    # NOTE(review): ``lib`` is presumably supplied by a decorator such as
    # ``hglibrary``; none is visible on this method -- confirm.
    documents = {}
    for docid in lib.documents():
        documents[docid] = {
            'url': reverse('document_view', args=[docid]),
            'name': docid,
            'parts': [],
        }

    parts = PartCache.objects.defer('part_id')\
        .values_list('part_id', 'document_id').distinct()

    # Shallow copy: both mappings share the same entry dicts, so parts
    # appended below are visible through ``document_tree`` as well.
    document_tree = dict(documents)

    for part, docid in parts:
        # this way, we won't display broken links
        if part not in documents:
            log.info("NOT FOUND: %s", part)
            continue
        # A cache row may also point at a parent that no longer exists.
        if docid not in documents:
            log.info("NOT FOUND: %s", docid)
            continue

        parent = documents[docid]
        child = documents[part]

        # not top-level anymore; a part included by several parents has
        # already been removed, hence the default.
        document_tree.pop(part, None)
        parent['parts'].append(child)

    by_name = natural_order(lambda d: d['name'])

    for doc in documents.values():
        doc['parts'].sort(key=by_name)

    return {'documents': sorted(document_tree.values(), key=by_name)}
@validate_form(forms.DocumentUploadForm, 'POST')
@hglibrary  # NOTE(review): assumed source of the ``lib`` argument -- confirm
def create(self, request, form, lib):
    """Create a new document.

    The text comes from the form's ``ocr_data`` field or, failing that,
    from the uploaded ``ocr_file`` (decoded as UTF-8).  When ``generate_dc``
    is set the text is passed through ``librarian.wrap_text`` together with
    today's date.  The document is created under the form's ``bookname``
    and the text is written to it as its ``xml`` entry.

    Returns an ``EntityCreated`` response on success, ``BadRequest`` when
    no text was supplied, ``EntityConflict`` when the document already
    exists and ``InternalError`` (carrying a traceback) when the library
    raises ``LibraryException``.
    """
    import traceback

    if form.cleaned_data['ocr_data']:
        data = form.cleaned_data['ocr_data']
    elif 'ocr_file' in request.FILES:
        data = request.FILES['ocr_file'].read().decode('utf-8')
    else:
        # Neither source present: answer 400 instead of failing on the
        # missing ``request.FILES`` key.
        return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')

    if form.cleaned_data['generate_dc']:
        data = librarian.wrap_text(data, unicode(date.today()))

    docid = form.cleaned_data['bookname']

    # NOTE(review): no library lock is taken around creation here --
    # confirm whether one is required.
    try:
        log.info("DOCID %s", docid)
        doc = lib.document_create(docid)
        # document created, but no content yet
        try:
            doc = doc.quickwrite('xml', data.encode('utf-8'),
                '$AUTO$ XML data uploaded.', user=request.user.username)
        except Exception:
            # rollback branch creation
            lib._rollback()
            raise LibraryException(traceback.format_exc())

        url = reverse('document_view', args=[doc.id])

        # NOTE(review): the keyword names and the 'url'/'name' body keys
        # are reconstructed -- confirm against ``api.response``.
        return response.EntityCreated().django_response(
            body={
                'url': url,
                'name': doc.id,
                'revision': doc.revision },
            url=url)
    except LibraryException:
        return response.InternalError().django_response({
            "reason": traceback.format_exc()
        })
    except DocumentAlreadyExists:
        # Document is already there
        return response.EntityConflict().django_response({
            "reason": "already-exists",
            # The format string needs a placeholder for ``docid``;
            # without one the ``%`` operator raises TypeError.
            "message": "Document %s already exists." % docid
        })
# Read-only (GET) handler describing a single document.  It is assigned to
# ``DocumentHandler.anonymous``.
159 class BasicDocumentHandler(AnonymousBaseHandler):
160 allowed_methods = ('GET',)
# Looks up ``docid`` in ``lib``; a ``RevisionNotFound`` raised by the lookup
# is handled below.
# NOTE(review): ``lib`` is presumably injected by a decorator such as
# ``hglibrary`` -- confirm.
163 def read(self, request, docid, lib):
165 doc = lib.document(docid)
166 except RevisionNotFound:
# Description of the document: URLs of its HTML, text and Dublin Core views
# plus the revision of the document that was looked up.
171 'html_url': reverse('dochtml_view', args=[doc.id]),
172 'text_url': reverse('doctext_view', args=[doc.id]),
173 'dc_url': reverse('docdc_view', args=[doc.id]),
174 'public_revision': doc.revision,
# Handler for a single document (GET and PUT); ``BasicDocumentHandler`` is
# registered as its ``anonymous`` counterpart.
182 class DocumentHandler(BaseHandler):
183 allowed_methods = ('GET', 'PUT')
184 anonymous = BasicDocumentHandler
# Describes the document both as stored in the library (``doc``) and as
# taken for the requesting user (``udoc``).
# NOTE(review): ``lib`` is presumably injected by a decorator such as
# ``hglibrary`` -- confirm.
187 def read(self, request, docid, lib):
188 """Read document's meta data"""
189 log.info(u"Read %s (%s)" % (docid, type(docid)) )
191 doc = lib.document(docid)
# ``take`` is called with the requesting user's name; its result supplies
# every ``user_*`` value and all URLs below.
192 udoc = doc.take(request.user.username)
193 except RevisionNotFound, e:
# NOTE(review): the payload carries the exception *class* (``type(e)``);
# confirm the response layer can serialise it.
194 return response.EntityNotFound().django_response({
195 'exception': type(e), 'message': e.message,
198 # is_shared = udoc.ancestorof(doc)
199 # is_uptodate = is_shared or shared.ancestorof(document)
# URLs are built from the user's document id; revisions and timestamps are
# reported for the user's document and for the library document.
203 'html_url': reverse('dochtml_view', args=[udoc.id]),
204 'text_url': reverse('doctext_view', args=[udoc.id]),
205 'dc_url': reverse('docdc_view', args=[udoc.id]),
206 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
207 'merge_url': reverse('docmerge_view', args=[udoc.id]),
208 'user_revision': udoc.revision,
209 'user_timestamp': udoc.revision.timestamp,
210 'public_revision': doc.revision,
211 'public_timestamp': doc.revision.timestamp,
# NOTE(review): the docstring below looks truncated ("display not") and no
# body is visible for this method -- confirm what PUT is meant to update.
217 def update(self, request, docid, lib):
218 """Update information about the document, like display not"""
class DocumentHTMLHandler(BaseHandler):
    """Serve a document rendered as HTML (GET only)."""
    # A one-element tuple needs the trailing comma: ``('GET')`` is just the
    # string 'GET', unlike the tuples used by the other handlers.
    allowed_methods = ('GET',)

    # NOTE(review): ``lib`` is presumably injected by the ``hglibrary``
    # decorator imported from ``api.utils`` -- confirm.
    @hglibrary
    def read(self, request, docid, lib, stylesheet='partial'):
        """Read document as html text.

        The ``revision`` query parameter selects the revision to render and
        defaults to ``'latest'``.  The document's ``xml`` data is passed to
        ``librarian.html.transform`` with the given ``stylesheet``.

        Returns the transform result, or a response object:
        ``BadRequest`` when the revision belongs to another document,
        ``EntityNotFound`` for ``EntryNotFound``/``RevisionNotFound`` and
        ``InternalError`` for ``librarian.ParseError``.
        """
        revision = request.GET.get('revision', 'latest')

        try:
            if revision == 'latest':
                document = lib.document(docid)
            else:
                document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })

            # NOTE(review): the keyword name ``options`` is reconstructed --
            # confirm against ``librarian.html.transform``.
            return librarian.html.transform(document.data('xml'), is_file=False,
                parse_dublincore=False, stylesheet=stylesheet,
                options={
                    "with-paths": 'boolean(1)',
                })
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'reason': 'not-found', 'message': e.message})
        except librarian.ParseError as e:
            return response.InternalError().django_response({
                'reason': 'xml-parse-error', 'message': e.message })
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries associated with a document (GET only)."""
    # A one-element tuple needs the trailing comma: ``('GET')`` is just the
    # string 'GET', unlike the tuples used by the other handlers.
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every ``GalleryForDocument`` row of ``docid`` the directory
        ``MEDIA_ROOT/<subpath>`` is listed.  Files ending in .png, .jpeg or
        .jpg (case-insensitive) are collected as URL-quoted
        ``MEDIA_URL``-based addresses.  Missing directories, non-unicode
        file names and other extensions are logged and skipped.

        Returns a list of ``{'name': ..., 'pages': [...]}`` dicts, with
        ``pages`` sorted.
        """
        from urllib import quote

        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            # ``filename`` rather than ``file``, which shadows the builtin.
            for filename in os.listdir(dirpath):
                if not isinstance(filename, unicode):
                    # NOTE(review): the second log argument is
                    # reconstructed -- confirm it should be the directory.
                    log.warn(u"File %r is gallery %r is not unicode. Ommiting.",
                        filename, dirpath)
                    continue

                name, ext = os.path.splitext(os.path.basename(filename))

                if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
                    log.info(u"Ignoring: %s %s", name, ext)
                    continue

                url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                gallery['pages'].append( quote(url.encode('utf-8')) )

            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
# Matches an ``<include ... href="wlrepo://LINK" ...>`` tag, with an optional
# namespace prefix and either quote style, and captures LINK in the ``link``
# group.  The link body is "any character that is not the opening quote".
# A negative lookahead expresses that, because ``\1`` inside a ``[...]``
# character class is a numeric character escape, not a backreference; with
# ``[^\1]`` an empty link swallowed the closing quote and matched garbage.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>(?:(?!\1)[\s\S])+?)\1\s*[^>]*?>"""
305 class DocumentTextHandler(BaseHandler):
306 allowed_methods = ('GET', 'POST')
def read(self, request, docid, lib):
    """Read document as raw text.

    Query parameters: ``revision`` (default ``'latest'``) selects the
    revision, ``part`` (optional) selects a single part of the document.

    Returns the document's ``xml`` data, or the text of the requested
    part, or a response object: ``BadRequest`` when the revision belongs
    to another document, ``EntityNotFound`` when the part, entry or
    revision is missing or the XML cannot be parsed.
    """
    # NOTE(review): ``lib`` is presumably supplied by a decorator such as
    # ``hglibrary``; none is visible on this method -- confirm.
    revision = request.GET.get('revision', 'latest')
    part = request.GET.get('part', False)

    try:
        if revision == 'latest':
            document = lib.document(docid)
        else:
            document = lib.document_for_rev(revision)

        if document.id != docid:
            return response.BadRequest().django_response({'reason': 'name-mismatch',
                'message': 'Provided revision is not valid for this document'})

        # TODO: some finer-grained access control
        # NOTE(review): this condition is reconstructed -- ``False`` is the
        # default used above when no ``part`` parameter was sent.
        if part is False:
            return document.data('xml')

        xdoc = parser.WLDocument.from_string(document.data('xml'),
            parse_dublincore=False)
        ptext = xdoc.part_as_text(part)

        # NOTE(review): this condition is reconstructed -- assumes
        # ``part_as_text`` yields None for an unknown part; confirm.
        if ptext is None:
            return response.EntityNotFound().django_response({
                'reason': 'no-part-in-document'
            })

        return ptext
    except librarian.ParseError as e:
        # The exception must be bound here: the payload below reads ``e``.
        return response.EntityNotFound().django_response({
            'reason': 'invalid-document-state',
            'exception': type(e), 'message': e.message
        })
    except (EntryNotFound, RevisionNotFound) as e:
        return response.EntityNotFound().django_response({
            'reason': 'not-found',
            'exception': type(e), 'message': e.message
        })
# Handle a POST that stores new XML content for ``docid``.  Reads
# ``revision`` (required) and the optional ``message``, ``contents`` and
# ``chunks`` fields from ``request.POST``.
# NOTE(review): ``lib`` is presumably injected by a decorator such as
# ``hglibrary`` -- confirm.
351 def create(self, request, docid, lib):
353 revision = request.POST['revision']
# ``current`` is looked up with the requesting user's name; ``orig`` is the
# document at the revision sent by the client.
355 current = lib.document(docid, request.user.username)
356 orig = lib.document_for_rev(revision)
# Conflict answer reporting both the provided and the latest revision.
359 return response.EntityConflict().django_response({
360 "reason": "out-of-date",
361 "provided_revision": orig.revision,
362 "latest_revision": current.revision })
# Commit message: a client-supplied message is prefixed with "$USER$ ",
# otherwise a fixed "$AUTO$" message is used.
364 if request.POST.has_key('message'):
365 msg = u"$USER$ " + request.POST['message']
367 msg = u"$AUTO$ XML content update."
# New content: either the whole text from ``contents`` ...
369 if request.POST.has_key('contents'):
370 data = request.POST['contents']
# ... or a JSON list of ``chunks`` merged into the current document.
372 if not request.POST.has_key('chunks'):
374 return response.BadRequest().django_response({'reason': 'invalid-arguments',
375 'message': 'No contents nor chunks specified.'})
378 parts = json.loads(request.POST['chunks'])
379 xdoc = parser.WLDocument.from_string(current.data('xml'))
# ``merge_chunks`` returns the parts it could not merge; they are joined
# into the conflict message below.
381 errors = xdoc.merge_chunks(parts)
384 return response.EntityConflict().django_response({
385 "reason": "invalid-chunks",
386 "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
389 data = xdoc.serialize()
391 # try to find any Xinclude tags
# NOTE(review): ``re.finditer`` returns an iterator, which is always truthy,
# so the ``or []`` fallback never applies.
392 includes = [m.groupdict()['link'] for m in (re.finditer(\
393 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
395 log.info("INCLUDES: %s", includes)
397 # TODO: provide useful routines to make this simpler
# Callback passed to ``current.invoke_and_commit`` below: it rewrites the
# stored ``parts`` list when the includes changed, updates ``PartCache`` and
# then writes the XML itself.
398 def xml_update_action(lib, resolve):
# The ``parts`` entry holds the JSON list of includes stored previously.
400 f = lib._fileopen(resolve('parts'), 'r')
401 stored_includes = json.loads(f.read())
406 if stored_includes != includes:
407 f = lib._fileopen(resolve('parts'), 'w+')
408 f.write(json.dumps(includes))
411 lib._fileadd(resolve('parts'))
413 # update the parts cache
414 PartCache.update_cache(docid, current.owner,\
415 stored_includes, includes)
417 # now that the parts are ok, write xml
418 f = lib._fileopen(resolve('xml'), 'w+')
419 f.write(data.encode('utf-8'))
# The second argument is a callable returning the (message, owner) pair.
423 ndoc = current.invoke_and_commit(\
424 xml_update_action, lambda d: (msg, current.owner) )
427 # return the new revision number
428 return response.SuccessAllOk().django_response({
431 "previous_revision": current.revision,
432 "revision": ndoc.revision,
433 'timestamp': ndoc.revision.timestamp,
434 "url": reverse("doctext_view", args=[ndoc.id])
# The library is rolled back only when a new document revision exists.
437 if ndoc: lib._rollback()
# An unknown revision is answered with a plain-text 'not found' response.
439 except RevisionNotFound, e:
440 return response.EntityNotFound(mimetype="text/plain").\
441 django_response(e.message)
445 # Dublin Core handlers
447 # @requires librarian
449 class DocumentDublinCoreHandler(BaseHandler):
450 allowed_methods = ('GET', 'POST')
def read(self, request, docid, lib):
    """Read the document's Dublin Core metadata.

    The ``revision`` query parameter (default ``'latest'``) selects the
    revision.  The document's ``xml`` data is parsed with
    ``dcparser.BookInfo.from_string`` and the serialised result returned.

    Returns a ``BadRequest`` response when the revision belongs to another
    document and ``EntityNotFound`` for ``EntryNotFound``/``RevisionNotFound``.
    """
    # NOTE(review): ``lib`` is presumably supplied by a decorator such as
    # ``hglibrary``; none is visible on this method -- confirm.
    revision = request.GET.get('revision', 'latest')

    try:
        if revision == 'latest':
            doc = lib.document(docid)
        else:
            doc = lib.document_for_rev(revision)

        # The looked-up document is bound to ``doc``; no ``document`` name
        # exists in this method.
        if doc.id != docid:
            return response.BadRequest().django_response({'reason': 'name-mismatch',
                'message': 'Provided revision is not valid for this document'})

        bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
        return bookinfo.serialize()
    except (EntryNotFound, RevisionNotFound) as e:
        return response.EntityNotFound().django_response({
            'exception': type(e), 'message': e.message})
def create(self, request, docid, lib):
    """Replace the document's Dublin Core metadata.

    Reads ``contents`` (the metadata as JSON), ``revision`` and the
    optional ``message`` from ``request.POST``.  The current XML is
    parsed, its ``book_info`` replaced with the parsed ``contents`` and
    the serialised document written back as the ``xml`` entry.

    Returns a description of the new revision, an ``EntityConflict``
    response when the provided revision is out of date, or
    ``EntityNotFound`` on ``RevisionNotFound``.
    """
    # NOTE(review): ``lib`` is presumably supplied by a decorator such as
    # ``hglibrary``; none is visible on this method -- confirm.
    try:
        bi_json = request.POST['contents']
        revision = request.POST['revision']

        # The optional message is read from POST, the same mapping that
        # is tested for it.
        if 'message' in request.POST:
            msg = u"$USER$ " + request.POST['message']
        else:
            msg = u"$AUTO$ Dublin core update."

        current = lib.document(docid, request.user.username)
        orig = lib.document_for_rev(revision)

        # NOTE(review): this condition is reconstructed -- confirm how
        # the two documents are meant to be compared.
        if current != orig:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": orig.revision,
                "latest": current.revision })

        # The parsed document is ``xmldoc``; it receives the new metadata
        # and is the object serialised below.
        xmldoc = parser.WLDocument.from_string(current.data('xml'))
        xmldoc.book_info = dcparser.BookInfo.from_json(bi_json)

        ndoc = current.quickwrite('xml',
            xmldoc.serialize().encode('utf-8'),
            message=msg, user=request.user.username)

        try:
            # return the new revision number
            # NOTE(review): the result is returned as a plain mapping --
            # confirm whether it should be wrapped in a response object.
            return {
                "previous_revision": current.revision,
                "revision": ndoc.revision,
                'timestamp': ndoc.revision.timestamp,
                "url": reverse("docdc_view", args=[ndoc.id])
            }
        except Exception:
            if ndoc: lib._rollback()
            raise
    except RevisionNotFound:
        return response.EntityNotFound().django_response()
518 class MergeHandler(BaseHandler):
519 allowed_methods = ('POST',)
521 @validate_form(forms.MergeRequestForm, 'POST')
523 def create(self, request, form, docid, lib):
524 """Create a new document revision from the information provided by user"""
526 target_rev = form.cleaned_data['target_revision']
528 doc = lib.document(docid)
529 udoc = doc.take(request.user.username)
531 if target_rev == 'latest':
532 target_rev = udoc.revision
534 if str(udoc.revision) != target_rev:
535 # the user doesn't know he has an old version
538 # Updating is theoretically ok, but we would
539 # have to force a refresh. Sharing may not be safe,
540 # 'cause it doesn't always result in update.
542 # In other words, we can't lie about the resource's state
543 # So we should just yield and 'out-of-date' conflict
544 # and let the client ask again with updated info.
546 # NOTE: this could result in a race condition, when there
547 # are 2 instances of the same user editing the same document.
548 # Instance "A" trying to update, and instance "B" always changing
549 # the document right before "A". The answer to this problem is
550 # for the "A" to request a merge from 'latest' and then
551 # check the parent revisions in response, if he actually
552 # merge from where he thinks he should. If not, the client SHOULD
553 # update his internal state.
554 return response.EntityConflict().django_response({
555 "reason": "out-of-date",
556 "provided": target_rev,
557 "latest": udoc.revision })
559 if form.cleaned_data['type'] == 'update':
560 # update is always performed from the file branch
562 success, changed = udoc.update(request.user.username)
564 if form.cleaned_data['type'] == 'share':
565 if not request.user.has_perm('explorer.document.can_share'):
566 # User is not permitted to make a merge, right away
567 # So we instead create a pull request in the database
569 prq, created = PullRequest.objects.get_or_create(
570 source_revision = str(udoc.revision),
572 'comitter': request.user,
575 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
579 return response.RequestAccepted().django_response(\
580 ticket_status=prq.status, \
581 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
582 except IntegrityError:
583 return response.EntityConflict().django_response({
584 'reason': 'request-already-exist'
587 success, changed = udoc.share(form.cleaned_data['message'])
590 return response.EntityConflict().django_response({
591 'reason': 'merge-failure',
595 return response.SuccessNoContent().django_response()
597 nudoc = udoc.latest()
599 return response.SuccessAllOk().django_response({
601 "parent_user_resivion": udoc.revision,
602 "parent_revision": doc.revision,
603 "revision": nudoc.revision,
604 'timestamp': nudoc.revision.timestamp,