1 # -*- encoding: utf-8 -*-
# Module metadata: author, creation timestamp and a placeholder module docstring.
# NOTE(review): "Ćukasz" looks like a mis-encoded "Łukasz" -- confirm the
# intended spelling before changing the literal.
5 __author__= "Ćukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
16 from django.db import IntegrityError
20 from librarian import dcparser, parser
23 from explorer.models import PullRequest, GalleryForDocument
26 import api.forms as forms
27 import api.response as response
28 from api.utils import validate_form, hglibrary, natural_order
29 from api.models import PartCache
# Module-level logger shared by all handlers in this file, registered under
# the name 'platforma.api'.
35 log = logging.getLogger('platforma.api')
39 # Document List Handlers
# GET-only document listing; LibraryHandler refers to this class through its
# `anonymous` attribute.
41 class BasicLibraryHandler(AnonymousBaseHandler):
42 allowed_methods = ('GET',)
# `lib` is the document library. How it gets injected is not visible in this
# listing (presumably a decorator such as api.utils.hglibrary -- TODO confirm).
45 def read(self, request, lib):
46 """Return the list of documents."""
# One entry per id yielded by lib.documents(): the 'document_view' URL for
# that id plus the id itself as 'name'.
48 'url': reverse('document_view', args=[docid]),
49 'name': docid } for docid in lib.documents() ]
# The list is returned under the single key 'documents'.
51 return {'documents' : document_list}
54 class LibraryHandler(BaseHandler):
55 allowed_methods = ('GET', 'POST')
56 anonymous = BasicLibraryHandler
59 def read(self, request, lib):
60 """Return the list of documents."""
64 for docid in lib.documents():
66 'url': reverse('document_view', args=[docid]),
71 parts = PartCache.objects.defer('part_id')\
72 .values_list('part_id', 'document_id').distinct()
74 document_tree = dict(documents)
76 for part, docid in parts:
77 # this way, we won't display broken links
78 if not documents.has_key(part):
79 log.info("NOT FOUND: %s", part)
82 parent = documents[docid]
83 child = documents[part]
85 # not top-level anymore
86 document_tree.pop(part)
87 parent['parts'].append(child)
89 for doc in documents.itervalues():
90 doc['parts'].sort(key=natural_order(lambda d: d['name']))
92 return {'documents': sorted(document_tree.itervalues(),
93 key=natural_order(lambda d: d['name']) ) }
95 @validate_form(forms.DocumentUploadForm, 'POST')
97 def create(self, request, form, lib):
98 """Create a new document."""
100 if form.cleaned_data['ocr_data']:
101 data = form.cleaned_data['ocr_data']
103 data = request.FILES['ocr_file'].read().decode('utf-8')
106 return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
108 if form.cleaned_data['generate_dc']:
109 data = librarian.wrap_text(data, unicode(date.today()))
111 docid = form.cleaned_data['bookname']
116 log.info("DOCID %s", docid)
117 doc = lib.document_create(docid)
118 # document created, but no content yet
121 doc = doc.quickwrite('xml', data.encode('utf-8'),
122 '$AUTO$ XML data uploaded.', user=request.user.username)
125 # rollback branch creation
127 raise LibraryException(traceback.format_exc())
129 url = reverse('document_view', args=[doc.id])
131 return response.EntityCreated().django_response(\
135 'revision': doc.revision },
139 except LibraryException, e:
141 return response.InternalError().django_response(\
142 {'exception': traceback.format_exc()} )
143 except DocumentAlreadyExists:
144 # Document is already there
145 return response.EntityConflict().django_response(\
146 {"reason": "Document %s already exists." % docid})
# GET-only view of a single document; DocumentHandler refers to this class
# through its `anonymous` attribute.
151 class BasicDocumentHandler(AnonymousBaseHandler):
152 allowed_methods = ('GET',)
# `docid` identifies the document and `lib` is the document library (how
# `lib` is injected is not visible in this listing).
155 def read(self, request, docid, lib):
# Look the document up; RevisionNotFound is handled explicitly (the handler
# body is not visible in this listing).
157 doc = lib.document(docid)
158 except RevisionNotFound:
# The result exposes links to the HTML, text and Dublin Core views of the
# document, plus its current revision as 'public_revision'.
163 'html_url': reverse('dochtml_view', args=[doc.id]),
164 'text_url': reverse('doctext_view', args=[doc.id]),
165 'dc_url': reverse('docdc_view', args=[doc.id]),
166 'public_revision': doc.revision,
174 class DocumentHandler(BaseHandler):
175 allowed_methods = ('GET', 'PUT')
176 anonymous = BasicDocumentHandler
179 def read(self, request, docid, lib):
180 """Read document's meta data"""
181 log.info(u"Read %s (%s)" % (docid, type(docid)) )
183 doc = lib.document(docid)
184 udoc = doc.take(request.user.username)
185 except RevisionNotFound, e:
186 return response.EntityNotFound().django_response({
187 'exception': type(e), 'message': e.message,
190 # is_shared = udoc.ancestorof(doc)
191 # is_uptodate = is_shared or shared.ancestorof(document)
195 'html_url': reverse('dochtml_view', args=[udoc.id]),
196 'text_url': reverse('doctext_view', args=[udoc.id]),
197 'dc_url': reverse('docdc_view', args=[udoc.id]),
198 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
199 'merge_url': reverse('docmerge_view', args=[udoc.id]),
200 'user_revision': udoc.revision,
201 'user_timestamp': udoc.revision.timestamp,
202 'public_revision': doc.revision,
203 'public_timestamp': doc.revision.timestamp,
209 def update(self, request, docid, lib):
210 """Update information about the document, like display not"""
215 class DocumentHTMLHandler(BaseHandler):
216 allowed_methods = ('GET')
219 def read(self, request, docid, lib, stylesheet='partial'):
220 """Read document as html text"""
222 revision = request.GET.get('revision', 'latest')
224 if revision == 'latest':
225 document = lib.document(docid)
227 document = lib.document_for_rev(revision)
229 if document.id != docid:
230 return response.BadRequest().django_response({'reason': 'name-mismatch',
231 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
233 return librarian.html.transform(document.data('xml'), is_file=False, \
234 parse_dublincore=False, stylesheet=stylesheet,\
236 "with-paths": 'boolean(1)',
239 except (EntryNotFound, RevisionNotFound), e:
240 return response.EntityNotFound().django_response({
241 'reason': 'not-found', 'message': e.message})
242 except librarian.ParseError, e:
243 return response.InternalError().django_response({
244 'reason': 'xml-parse-error', 'message': e.message })
250 class DocumentGalleryHandler(BaseHandler):
251 allowed_methods = ('GET')
254 def read(self, request, docid):
255 """Read meta-data about scans for gallery of this document."""
257 from urllib import quote
259 for assoc in GalleryForDocument.objects.filter(document=docid):
260 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
262 if not os.path.isdir(dirpath):
263 log.warn(u"[WARNING]: missing gallery %s", dirpath)
266 gallery = {'name': assoc.name, 'pages': []}
268 for file in os.listdir(dirpath):
269 if not isinstance(file, unicode):
270 log.warn(u"File %r is gallery %r is not unicode. Ommiting."\
274 name, ext = os.path.splitext(os.path.basename(file))
276 if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
277 log.info(u"Ignoring: %s %s", name, ext)
280 url = settings.MEDIA_URL + assoc.subpath + u'/' + file;
281 gallery['pages'].append( quote(url.encode('utf-8')) )
283 gallery['pages'].sort()
284 galleries.append(gallery)
# Matches an <include ...> tag (optionally namespace-prefixed) whose href is a
# quoted wlrepo:// URL; the part after "wlrepo://" is captured as the named
# group 'link'. DocumentTextHandler.create scans uploaded XML with it.
# NOTE(review): inside a character class "\1" is not a backreference (numeric
# escapes are treated as characters there), so "[^\1]" does not mean "anything
# but the opening quote". The lazy "+?" followed by the real "\1" backreference
# is what stops the match at the closing quote -- confirm this is intended.
292 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
297 class DocumentTextHandler(BaseHandler):
298 allowed_methods = ('GET', 'POST')
301 def read(self, request, docid, lib):
302 """Read document as raw text"""
303 revision = request.GET.get('revision', 'latest')
304 part = request.GET.get('part', False)
307 if revision == 'latest':
308 document = lib.document(docid)
310 document = lib.document_for_rev(revision)
312 if document.id != docid:
313 return response.BadRequest().django_response({'reason': 'name-mismatch',
314 'message': 'Provided revision is not valid for this document'})
316 # TODO: some finer-grained access control
319 return document.data('xml')
321 xdoc = parser.WLDocument.from_string(document.data('xml'))
322 ptext = xdoc.part_as_text(part)
325 return response.EntityNotFound().django_response({
326 'reason': 'no-part-in-document'
330 except librarian.ParseError:
331 return response.EntityNotFound().django_response({
332 'reason': 'invalid-document-state',
333 'exception': type(e), 'message': e.message
335 except (EntryNotFound, RevisionNotFound), e:
336 return response.EntityNotFound().django_response({
337 'reason': 'not-found',
338 'exception': type(e), 'message': e.message
342 def create(self, request, docid, lib):
344 revision = request.POST['revision']
346 current = lib.document(docid, request.user.username)
347 orig = lib.document_for_rev(revision)
350 return response.EntityConflict().django_response({
351 "reason": "out-of-date",
352 "provided_revision": orig.revision,
353 "latest_revision": current.revision })
355 if request.POST.has_key('message'):
356 msg = u"$USER$ " + request.POST['message']
358 msg = u"$AUTO$ XML content update."
360 if request.POST.has_key('contents'):
361 data = request.POST['contents']
363 if not request.POST.has_key('chunks'):
365 return response.BadRequest().django_response({'reason': 'invalid-arguments',
366 'message': 'No contents nor chunks specified.'})
369 parts = json.loads(request.POST['chunks'])
370 xdoc = parser.WLDocument.from_string(current.data('xml'))
372 errors = xdoc.merge_chunks(parts)
375 return response.EntityConflict().django_response({
376 "reason": "invalid-chunks",
377 "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
380 data = xdoc.serialize()
382 # try to find any Xinclude tags
383 includes = [m.groupdict()['link'] for m in (re.finditer(\
384 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
386 log.info("INCLUDES: %s", includes)
388 # TODO: provide useful routines to make this simpler
389 def xml_update_action(lib, resolve):
391 f = lib._fileopen(resolve('parts'), 'r')
392 stored_includes = json.loads(f.read())
397 if stored_includes != includes:
398 f = lib._fileopen(resolve('parts'), 'w+')
399 f.write(json.dumps(includes))
402 lib._fileadd(resolve('parts'))
404 # update the parts cache
405 PartCache.update_cache(docid, current.owner,\
406 stored_includes, includes)
408 # now that the parts are ok, write xml
409 f = lib._fileopen(resolve('xml'), 'w+')
410 f.write(data.encode('utf-8'))
414 ndoc = current.invoke_and_commit(\
415 xml_update_action, lambda d: (msg, current.owner) )
418 # return the new revision number
419 return response.SuccessAllOk().django_response({
422 "previous_revision": current.revision,
423 "revision": ndoc.revision,
424 'timestamp': ndoc.revision.timestamp,
425 "url": reverse("doctext_view", args=[ndoc.id])
428 if ndoc: lib._rollback()
430 except RevisionNotFound, e:
431 return response.EntityNotFound(mimetype="text/plain").\
432 django_response(e.message)
436 # Dublin Core handlers
438 # @requires librarian
440 class DocumentDublinCoreHandler(BaseHandler):
441 allowed_methods = ('GET', 'POST')
444 def read(self, request, docid, lib):
445 """Read document as raw text"""
447 revision = request.GET.get('revision', 'latest')
449 if revision == 'latest':
450 doc = lib.document(docid)
452 doc = lib.document_for_rev(revision)
455 if document.id != docid:
456 return response.BadRequest().django_response({'reason': 'name-mismatch',
457 'message': 'Provided revision is not valid for this document'})
459 bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
460 return bookinfo.serialize()
461 except (EntryNotFound, RevisionNotFound), e:
462 return response.EntityNotFound().django_response({
463 'exception': type(e), 'message': e.message})
466 def create(self, request, docid, lib):
468 bi_json = request.POST['contents']
469 revision = request.POST['revision']
471 if request.POST.has_key('message'):
472 msg = u"$USER$ " + request.PUT['message']
474 msg = u"$AUTO$ Dublin core update."
476 current = lib.document(docid, request.user.username)
477 orig = lib.document_for_rev(revision)
480 return response.EntityConflict().django_response({
481 "reason": "out-of-date",
482 "provided": orig.revision,
483 "latest": current.revision })
485 xmldoc = parser.WLDocument.from_string(current.data('xml'))
486 document.book_info = dcparser.BookInfo.from_json(bi_json)
489 ndoc = current.quickwrite('xml', \
490 document.serialize().encode('utf-8'),\
491 message=msg, user=request.user.username)
494 # return the new revision number
498 "previous_revision": current.revision,
499 "revision": ndoc.revision,
500 'timestamp': ndoc.revision.timestamp,
501 "url": reverse("docdc_view", args=[ndoc.id])
504 if ndoc: lib._rollback()
506 except RevisionNotFound:
507 return response.EntityNotFound().django_response()
509 class MergeHandler(BaseHandler):
510 allowed_methods = ('POST',)
512 @validate_form(forms.MergeRequestForm, 'POST')
514 def create(self, request, form, docid, lib):
515 """Create a new document revision from the information provided by user"""
517 target_rev = form.cleaned_data['target_revision']
519 doc = lib.document(docid)
520 udoc = doc.take(request.user.username)
522 if target_rev == 'latest':
523 target_rev = udoc.revision
525 if str(udoc.revision) != target_rev:
526 # the user doesn't know he has an old version
529 # Updating is theoretically OK, but we would
530 # have to force a refresh. Sharing may not be safe,
531 # because it doesn't always result in an update.
533 # In other words, we can't lie about the resource's state.
534 # So we should just yield an 'out-of-date' conflict
535 # and let the client ask again with updated info.
537 # NOTE: this could result in a race condition when there
538 # are 2 instances of the same user editing the same document:
539 # instance "A" trying to update, and instance "B" always changing
540 # the document right before "A". The answer to this problem is
541 # for "A" to request a merge from 'latest' and then
542 # check the parent revisions in the response to see whether it actually
543 # merged from where it thinks it should. If not, the client SHOULD
544 # update its internal state.
545 return response.EntityConflict().django_response({
546 "reason": "out-of-date",
547 "provided": target_rev,
548 "latest": udoc.revision })
550 if not request.user.has_perm('explorer.document.can_share'):
551 # User is not permitted to make a merge, right away
552 # So we instead create a pull request in the database
554 prq, created = PullRequest.objects.get_or_create(
555 source_revision = str(udoc.revision),
557 'comitter': request.user,
560 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
564 return response.RequestAccepted().django_response(\
565 ticket_status=prq.status, \
566 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
567 except IntegrityError, e:
568 return response.InternalError().django_response()
570 if form.cleaned_data['type'] == 'update':
571 # update is always performed from the file branch
573 success, changed = udoc.update(request.user.username)
575 if form.cleaned_data['type'] == 'share':
576 success, changed = udoc.share(form.cleaned_data['message'])
579 return response.EntityConflict().django_response({
580 'reason': 'merge-failure',
584 return response.SuccessNoContent().django_response()
586 nudoc = udoc.latest()
588 return response.SuccessAllOk().django_response({
590 "parent_user_resivion": udoc.revision,
591 "parent_revision": doc.revision,
592 "revision": nudoc.revision,
593 'timestamp': nudoc.revision.timestamp,