1 # -*- encoding: utf-8 -*-
5 __author__= "Ćukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
19 from librarian import dcparser
22 from explorer.models import PullRequest, GalleryForDocument
25 import api.forms as forms
26 import api.response as response
27 from api.utils import validate_form, hglibrary, natural_order
28 from api.models import PartCache
# Module-level logger used by the handlers below.
34 log = logging.getLogger('platforma.api')
38 # Document List Handlers
class BasicLibraryHandler(AnonymousBaseHandler):
    """Anonymous, read-only listing of the documents in the library."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Each entry carries the document id as ``name`` and the address of
        its ``document_view`` as ``url``.
        """
        entries = []
        for docid in lib.documents():
            entry = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            }
            entries.append(entry)

        return {'documents': entries}
# Authenticated library endpoint: GET returns the documents arranged as a
# tree of parts, POST uploads a new document.  `anonymous` names the handler
# class piston uses for requests without credentials.
53 class LibraryHandler(BaseHandler):
54 allowed_methods = ('GET', 'POST')
55 anonymous = BasicLibraryHandler
# GET: one entry per document, then every document that the parts cache lists
# as a part is nested under the document that includes it.
58 def read(self, request, lib):
59 """Return the list of documents."""
63 for docid in lib.documents():
# ids are decoded from UTF-8 before being used as keys and in URLs
64 docid = docid.decode('utf-8')
66 'url': reverse('document_view', args=[docid]),
# distinct (part_id, document_id) pairs read from the parts cache
71 parts = PartCache.objects.defer('part_id')\
72 .values_list('part_id', 'document_id').distinct()
# shallow copy of the mapping: entries are popped from it as they turn out to
# be parts, so what remains are the top-level documents
74 document_tree = dict(documents)
76 for part, docid in parts:
77 # this way, we won't display broken links
78 if not documents.has_key(part):
79 log.info("NOT FOUND: %s", part)
# NOTE(review): only `part` is checked above; documents[docid] raises KeyError
# if the cache names a parent that is not in the library -- confirm
82 parent = documents[docid]
83 child = documents[part]
85 # not top-level anymore
# NOTE(review): pop() has no default, so a part listed under two different
# documents would raise KeyError on the second pair -- confirm the cache
# rules this out
86 document_tree.pop(part)
87 parent['parts'].append(child)
# parts of every document, and the top-level list itself, are ordered by
# natural_order applied to the 'name' key
89 for doc in documents.itervalues():
90 doc['parts'].sort(key=natural_order(lambda d: d['name']))
92 return {'documents': sorted(document_tree.itervalues(),
93 key=natural_order(lambda d: d['name']) ) }
# POST: store uploaded OCR text as the 'xml' content of a new document.
# `form` is the DocumentUploadForm instance handed in by validate_form.
95 @validate_form(forms.DocumentUploadForm, 'POST')
97 def create(self, request, form, lib):
98 """Create a new document."""
# the text is taken from the `ocr_data` form field; the uploaded `ocr_file`
# is read and decoded as UTF-8 as the other source
100 if form.cleaned_data['ocr_data']:
101 data = form.cleaned_data['ocr_data']
103 data = request.FILES['ocr_file'].read().decode('utf-8')
106 return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
# when `generate_dc` is set the text is passed through librarian.wrap_text
# together with today's date
108 if form.cleaned_data['generate_dc']:
109 data = librarian.wrap_text(data, unicode(date.today()))
111 docid = form.cleaned_data['bookname']
116 log.info("DOCID %s", docid)
117 doc = lib.document_create(docid)
118 # document created, but no content yet
# first commit of the new document: the uploaded text, encoded as UTF-8
121 doc = doc.quickwrite('xml', data.encode('utf-8'),
122 '$AUTO$ XML data uploaded.', user=request.user.username)
125 # rollback branch creation
# the original failure is carried onward as a formatted traceback string
127 raise LibraryException(traceback.format_exc())
129 url = reverse('document_view', args=[doc.id])
131 return response.EntityCreated().django_response(\
135 'revision': doc.revision },
# library failures become an InternalError response carrying the traceback
139 except LibraryException, e:
141 return response.InternalError().django_response(\
142 {'exception': traceback.format_exc()} )
143 except DocumentAlreadyExists:
144 # Document is already there
145 return response.EntityConflict().django_response(\
146 {"reason": "Document %s already exists." % docid})
# Anonymous, read-only metadata for a single document: looks the document up
# in the library and reports its view URLs and public revision.
151 class BasicDocumentHandler(AnonymousBaseHandler):
152 allowed_methods = ('GET',)
155 def read(self, request, docid, lib):
157 doc = lib.document(docid)
# a failed lookup is signalled by RevisionNotFound
158 except RevisionNotFound:
# addresses of the HTML, raw-text and Dublin Core views of this document
163 'html_url': reverse('dochtml_view', args=[doc.id]),
164 'text_url': reverse('doctext_view', args=[doc.id]),
165 'dc_url': reverse('docdc_view', args=[doc.id]),
166 'public_revision': doc.revision,
# Authenticated metadata endpoint for a single document (GET and PUT).
# `anonymous` names the handler class piston uses for requests without
# credentials.
174 class DocumentHandler(BaseHandler):
175 allowed_methods = ('GET', 'PUT')
176 anonymous = BasicDocumentHandler
# GET: report URLs, revisions and timestamps for the document (`doc`) and for
# the object returned by doc.take(username) (`udoc`) -- presumably the
# requesting user's own copy; confirm against the library API.
179 def read(self, request, docid, lib):
180 """Read document's meta data"""
181 log.info("Read %s", docid)
183 doc = lib.document(docid)
184 udoc = doc.take(request.user.username)
185 except RevisionNotFound, e:
# NOTE(review): type(e) is a class object; confirm the response emitter can
# serialise it
186 return response.EntityNotFound().django_response({
187 'exception': type(e), 'message': e.message})
189 # is_shared = udoc.ancestorof(doc)
190 # is_uptodate = is_shared or shared.ancestorof(document)
# every view URL is built from udoc.id; 'user_*' values come from udoc and
# 'public_*' values from doc
194 'html_url': reverse('dochtml_view', args=[udoc.id]),
195 'text_url': reverse('doctext_view', args=[udoc.id]),
196 'dc_url': reverse('docdc_view', args=[udoc.id]),
197 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
198 'merge_url': reverse('docmerge_view', args=[udoc.id]),
199 'user_revision': udoc.revision,
200 'user_timestamp': udoc.revision.timestamp,
201 'public_revision': doc.revision,
202 'public_timestamp': doc.revision.timestamp,
# PUT entry point for this resource
208 def update(self, request, docid, lib):
209 """Update information about the document, like display not"""
214 class DocumentHTMLHandler(BaseHandler):
215 allowed_methods = ('GET')
218 def read(self, request, docid, lib):
219 """Read document as html text"""
221 revision = request.GET.get('revision', 'latest')
223 if revision == 'latest':
224 document = lib.document(docid)
226 document = lib.document_for_rev(revision)
228 if document.id != docid:
229 return response.BadRequest().django_response({'reason': 'name-mismatch',
230 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
232 return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
233 except (EntryNotFound, RevisionNotFound), e:
234 return response.EntityNotFound().django_response({
235 'exception': type(e), 'message': e.message})
242 class DocumentGalleryHandler(BaseHandler):
243 allowed_methods = ('GET')
245 def read(self, request, docid):
246 """Read meta-data about scans for gallery of this document."""
249 for assoc in GalleryForDocument.objects.filter(document=docid):
250 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
252 if not os.path.isdir(dirpath):
253 log.info(u"[WARNING]: missing gallery %s", dirpath)
256 gallery = {'name': assoc.name, 'pages': []}
258 for file in sorted( os.listdir(dirpath) ):
259 file = file.encode('utf-8')
262 name, ext = os.path.splitext(os.path.basename(file))
264 if ext.lower() not in ['.png', '.jpeg', '.jpg']:
265 log.info("Ignoring: %s %s", name, ext)
268 url = settings.MEDIA_URL + assoc.subpath + u'/' + file.decode('utf-8');
269 gallery['pages'].append(url)
271 galleries.append(gallery)
# Matches an <include> element (optionally namespace-prefixed) whose href is
# a wlrepo:// URI and captures what follows the scheme as the group "link".
# The link runs up to the quote character that opened the attribute value.
# A backreference cannot be written inside a character class (there "\1"
# denotes the character with code 1), so the closing quote is excluded with
# a negative lookahead instead.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>(?:(?!\1).)+?)\1\s*[^>]*?>"""
# Raw XML endpoint: GET returns a document's 'xml' data, POST writes new
# contents on behalf of the requesting user.
283 class DocumentTextHandler(BaseHandler):
284 allowed_methods = ('GET', 'POST')
# GET: the optional `revision` query parameter selects the revision and
# defaults to 'latest'.
287 def read(self, request, docid, lib):
288 """Read document as raw text"""
289 revision = request.GET.get('revision', 'latest')
291 if revision == 'latest':
292 document = lib.document(docid)
294 document = lib.document_for_rev(revision)
# the revision must belong to the document named in the URL
296 if document.id != docid:
297 return response.BadRequest().django_response({'reason': 'name-mismatch',
298 'message': 'Provided revision is not valid for this document'})
300 # TODO: some finer-grained access control
301 return document.data('xml')
302 except (EntryNotFound, RevisionNotFound), e:
303 return response.EntityNotFound().django_response({
304 'exception': type(e), 'message': e.message})
# POST: expects `contents` (the new XML) and `revision` (the revision the
# client edited) in the form data; `message` is optional.
307 def create(self, request, docid, lib):
309 data = request.POST['contents']
310 revision = request.POST['revision']
# commit message: "$USER$ " + the client's text, or an "$AUTO$" default
312 if request.POST.has_key('message'):
313 msg = u"$USER$ " + request.POST['message']
315 msg = u"$AUTO$ XML content update."
# `current` is looked up for this user name, `orig` is the revision the
# client says it started from
317 current = lib.document(docid, request.user.username)
318 orig = lib.document_for_rev(revision)
# conflict response reporting both revisions
321 return response.EntityConflict().django_response({
322 "reason": "out-of-date",
323 "provided_revision": orig.revision,
324 "latest_revision": current.revision })
326 # try to find any Xinclude tags
# `includes` holds the "link" group of every XINCLUDE_REGEXP match in the new
# text; finditer always returns an iterator, so `or []` is purely defensive
327 includes = [m.groupdict()['link'] for m in (re.finditer(\
328 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
330 log.info("INCLUDES: %s", includes)
332 # TODO: provide useful routines to make this simpler
# callback handed to current.invoke_and_commit below: compares the include
# list stored as JSON in the 'parts' file with the new one, rewrites that
# file and refreshes PartCache when they differ, then writes the 'xml' file
333 def xml_update_action(lib, resolve):
335 f = lib._fileopen(resolve('parts'), 'r')
336 stored_includes = json.loads(f.read())
341 if stored_includes != includes:
342 f = lib._fileopen(resolve('parts'), 'w+')
343 f.write(json.dumps(includes))
346 lib._fileadd(resolve('parts'))
348 # update the parts cache
349 PartCache.update_cache(docid, current.owner,\
350 stored_includes, includes)
352 # now that the parts are ok, write xml
353 f = lib._fileopen(resolve('xml'), 'w+')
354 f.write(data.encode('utf-8'))
# the second argument supplies the (message, user) pair for the commit
358 ndoc = current.invoke_and_commit(\
359 xml_update_action, lambda d: (msg, current.owner) )
362 # return the new revision number
363 return response.SuccessAllOk().django_response({
366 "previous_revision": current.revision,
367 "revision": ndoc.revision,
368 'timestamp': ndoc.revision.timestamp,
369 "url": reverse("doctext_view", args=[ndoc.id])
# NOTE(review): `ndoc` is bound only once invoke_and_commit has returned;
# confirm it is initialised before this guard can be reached
372 if ndoc: lib._rollback()
# unknown revisions are answered with a plain-text EntityNotFound
374 except RevisionNotFound, e:
375 return response.EntityNotFound(mimetype="text/plain").\
376 django_response(e.message)
380 # Dublin Core handlers
382 # @requires librarian
384 class DocumentDublinCoreHandler(BaseHandler):
385 allowed_methods = ('GET', 'POST')
388 def read(self, request, docid, lib):
389 """Read document as raw text"""
391 revision = request.GET.get('revision', 'latest')
393 if revision == 'latest':
394 doc = lib.document(docid)
396 doc = lib.document_for_rev(revision)
399 if document.id != docid:
400 return response.BadRequest().django_response({'reason': 'name-mismatch',
401 'message': 'Provided revision is not valid for this document'})
403 bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
404 return bookinfo.serialize()
405 except (EntryNotFound, RevisionNotFound), e:
406 return response.EntityNotFound().django_response({
407 'exception': type(e), 'message': e.message})
410 def create(self, request, docid, lib):
412 bi_json = request.POST['contents']
413 revision = request.POST['revision']
415 if request.POST.has_key('message'):
416 msg = u"$USER$ " + request.PUT['message']
418 msg = u"$AUTO$ Dublin core update."
420 current = lib.document(docid, request.user.username)
421 orig = lib.document_for_rev(revision)
424 return response.EntityConflict().django_response({
425 "reason": "out-of-date",
426 "provided": orig.revision,
427 "latest": current.revision })
429 xmldoc = parser.WLDocument.from_string(current.data('xml'))
430 document.book_info = dcparser.BookInfo.from_json(bi_json)
433 ndoc = current.quickwrite('xml', \
434 document.serialize().encode('utf-8'),\
435 message=msg, user=request.user.username)
438 # return the new revision number
442 "previous_revision": current.revision,
443 "revision": ndoc.revision,
444 'timestamp': ndoc.revision.timestamp,
445 "url": reverse("docdc_view", args=[ndoc.id])
448 if ndoc: lib._rollback()
450 except RevisionNotFound:
451 return response.EntityNotFound().django_response()
453 class MergeHandler(BaseHandler):
454 allowed_methods = ('POST',)
456 @validate_form(forms.MergeRequestForm, 'POST')
458 def create(self, request, form, docid, lib):
459 """Create a new document revision from the information provided by user"""
461 target_rev = form.cleaned_data['target_revision']
463 doc = lib.document(docid)
464 udoc = doc.take(request.user.username)
466 if target_rev == 'latest':
467 target_rev = udoc.revision
469 if str(udoc.revision) != target_rev:
470 # the user doesn't know he has an old version
473 # Updating is theoretically OK, but we would
474 # have to force a refresh. Sharing may be not safe,
475 # 'cause it doesn't always result in update.
477 # In other words, we can't lie about the resource's state
478 # So we should just yield and 'out-of-date' conflict
479 # and let the client ask again with updated info.
481 # NOTE: this could result in a race condition, when there
482 # are 2 instances of the same user editing the same document.
483 # Instance "A" trying to update, and instance "B" always changing
484 # the document right before "A". The answer to this problem is
485 # for the "A" to request a merge from 'latest' and then
486 # check the parent revisions in response, if he actually
487 # merge from where he thinks he should. If not, the client SHOULD
488 # update his internal state.
489 return response.EntityConflict().django_response({
490 "reason": "out-of-date",
491 "provided": target_rev,
492 "latest": udoc.revision })
494 if not request.user.has_perm('explorer.book.can_share'):
495 # User is not permitted to make a merge, right away
496 # So we instead create a pull request in the database
498 comitter=request.user,
500 source_revision = str(udoc.revision),
502 comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
506 return response.RequestAccepted().django_response(\
507 ticket_status=prq.status, \
508 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
510 if form.cleaned_data['type'] == 'update':
511 # update is always performed from the file branch
513 success, changed = udoc.update(request.user.username)
515 if form.cleaned_data['type'] == 'share':
516 success, changed = udoc.share(form.cleaned_data['message'])
519 return response.EntityConflict().django_response({
520 'reason': 'merge-failure',
524 return response.SuccessNoContent().django_response()
526 nudoc = udoc.latest()
528 return response.SuccessAllOk().django_response({
530 "parent_user_resivion": udoc.revision,
531 "parent_revision": doc.revision,
532 "revision": nudoc.revision,
533 'timestamp': nudoc.revision.timestamp,