1 # -*- encoding: utf-8 -*-
# Module metadata. The file declares a UTF-8 source encoding, so the
# non-ASCII author name can be written literally.
__author__ = "Łukasz Rekucki"
__date__ = "$2009-09-25 15:49:50$"
__doc__ = ("Piston API handlers for library documents: listing and upload, "
           "meta data, HTML, raw text, Dublin Core, galleries and merging.")
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
19 from librarian import dcparser
22 from explorer.models import PullRequest, GalleryForDocument
25 import api.forms as forms
26 import api.response as response
27 from api.utils import validate_form, hglibrary, natural_order
28 from api.models import PartCache
# Shared logger for the handlers in this module ('platforma.api' channel).
log = logging.getLogger('platforma.api')
38 # Document List Handlers
class BasicLibraryHandler(AnonymousBaseHandler):
    """Anonymous, read-only listing of the documents in the library."""

    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Each entry carries the document id (``name``) and the URL of its
        ``document_view``. ``lib`` is presumably injected by the
        ``hglibrary`` decorator -- confirm against ``api.utils``.
        """
        entries = []
        for docid in lib.documents():
            entry = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            }
            entries.append(entry)

        return {'documents': entries}
class LibraryHandler(BaseHandler):
    """Authenticated view of the library: document tree and upload."""

    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents as a tree.

        Every document starts as a top-level entry. Each distinct
        (part, document) pair read from ``PartCache`` then appends the
        part to its parent's ``parts`` list and removes it from the top
        level. Both levels are sorted by name using ``natural_order``.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': [],
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # shallow copy: the entry dicts are shared with ``documents``
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # a cache row may also point at a parent that is not listed
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            documents[docid]['parts'].append(documents[part])

            # not top-level anymore; a part included by several documents
            # has already been removed on an earlier iteration
            document_tree.pop(part, None)

        for doc in documents.itervalues():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.itervalues(),
            key=natural_order(lambda d: d['name']))}

    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The text comes from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8). With
        ``generate_dc`` set the text is first passed through
        ``librarian.wrap_text`` together with today's date.

        Responds with BadRequest when neither source is present,
        EntityCreated on success, EntityConflict when the document already
        exists and InternalError when writing the content fails.
        """
        import traceback

        if form.cleaned_data['ocr_data']:
            data = form.cleaned_data['ocr_data']
        elif 'ocr_file' in request.FILES:
            data = request.FILES['ocr_file'].read().decode('utf-8')
        else:
            # without this guard the FILES lookup raises before the
            # bad-request answer below can be produced
            data = None

        if data is None:
            return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            # NOTE(review): the lock/release pair sits on lines missing from
            # the reviewed listing -- confirm against the repository copy.
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body={
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision},
                    url=url)
            finally:
                lock.release()
        except LibraryException:
            return response.InternalError().django_response(
                {'exception': traceback.format_exc()})
        except DocumentAlreadyExists:
            # Document is already there
            return response.EntityConflict().django_response(
                {"reason": "Document %s already exists." % docid})
class BasicDocumentHandler(AnonymousBaseHandler):
    """Anonymous, read-only view of a document's public meta data."""

    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the public view URLs and revision of ``docid``.

        Answers with an entity-not-found response when the library raises
        ``RevisionNotFound`` for the document.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # NOTE(review): the body of this branch is missing from the
            # reviewed listing; this uses the same response type as the
            # other handlers in this module -- confirm.
            return response.EntityNotFound().django_response()

        # NOTE(review): the 'name' entry is inferred, its line is missing
        # from the reviewed listing -- confirm.
        return {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }
class DocumentHandler(BaseHandler):
    """Authenticated view of a document's meta data."""

    allowed_methods = ('GET', 'PUT')
    anonymous = BasicDocumentHandler

    @hglibrary
    def read(self, request, docid, lib):
        """Read document's meta data.

        Looks up the public document and the requesting user's copy
        (``doc.take(username)``). The view URLs refer to the user's copy;
        revision and timestamp are reported for both copies.
        """
        log.info(u"Read %s (%s)", docid, type(docid))

        try:
            public_doc = lib.document(docid)
            user_doc = public_doc.take(request.user.username)
        except RevisionNotFound as e:
            # NOTE(review): the 'docid' entry is inferred, its line is
            # missing from the reviewed listing -- confirm.
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message,
                'docid': docid})

        view_args = [user_doc.id]

        # NOTE(review): the 'name' entry is inferred as well -- confirm.
        return {
            'name': user_doc.id,
            'html_url': reverse('dochtml_view', args=view_args),
            'text_url': reverse('doctext_view', args=view_args),
            'dc_url': reverse('docdc_view', args=view_args),
            'gallery_url': reverse('docgallery_view', args=view_args),
            'merge_url': reverse('docmerge_view', args=view_args),
            'user_revision': user_doc.revision,
            'user_timestamp': user_doc.revision.timestamp,
            'public_revision': public_doc.revision,
            'public_timestamp': public_doc.revision.timestamp,
        }

    @hglibrary
    def update(self, request, docid, lib):
        """Update information about the document."""
        # NOTE(review): no update logic is visible in the reviewed listing;
        # the method is kept as a stub that returns None.
        return
class DocumentHTMLHandler(BaseHandler):
    """Render a document revision as HTML."""

    # must be a one-element tuple: a bare ('GET') is just the string 'GET'
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Read document as html text.

        The optional ``revision`` query parameter selects the revision to
        render and defaults to ``'latest'``. A revision that belongs to a
        different document yields a BadRequest ('name-mismatch'); an
        unknown entry or revision yields EntityNotFound.
        """
        try:
            revision = request.GET.get('revision', 'latest')

            if revision == 'latest':
                document = lib.document(docid)
            else:
                document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })

            return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message})
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries associated with a document."""

    # must be a one-element tuple: a bare ('GET') is just the string 'GET'
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        Returns a list with one ``{'name': ..., 'pages': [...]}`` dict for
        every ``GalleryForDocument`` row whose directory exists under
        ``settings.MEDIA_ROOT``. ``pages`` holds the sorted URLs of the
        ``.png`` / ``.jpeg`` / ``.jpg`` files found in that directory.
        """
        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.info(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # os.listdir() returns byte strings or text depending on
                # the type of its argument; decoding an already-decoded
                # non-ASCII name would fail, so decode byte strings only
                if isinstance(filename, bytes):
                    filename = filename.decode('utf-8')

                name, ext = os.path.splitext(os.path.basename(filename))

                if ext.lower() not in (u'.png', u'.jpeg', u'.jpg'):
                    log.info("Ignoring: %s %s", name, ext)
                    continue

                url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                gallery['pages'].append(url)

            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
# Matches an (optionally namespace-prefixed) <include> tag whose href is a
# quoted wlrepo:// URL and captures the part after the scheme as ``link``.
# A backreference cannot be used inside a character class (there ``\1`` is
# the character \x01), so "anything but the opening quote" is spelled with
# a negative lookahead instead.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>(?:(?!\1).)+?)\1\s*[^>]*?>"""
class DocumentTextHandler(BaseHandler):
    """Read and update the raw XML text of a document."""

    allowed_methods = ('GET', 'POST')

    @hglibrary
    def read(self, request, docid, lib):
        """Read document as raw text.

        The optional ``revision`` query parameter defaults to ``'latest'``.
        A revision that belongs to a different document yields a
        BadRequest ('name-mismatch'); an unknown entry or revision yields
        EntityNotFound.
        """
        revision = request.GET.get('revision', 'latest')
        try:
            if revision == 'latest':
                document = lib.document(docid)
            else:
                document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'})

            # TODO: some finer-grained access control
            return document.data('xml')
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message})

    @hglibrary
    def create(self, request, docid, lib):
        """Store new XML content for the requesting user's copy of ``docid``.

        Reads ``contents`` and ``revision`` (and optionally ``message``)
        from the POST data. The wlrepo:// include links found in the
        content are written to the ``parts`` file and pushed to
        ``PartCache`` when they changed, then the XML itself is written
        and committed. Responds with EntityConflict ('out-of-date'),
        SuccessAllOk carrying the new revision, or EntityNotFound.
        """
        try:
            data = request.POST['contents']
            revision = request.POST['revision']

            if 'message' in request.POST:
                msg = u"$USER$ " + request.POST['message']
            else:
                msg = u"$AUTO$ XML content update."

            current = lib.document(docid, request.user.username)
            orig = lib.document_for_rev(revision)

            # NOTE(review): this condition is on a line missing from the
            # reviewed listing; inferred from the 'out-of-date' answer.
            if current != orig:
                return response.EntityConflict().django_response({
                        "reason": "out-of-date",
                        "provided_revision": orig.revision,
                        "latest_revision": current.revision })

            # try to find any Xinclude tags
            includes = [m.group('link') for m in re.finditer(
                XINCLUDE_REGEXP, data, flags=re.UNICODE)]

            log.info("INCLUDES: %s", includes)

            # TODO: provide useful routines to make this simpler
            def xml_update_action(lib, resolve):
                # best effort: an unreadable or missing parts file simply
                # means no parts have been recorded yet
                try:
                    f = lib._fileopen(resolve('parts'), 'r')
                    try:
                        stored_includes = json.loads(f.read())
                    finally:
                        f.close()
                except Exception:
                    stored_includes = []

                if stored_includes != includes:
                    f = lib._fileopen(resolve('parts'), 'w+')
                    try:
                        f.write(json.dumps(includes))
                    finally:
                        f.close()

                    lib._fileadd(resolve('parts'))

                    # update the parts cache
                    PartCache.update_cache(docid, current.owner,
                        stored_includes, includes)

                # now that the parts are ok, write xml
                f = lib._fileopen(resolve('xml'), 'w+')
                try:
                    f.write(data.encode('utf-8'))
                finally:
                    f.close()

            ndoc = current.invoke_and_commit(
                xml_update_action, lambda d: (msg, current.owner))

            try:
                # return the new revision number
                # NOTE(review): the 'document' and 'subview' entries are
                # inferred, their lines are missing from the reviewed
                # listing -- confirm.
                return response.SuccessAllOk().django_response({
                    "document": ndoc.id,
                    "subview": "xml",
                    "previous_revision": current.revision,
                    "revision": ndoc.revision,
                    'timestamp': ndoc.revision.timestamp,
                    "url": reverse("doctext_view", args=[ndoc.id])
                })
            except Exception:
                if ndoc: lib._rollback()
                # bare raise keeps the original traceback
                raise
        except RevisionNotFound as e:
            return response.EntityNotFound(mimetype="text/plain").\
                django_response(e.message)
381 # Dublin Core handlers
383 # @requires librarian
class DocumentDublinCoreHandler(BaseHandler):
    """Read and update the Dublin Core meta data embedded in a document."""

    allowed_methods = ('GET', 'POST')

    @hglibrary
    def read(self, request, docid, lib):
        """Return the serialized Dublin Core book info of a document.

        The optional ``revision`` query parameter defaults to ``'latest'``.
        A revision that belongs to a different document yields a
        BadRequest ('name-mismatch'); an unknown entry or revision yields
        EntityNotFound.
        """
        try:
            revision = request.GET.get('revision', 'latest')

            if revision == 'latest':
                doc = lib.document(docid)
            else:
                doc = lib.document_for_rev(revision)

            # the document is bound to ``doc`` in this method
            if doc.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'})

            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
            return bookinfo.serialize()
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message})

    @hglibrary
    def create(self, request, docid, lib):
        """Replace the Dublin Core info in the user's copy of ``docid``.

        Reads ``contents`` (book info as JSON) and ``revision`` (and
        optionally ``message``) from the POST data, parses the current
        XML, swaps in the new book info and writes the result back.
        Responds with EntityConflict ('out-of-date'), the new revision
        data, or EntityNotFound.
        """
        # local import: only ``dcparser`` is among the module's visible
        # librarian imports
        from librarian import parser

        try:
            bi_json = request.POST['contents']
            revision = request.POST['revision']

            if 'message' in request.POST:
                # this handler serves POST, so the message lives in POST too
                msg = u"$USER$ " + request.POST['message']
            else:
                msg = u"$AUTO$ Dublin core update."

            current = lib.document(docid, request.user.username)
            orig = lib.document_for_rev(revision)

            # NOTE(review): this condition is on a line missing from the
            # reviewed listing; inferred from the 'out-of-date' answer.
            if current != orig:
                return response.EntityConflict().django_response({
                        "reason": "out-of-date",
                        "provided": orig.revision,
                        "latest": current.revision })

            xmldoc = parser.WLDocument.from_string(current.data('xml'))
            xmldoc.book_info = dcparser.BookInfo.from_json(bi_json)

            ndoc = current.quickwrite('xml',
                xmldoc.serialize().encode('utf-8'),
                message=msg, user=request.user.username)

            try:
                # return the new revision number
                # NOTE(review): the opening of this return statement and the
                # 'document'/'subview' entries are inferred, their lines are
                # missing from the reviewed listing -- confirm.
                return {
                    "document": ndoc.id,
                    "subview": "dc",
                    "previous_revision": current.revision,
                    "revision": ndoc.revision,
                    'timestamp': ndoc.revision.timestamp,
                    "url": reverse("docdc_view", args=[ndoc.id])
                }
            except Exception:
                if ndoc: lib._rollback()
                # bare raise keeps the original traceback
                raise
        except RevisionNotFound:
            return response.EntityNotFound().django_response()
454 class MergeHandler(BaseHandler):
455 allowed_methods = ('POST',)
457 @validate_form(forms.MergeRequestForm, 'POST')
459 def create(self, request, form, docid, lib):
460 """Create a new document revision from the information provided by user"""
462 target_rev = form.cleaned_data['target_revision']
464 doc = lib.document(docid)
465 udoc = doc.take(request.user.username)
467 if target_rev == 'latest':
468 target_rev = udoc.revision
470 if str(udoc.revision) != target_rev:
471 # user think doesn't know he has an old version
            # Updating is theoretically OK, but we would
            # have to force a refresh. Sharing may not be safe,
            # because it doesn't always result in an update.
478 # In other words, we can't lie about the resource's state
479 # So we should just yield and 'out-of-date' conflict
480 # and let the client ask again with updated info.
            # NOTE: this could result in a race condition, when there
            # are 2 instances of the same user editing the same document.
            # Instance "A" trying to update, and instance "B" always changing
            # the document right before "A". The answer to this problem is
            # for "A" to request a merge from 'latest' and then
            # check the parent revisions in the response to see whether it
            # actually merged from where it thinks it should. If not, the
            # client SHOULD update its internal state.
490 return response.EntityConflict().django_response({
491 "reason": "out-of-date",
492 "provided": target_rev,
493 "latest": udoc.revision })
495 if not request.user.has_perm('explorer.book.can_share'):
496 # User is not permitted to make a merge, right away
497 # So we instead create a pull request in the database
499 comitter=request.user,
501 source_revision = str(udoc.revision),
503 comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
507 return response.RequestAccepted().django_response(\
508 ticket_status=prq.status, \
509 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
511 if form.cleaned_data['type'] == 'update':
512 # update is always performed from the file branch
514 success, changed = udoc.update(request.user.username)
516 if form.cleaned_data['type'] == 'share':
517 success, changed = udoc.share(form.cleaned_data['message'])
520 return response.EntityConflict().django_response({
521 'reason': 'merge-failure',
525 return response.SuccessNoContent().django_response()
527 nudoc = udoc.latest()
529 return response.SuccessAllOk().django_response({
531 "parent_user_resivion": udoc.revision,
532 "parent_revision": doc.revision,
533 "revision": nudoc.revision,
534 'timestamp': nudoc.revision.timestamp,