1 # -*- encoding: utf-8 -*-
5 __author__= "Łukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
19 from librarian import dcparser
22 from explorer.models import PullRequest, GalleryForDocument
25 import api.forms as forms
26 import api.response as response
27 from api.utils import validate_form, hglibrary, natural_order
28 from api.models import PartCache
# Module-level logger used by the API handlers defined in this file.
log = logging.getLogger('platforma.api')
38 # Document List Handlers
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only document listing (registered as LibraryHandler.anonymous)."""
    allowed_methods = ('GET',)

    # NOTE(review): the decorator line is not visible in the reviewed listing;
    # `lib` is assumed to be injected by `hglibrary` (imported from api.utils)
    # - confirm.
    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Every id yielded by ``lib.documents()`` becomes one entry holding the
        document name and the URL of its ``document_view``.
        """
        entries = []
        for docid in lib.documents():
            entries.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })

        return {'documents': entries}
class LibraryHandler(BaseHandler):
    """Document collection: list the documents (GET) or create one (POST)."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    # NOTE(review): rebuilt from a listing that omitted some lines. The
    # `hglibrary` decorators, the try/else/finally scaffolding and the lines
    # marked below are inferred from the surrounding code - confirm against
    # the repository copy.

    @hglibrary
    def read(self, request, lib):
        """Return the documents as a tree.

        Each document is described by its ``url``, ``name`` and ``parts``
        (the documents it includes, sorted in natural order by name). A
        document that is a part of another one is nested under its parent
        and removed from the top level of the result.
        """
        documents = {}
        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': [],
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # Shallow copy: shares the entry dicts, but entries can be dropped
        # from the top level without touching `documents`.
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # A cache row may also point at a parent that no longer exists.
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            # not top-level anymore; the same part may be included by several
            # documents, so it may already have been removed.
            document_tree.pop(part, None)
            documents[docid]['parts'].append(documents[part])

        by_name = natural_order(lambda d: d['name'])

        for doc in documents.itervalues():
            doc['parts'].sort(key=by_name)

        return {'documents': sorted(document_tree.itervalues(), key=by_name)}

    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The content comes from the form's ``ocr_data`` field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8). When
        ``generate_dc`` is set the text is passed through
        ``librarian.wrap_text`` together with today's date. The document is
        created under the name given in ``bookname``.

        Returns an EntityCreated response on success, BadRequest when no
        content was supplied, EntityConflict when the document already
        exists and InternalError when the library reports a failure.
        """
        import traceback

        data = form.cleaned_data['ocr_data']

        if not data:
            # Look the upload up defensively: indexing request.FILES raises
            # when no file was sent, which would hide the BadRequest below.
            upload = request.FILES.get('ocr_file')
            if upload is not None:
                data = upload.read().decode('utf-8')

        if not data:
            return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            # NOTE(review): the locking lines are not visible in the listing;
            # presumably the library is locked around the creation - confirm.
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet

                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    # NOTE(review): the rollback call itself is not visible;
                    # `lib._rollback()` is what the other handlers use.
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                # NOTE(review): only the 'revision' entry of the body is
                # visible; 'url', 'name' and the `url` argument are assumed.
                return response.EntityCreated().django_response(
                    body={
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision},
                    url=url)
            finally:
                lock.release()
        except LibraryException:
            return response.InternalError().django_response(
                {'exception': traceback.format_exc()})
        except DocumentAlreadyExists:
            # Document is already there
            return response.EntityConflict().django_response(
                {"reason": "Document %s already exists." % docid})
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document metadata (registered as DocumentHandler.anonymous)."""
    allowed_methods = ('GET',)

    # NOTE(review): rebuilt from a listing that omitted some lines; the
    # decorator, the `try:` line and the lines marked below are inferred.
    @hglibrary
    def read(self, request, docid, lib):
        """Return the public view URLs and the public revision of *docid*."""
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # NOTE(review): the original return line is not visible; this
            # mirrors the not-found response used by the other handlers.
            return response.EntityNotFound().django_response()

        # NOTE(review): the 'name' entry is assumed (dict opener not visible).
        info = {'name': doc.id}

        for key, view in (('html_url', 'dochtml_view'),
                          ('text_url', 'doctext_view'),
                          ('dc_url', 'docdc_view')):
            info[key] = reverse(view, args=[doc.id])

        info['public_revision'] = doc.revision
        return info
class DocumentHandler(BaseHandler):
    """Per-user document metadata (GET); PUT is declared as allowed."""
    allowed_methods = ('GET', 'PUT')
    anonymous = BasicDocumentHandler

    # NOTE(review): rebuilt from a listing that omitted some lines; the
    # decorators, the `try:` line and the lines marked below are inferred.
    @hglibrary
    def read(self, request, docid, lib):
        """Read document's meta data.

        Takes the requesting user's version of the document and returns the
        URLs of its sub-views together with the revision and timestamp of
        both the user's and the public version.
        """
        message = u"Read %s (%s)" % (docid, type(docid))
        log.info(message)

        try:
            doc = lib.document(docid)
            udoc = doc.take(request.user.username)
        except RevisionNotFound as e:
            # NOTE(review): the visible line ends with a trailing comma, so a
            # further entry follows; 'docid' is assumed - confirm.
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message,
                'docid': docid})

        # is_shared = udoc.ancestorof(doc)
        # is_uptodate = is_shared or shared.ancestorof(document)

        # NOTE(review): the 'name' entry is assumed (dict opener not visible).
        result = {'name': udoc.id}

        for key, view in (('html_url', 'dochtml_view'),
                          ('text_url', 'doctext_view'),
                          ('dc_url', 'docdc_view'),
                          ('gallery_url', 'docgallery_view'),
                          ('merge_url', 'docmerge_view')):
            result[key] = reverse(view, args=[udoc.id])

        user_rev = udoc.revision
        public_rev = doc.revision

        result['user_revision'] = user_rev
        result['user_timestamp'] = user_rev.timestamp
        result['public_revision'] = public_rev
        result['public_timestamp'] = public_rev.timestamp

        return result

    @hglibrary
    def update(self, request, docid, lib):
        """Update information about the document."""
        # NOTE(review): no statements of this method are visible in the
        # listing; it is kept as a no-op - confirm.
        return
class DocumentHTMLHandler(BaseHandler):
    """Serve a document rendered to HTML."""
    # A one-element tuple needs the trailing comma; ('GET') is just the
    # string 'GET'.
    allowed_methods = ('GET',)

    # NOTE(review): the decorator and the try/else lines are not visible in
    # the reviewed listing and are inferred from the surrounding code.
    @hglibrary
    def read(self, request, docid, lib):
        """Read document as html text.

        The optional ``revision`` query parameter selects the revision to
        render and defaults to ``'latest'``. Returns the result of
        ``librarian.html.transform`` on the document's XML, a BadRequest
        response when the revision belongs to another document, or an
        EntityNotFound response when the document/revision is missing.
        """
        try:
            revision = request.GET.get('revision', 'latest')

            if revision == 'latest':
                document = lib.document(docid)
            else:
                document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })

            return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message})
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries attached to a document."""
    # A one-element tuple needs the trailing comma; ('GET') is just the
    # string 'GET'.
    allowed_methods = ('GET',)

    # NOTE(review): rebuilt from a listing that omitted some lines; the
    # `galleries` initialisation, the `continue` statements, the log
    # arguments and the final return are inferred from the visible code.
    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every GalleryForDocument row of *docid* whose directory exists
        under ``settings.MEDIA_ROOT``, collects the URL-quoted addresses of
        its ``.png``/``.jpeg``/``.jpg`` files, sorted.
        """
        from urllib import quote

        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                if not isinstance(filename, unicode):
                    # NOTE(review): the format arguments are not visible;
                    # file name and directory are assumed.
                    log.warn(u"File %r is gallery %r is not unicode. Ommiting.",
                        filename, dirpath)
                    continue

                name, ext = os.path.splitext(os.path.basename(filename))

                if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
                    log.info(u"Ignoring: %s %s", name, ext)
                    continue

                url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                gallery['pages'].append(quote(url.encode('utf-8')))

            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
# Matches an ``include`` element (optionally namespace-prefixed) whose
# ``href`` attribute is a quoted ``wlrepo://`` address; the part after the
# scheme is captured in the named group ``link``.
# NOTE(review): inside a character class ``\1`` is a numeric character escape,
# not a backreference, so ``[^\1]`` does not exclude the opening quote. The
# link is delimited by the lazy ``+?`` together with the ``\1`` that follows
# the group, which is a real backreference.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
class DocumentTextHandler(BaseHandler):
    """Read (GET) and update (POST) the raw XML text of a document."""
    allowed_methods = ('GET', 'POST')

    # NOTE(review): rebuilt from a listing that omitted some lines. The
    # `hglibrary` decorators, the try/else/except scaffolding and the lines
    # marked below are inferred from the surrounding code - confirm against
    # the repository copy.

    @hglibrary
    def read(self, request, docid, lib):
        """Read document as raw text.

        The optional ``revision`` query parameter defaults to ``'latest'``.
        Returns the document's XML, a BadRequest response when the revision
        belongs to another document, or an EntityNotFound response when the
        document/revision is missing.
        """
        revision = request.GET.get('revision', 'latest')
        try:
            if revision == 'latest':
                document = lib.document(docid)
            else:
                document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'})

            # TODO: some finer-grained access control
            return document.data('xml')
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message})

    @hglibrary
    def create(self, request, docid, lib):
        """Store new XML content for the requesting user's version of *docid*.

        Reads ``contents``, ``revision`` and the optional ``message`` from
        the POST data. The ``wlrepo://`` includes found in the content are
        written to the document's ``parts`` file (and pushed to PartCache)
        when they changed, then the XML itself is written and committed.

        Returns SuccessAllOk with the new revision, EntityConflict when the
        provided revision is out of date, or EntityNotFound when a revision
        cannot be found.
        """
        try:
            data = request.POST['contents']
            revision = request.POST['revision']

            if 'message' in request.POST:
                msg = u"$USER$ " + request.POST['message']
            else:
                msg = u"$AUTO$ XML content update."

            current = lib.document(docid, request.user.username)
            orig = lib.document_for_rev(revision)

            # NOTE(review): the condition line is not visible in the listing;
            # a mismatch between the two documents is assumed - confirm.
            if current != orig:
                return response.EntityConflict().django_response({
                    "reason": "out-of-date",
                    "provided_revision": orig.revision,
                    "latest_revision": current.revision })

            # try to find any Xinclude tags
            includes = [m.group('link') for m in
                re.finditer(XINCLUDE_REGEXP, data, flags=re.UNICODE)]

            log.info("INCLUDES: %s", includes)

            # TODO: provide useful routines to make this simpler
            def xml_update_action(lib, resolve):
                # NOTE(review): the fallback for an unreadable parts file is
                # not visible; an empty list is assumed - confirm.
                try:
                    f = lib._fileopen(resolve('parts'), 'r')
                    try:
                        stored_includes = json.loads(f.read())
                    finally:
                        f.close()
                except Exception:
                    stored_includes = []

                if stored_includes != includes:
                    f = lib._fileopen(resolve('parts'), 'w+')
                    try:
                        f.write(json.dumps(includes))
                    finally:
                        f.close()

                    lib._fileadd(resolve('parts'))

                    # update the parts cache
                    PartCache.update_cache(docid, current.owner,
                        stored_includes, includes)

                # now that the parts are ok, write xml
                f = lib._fileopen(resolve('xml'), 'w+')
                try:
                    f.write(data.encode('utf-8'))
                finally:
                    f.close()

            ndoc = current.invoke_and_commit(
                xml_update_action, lambda d: (msg, current.owner))

            try:
                # return the new revision number
                # NOTE(review): two entries at the top of this dict are not
                # visible; "document" and "subview" are assumed - confirm.
                return response.SuccessAllOk().django_response({
                    "document": ndoc.id,
                    "subview": "xml",
                    "previous_revision": current.revision,
                    "revision": ndoc.revision,
                    'timestamp': ndoc.revision.timestamp,
                    "url": reverse("doctext_view", args=[ndoc.id])
                })
            except Exception:
                # The commit succeeded but the response could not be built.
                if ndoc: lib._rollback()
                raise
        except RevisionNotFound as e:
            return response.EntityNotFound(mimetype="text/plain").\
                django_response(e.message)
385 # Dublin Core handlers
387 # @requires librarian
class DocumentDublinCoreHandler(BaseHandler):
    """Read (GET) and update (POST) the Dublin Core metadata of a document."""
    allowed_methods = ('GET', 'POST')

    # NOTE(review): rebuilt from a listing that omitted some lines. The
    # `hglibrary` decorators, the try/else/except scaffolding and the lines
    # marked below are inferred from the surrounding code - confirm against
    # the repository copy.

    @hglibrary
    def read(self, request, docid, lib):
        """Read the document's Dublin Core data.

        The optional ``revision`` query parameter defaults to ``'latest'``.
        Returns the serialized ``dcparser.BookInfo`` parsed from the
        document's XML, a BadRequest response when the revision belongs to
        another document, or an EntityNotFound response when the
        document/revision is missing.
        """
        try:
            revision = request.GET.get('revision', 'latest')

            if revision == 'latest':
                doc = lib.document(docid)
            else:
                doc = lib.document_for_rev(revision)

            # Compare the document that was just loaded (`doc`); the name
            # `document` is never bound in this method.
            if doc.id != docid:
                return response.BadRequest().django_response({'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'})

            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
            return bookinfo.serialize()
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'exception': type(e), 'message': e.message})

    @hglibrary
    def create(self, request, docid, lib):
        """Replace the Dublin Core data of the user's version of *docid*.

        Reads ``contents`` (JSON book info), ``revision`` and the optional
        ``message`` from the POST data, puts the new book info into the
        parsed document and writes the serialized XML back.

        Returns SuccessAllOk with the new revision, EntityConflict when the
        provided revision is out of date, or EntityNotFound when a revision
        cannot be found.
        """
        try:
            bi_json = request.POST['contents']
            revision = request.POST['revision']

            # The message arrives in the same POST data that is tested here.
            if 'message' in request.POST:
                msg = u"$USER$ " + request.POST['message']
            else:
                msg = u"$AUTO$ Dublin core update."

            current = lib.document(docid, request.user.username)
            orig = lib.document_for_rev(revision)

            # NOTE(review): the condition line is not visible in the listing;
            # a mismatch between the two documents is assumed - confirm.
            if current != orig:
                return response.EntityConflict().django_response({
                    "reason": "out-of-date",
                    "provided": orig.revision,
                    "latest": current.revision })

            # NOTE(review): `parser` is not among the imports visible in the
            # listing - confirm it is imported (e.g. from librarian).
            xmldoc = parser.WLDocument.from_string(current.data('xml'))
            # Update and serialize the parsed document itself.
            xmldoc.book_info = dcparser.BookInfo.from_json(bi_json)

            ndoc = current.quickwrite('xml',
                xmldoc.serialize().encode('utf-8'),
                message=msg, user=request.user.username)

            try:
                # return the new revision number
                # NOTE(review): the call line and two entries at the top of
                # this dict are not visible; SuccessAllOk with "document" and
                # "subview" is assumed - confirm.
                return response.SuccessAllOk().django_response({
                    "document": ndoc.id,
                    "subview": "dc",
                    "previous_revision": current.revision,
                    "revision": ndoc.revision,
                    'timestamp': ndoc.revision.timestamp,
                    "url": reverse("docdc_view", args=[ndoc.id])
                })
            except Exception:
                # The write succeeded but the response could not be built.
                if ndoc: lib._rollback()
                raise
        except RevisionNotFound:
            return response.EntityNotFound().django_response()
458 class MergeHandler(BaseHandler):
459 allowed_methods = ('POST',)
461 @validate_form(forms.MergeRequestForm, 'POST')
463 def create(self, request, form, docid, lib):
464 """Create a new document revision from the information provided by user"""
466 target_rev = form.cleaned_data['target_revision']
468 doc = lib.document(docid)
469 udoc = doc.take(request.user.username)
471 if target_rev == 'latest':
472 target_rev = udoc.revision
474 if str(udoc.revision) != target_rev:
475 # user think doesn't know he has an old version
            # Updating is theoretically OK, but we would
            # have to force a refresh. Sharing may not be safe,
            # because it doesn't always result in an update.
482 # In other words, we can't lie about the resource's state
483 # So we should just yield and 'out-of-date' conflict
484 # and let the client ask again with updated info.
            # NOTE: this could result in a race condition when there
            # are 2 instances of the same user editing the same document:
            # instance "A" trying to update, and instance "B" always changing
            # the document right before "A". The answer to this problem is
            # for "A" to request a merge from 'latest' and then
            # check the parent revisions in the response to see whether it
            # actually merged from where it thinks it should. If not, the
            # client SHOULD update its internal state.
494 return response.EntityConflict().django_response({
495 "reason": "out-of-date",
496 "provided": target_rev,
497 "latest": udoc.revision })
499 if not request.user.has_perm('explorer.book.can_share'):
500 # User is not permitted to make a merge, right away
501 # So we instead create a pull request in the database
503 comitter=request.user,
505 source_revision = str(udoc.revision),
507 comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
511 return response.RequestAccepted().django_response(\
512 ticket_status=prq.status, \
513 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
515 if form.cleaned_data['type'] == 'update':
516 # update is always performed from the file branch
518 success, changed = udoc.update(request.user.username)
520 if form.cleaned_data['type'] == 'share':
521 success, changed = udoc.share(form.cleaned_data['message'])
524 return response.EntityConflict().django_response({
525 'reason': 'merge-failure',
529 return response.SuccessNoContent().django_response()
531 nudoc = udoc.latest()
533 return response.SuccessAllOk().django_response({
535 "parent_user_resivion": udoc.revision,
536 "parent_revision": doc.revision,
537 "revision": nudoc.revision,
538 'timestamp': nudoc.revision.timestamp,