1 # -*- encoding: utf-8 -*-
5 __author__= "Łukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
19 from librarian import dcparser
22 from explorer.models import PullRequest, GalleryForDocument
25 import api.forms as forms
26 import api.response as response
27 from api.utils import validate_form, hglibrary, natural_order
28 from api.models import PartCache
# Module-level logger used by the handlers in this module; all messages go
# to the 'platforma.api' logger.
34 log = logging.getLogger('platforma.api')
38 # Document List Handlers
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only listing of the library's documents."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Each document id reported by ``lib.documents()`` becomes one entry
        holding the id itself (``'name'``) and the URL of its
        ``document_view`` (``'url'``).  The entries are returned wrapped in
        a dict under the ``'documents'`` key.
        """
        entries = []
        for name in lib.documents():
            entries.append({
                'url': reverse('document_view', args=[name]),
                'name': name,
            })
        return {'documents': entries}
# Document-list resource for authenticated users: GET returns the library's
# documents nested by their parts, POST uploads a new document.
# `anonymous` names the handler class used for unauthenticated access.
53 class LibraryHandler(BaseHandler):
54 allowed_methods = ('GET', 'POST')
55 anonymous = BasicLibraryHandler
# read(request, lib): builds one entry per document id, then hangs every
# entry that is a part of another document under that document's 'parts'.
58 def read(self, request, lib):
59 """Return the list of documents."""
# Ids are decoded from UTF-8 before being used as keys and in URLs.
63 for docid in lib.documents():
64 docid = docid.decode('utf-8')
66 'url': reverse('document_view', args=[docid]),
# Distinct (part_id, document_id) pairs taken from the parts cache.
71 parts = PartCache.objects.defer('part_id')\
72 .values_list('part_id', 'document_id').distinct()
# Shallow copy of the id -> entry mapping: parts are popped from this copy,
# so it ends up holding top-level documents only, while `documents` keeps
# every entry reachable by id.
74 document_tree = dict(documents)
76 for part, docid in parts:
77 # this way, we won't display broken links
78 if not documents.has_key(part):
79 log.info("NOT FOUND: %s", part)
# NOTE(review): only `part` is checked for existence above; `docid` is
# looked up unconditionally - confirm the cache never references a missing
# parent document.
82 parent = documents[docid]
83 child = documents[part]
85 # not top-level anymore
# NOTE(review): pop() is called without a default, so a part that is listed
# under more than one document would raise KeyError on its second row -
# confirm whether the cache can contain such rows.
86 document_tree.pop(part)
87 parent['parts'].append(child)
# Both the children of every document and the top level are ordered by
# 'name' through natural_order.
89 for doc in documents.itervalues():
90 doc['parts'].sort(key=natural_order(lambda d: d['name']))
92 return {'documents': sorted(document_tree.itervalues(),
93 key=natural_order(lambda d: d['name']) ) }
# create(request, form, lib): `form` is a DocumentUploadForm supplied through
# validate_form.  The document text is taken from the 'ocr_data' field when
# it is non-empty; an uploaded 'ocr_file' is read and decoded as UTF-8.
95 @validate_form(forms.DocumentUploadForm, 'POST')
97 def create(self, request, form, lib):
98 """Create a new document."""
100 if form.cleaned_data['ocr_data']:
101 data = form.cleaned_data['ocr_data']
103 data = request.FILES['ocr_file'].read().decode('utf-8')
106 return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
# When 'generate_dc' is set the text is passed through librarian.wrap_text
# together with today's date.
108 if form.cleaned_data['generate_dc']:
109 data = librarian.wrap_text(data, unicode(date.today()))
# The form's 'bookname' field is used as the new document's id.
111 docid = form.cleaned_data['bookname']
116 log.info("DOCID %s", docid)
117 doc = lib.document_create(docid)
118 # document created, but no content yet
# The initial content is written as UTF-8 bytes on behalf of the requesting
# user, with an automatic commit message.
121 doc = doc.quickwrite('xml', data.encode('utf-8'),
122 '$AUTO$ XML data uploaded.', user=request.user.username)
125 # rollback branch creation
# The failure is re-raised as a LibraryException whose message is the
# formatted traceback of the original error.
127 raise LibraryException(traceback.format_exc())
129 url = reverse('document_view', args=[doc.id])
131 return response.EntityCreated().django_response(\
135 'revision': doc.revision },
# Library failures are reported as an InternalError response carrying the
# traceback text; `e` itself is not used.
139 except LibraryException, e:
141 return response.InternalError().django_response(\
142 {'exception': traceback.format_exc()} )
143 except DocumentAlreadyExists:
144 # Document is already there
145 return response.EntityConflict().django_response(\
146 {"reason": "Document %s already exists." % docid})
# Read-only document resource: exposes the URLs of the document's HTML,
# text and Dublin Core views together with its public revision.
151 class BasicDocumentHandler(AnonymousBaseHandler):
152 allowed_methods = ('GET',)
# read(request, docid, lib): looks the document up by id; RevisionNotFound
# from the lookup is handled separately from the normal result.
155 def read(self, request, docid, lib):
157 doc = lib.document(docid)
158 except RevisionNotFound:
# Every URL is built from doc.id rather than from the requested docid.
163 'html_url': reverse('dochtml_view', args=[doc.id]),
164 'text_url': reverse('doctext_view', args=[doc.id]),
165 'dc_url': reverse('docdc_view', args=[doc.id]),
166 'public_revision': doc.revision,
# Document resource for authenticated users.  GET reports the view URLs plus
# the revisions and timestamps of two objects: `doc` (from lib.document) and
# `udoc` (from doc.take for the requesting user).  `anonymous` names the
# handler class used for unauthenticated access.
174 class DocumentHandler(BaseHandler):
175 allowed_methods = ('GET', 'PUT')
176 anonymous = BasicDocumentHandler
179 def read(self, request, docid, lib):
180 """Read document's meta data"""
181 log.info("Read %s", docid)
183 doc = lib.document(docid)
# presumably `udoc` is the requesting user's own copy of the document -
# TODO confirm against doc.take()
184 udoc = doc.take(request.user.username)
# NOTE(review): the 'exception' value is the exception class object
# (type(e)), not a string - confirm the response serializer accepts it.
185 except RevisionNotFound, e:
186 return response.EntityNotFound().django_response({
187 'exception': type(e), 'message': e.message})
189 # is_shared = udoc.ancestorof(doc)
190 # is_uptodate = is_shared or shared.ancestorof(document)
# All URLs point at udoc.id; the 'user_*' values come from `udoc` and the
# 'public_*' values from `doc`.
194 'html_url': reverse('dochtml_view', args=[udoc.id]),
195 'text_url': reverse('doctext_view', args=[udoc.id]),
196 'dc_url': reverse('docdc_view', args=[udoc.id]),
197 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
198 'merge_url': reverse('docmerge_view', args=[udoc.id]),
199 'user_revision': udoc.revision,
200 'user_timestamp': udoc.revision.timestamp,
201 'public_revision': doc.revision,
202 'public_timestamp': doc.revision.timestamp,
# update(request, docid, lib): handler for the 'PUT' method listed in
# allowed_methods.
# NOTE(review): the docstring below looks truncated ("display not") -
# confirm the intended wording.
208 def update(self, request, docid, lib):
209 """Update information about the document, like display not"""
214 class DocumentHTMLHandler(BaseHandler):
215 allowed_methods = ('GET')
218 def read(self, request, docid, lib):
219 """Read document as html text"""
221 revision = request.GET.get('revision', 'latest')
223 if revision == 'latest':
224 document = lib.document(docid)
226 document = lib.document_for_rev(revision)
228 if document.id != docid:
229 return response.BadRequest().django_response({'reason': 'name-mismatch',
230 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
232 return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
233 except (EntryNotFound, RevisionNotFound), e:
234 return response.EntityNotFound().django_response({
235 'exception': type(e), 'message': e.message})
241 from django.core.files.storage import FileSystemStorage
class DocumentGalleryHandler(BaseHandler):
    """Expose the scan galleries associated with a document."""
    # A one-element tuple needs the trailing comma: ('GET') is just the
    # string 'GET'.  The other handlers in this module use ('GET',).
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every ``GalleryForDocument`` row attached to ``docid`` the
        directory ``MEDIA_ROOT/<subpath>`` is listed in natural order and
        each ``.png`` / ``.jpeg`` / ``.jpg`` file (case-insensitive) is
        turned into a URL under ``MEDIA_URL``.  Galleries whose directory
        is missing and files with other extensions are logged and skipped.

        Returns a list of ``{'name': ..., 'pages': [url, ...]}`` dicts.
        """
        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.info(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            # `filename` rather than `file`, which would shadow the builtin.
            for filename in sorted(os.listdir(dirpath), key=natural_order()):
                name, ext = os.path.splitext(os.path.basename(filename))

                if ext.lower() not in ['.png', '.jpeg', '.jpg']:
                    log.info("Ignoring: %s %s", name, ext)
                    continue

                # Directory entries are byte strings; decode before joining
                # them with the unicode separator.
                url = settings.MEDIA_URL + assoc.subpath + u'/' + filename.decode('utf-8')
                gallery['pages'].append(url)

            galleries.append(gallery)

        return galleries
# Matches an <include ...> tag (with an optional namespace prefix) whose href
# attribute is a quoted wlrepo:// URL; the named group `link` captures the
# text that follows the scheme.
# NOTE(review): inside the character class `[^\1]` the `\1` is not a
# backreference (numeric escapes in a class are read as characters); it is
# the lazy `+?` followed by the real `\1` backreference that stops the match
# at the closing quote.
278 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
# Raw XML resource of a document: GET returns the stored XML, POST stores
# new XML and keeps the document's list of included parts in sync.
282 class DocumentTextHandler(BaseHandler):
283 allowed_methods = ('GET', 'POST')
# read(request, docid, lib): the optional 'revision' GET parameter defaults
# to 'latest'; other values are resolved through lib.document_for_rev.
286 def read(self, request, docid, lib):
287 """Read document as raw text"""
288 revision = request.GET.get('revision', 'latest')
290 if revision == 'latest':
291 document = lib.document(docid)
293 document = lib.document_for_rev(revision)
# A revision that resolves to a different document is rejected.
295 if document.id != docid:
296 return response.BadRequest().django_response({'reason': 'name-mismatch',
297 'message': 'Provided revision is not valid for this document'})
299 # TODO: some finer-grained access control
300 return document.data('xml')
301 except (EntryNotFound, RevisionNotFound), e:
302 return response.EntityNotFound().django_response({
303 'exception': type(e), 'message': e.message})
# create(request, docid, lib): expects 'contents' (the new XML) and
# 'revision' (the revision the client edited) in POST; 'message' is optional.
306 def create(self, request, docid, lib):
308 data = request.POST['contents']
309 revision = request.POST['revision']
# The commit message is prefixed with $USER$ when supplied by the client and
# with $AUTO$ when generated here.
311 if request.POST.has_key('message'):
312 msg = u"$USER$ " + request.POST['message']
314 msg = u"$AUTO$ XML content update."
# `current` is looked up for the requesting user, `orig` is the document at
# the revision the client supplied; both revisions are reported back in the
# out-of-date conflict response.
316 current = lib.document(docid, request.user.username)
317 orig = lib.document_for_rev(revision)
320 return response.EntityConflict().django_response({
321 "reason": "out-of-date",
322 "provided_revision": orig.revision,
323 "latest_revision": current.revision })
325 # try to find any Xinclude tags
326 includes = [m.groupdict()['link'] for m in (re.finditer(\
327 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
329 log.info("INCLUDES: %s", includes)
331 # TODO: provide useful routines to make this simpler
# Callback handed to current.invoke_and_commit below.  It compares the JSON
# list stored in the 'parts' file with the includes found in the new XML,
# rewrites that file and the parts cache when they differ, and then writes
# the XML itself.
# NOTE(review): this relies on the library's private helpers `_fileopen` and
# `_fileadd`.
332 def xml_update_action(lib, resolve):
334 f = lib._fileopen(resolve('parts'), 'r')
335 stored_includes = json.loads(f.read())
340 if stored_includes != includes:
341 f = lib._fileopen(resolve('parts'), 'w+')
342 f.write(json.dumps(includes))
345 lib._fileadd(resolve('parts'))
347 # update the parts cache
348 PartCache.update_cache(docid, current.owner,\
349 stored_includes, includes)
351 # now that the parts are ok, write xml
352 f = lib._fileopen(resolve('xml'), 'w+')
353 f.write(data.encode('utf-8'))
# The second argument supplies the (message, user) pair for the commit.
357 ndoc = current.invoke_and_commit(\
358 xml_update_action, lambda d: (msg, current.owner) )
361 # return the new revision number
362 return response.SuccessAllOk().django_response({
365 "previous_revision": current.revision,
366 "revision": ndoc.revision,
367 'timestamp': ndoc.revision.timestamp,
368 "url": reverse("doctext_view", args=[ndoc.id])
# NOTE(review): this test needs `ndoc` to be bound even when the commit
# itself failed - confirm it is initialised before the commit attempt.
371 if ndoc: lib._rollback()
# Unlike the other handlers, this not-found response is plain text carrying
# only the exception message.
373 except RevisionNotFound, e:
374 return response.EntityNotFound(mimetype="text/plain").\
375 django_response(e.message)
379 # Dublin Core handlers
381 # @requires librarian
# Dublin Core resource of a document: GET returns the serialized BookInfo
# parsed from the document's XML, POST replaces it from a JSON payload.
383 class DocumentDublinCoreHandler(BaseHandler):
384 allowed_methods = ('GET', 'POST')
# read(request, docid, lib): the optional 'revision' GET parameter defaults
# to 'latest'; other values are resolved through lib.document_for_rev.
# NOTE(review): the docstring says "raw text", but the value returned below
# is bookinfo.serialize().
387 def read(self, request, docid, lib):
388 """Read document as raw text"""
390 revision = request.GET.get('revision', 'latest')
392 if revision == 'latest':
393 doc = lib.document(docid)
395 doc = lib.document_for_rev(revision)
# NOTE(review): the local bound above is `doc`; no `document` is bound in the
# visible code of this method, so this comparison looks like it should read
# `doc.id` - confirm.
398 if document.id != docid:
399 return response.BadRequest().django_response({'reason': 'name-mismatch',
400 'message': 'Provided revision is not valid for this document'})
402 bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
403 return bookinfo.serialize()
404 except (EntryNotFound, RevisionNotFound), e:
405 return response.EntityNotFound().django_response({
406 'exception': type(e), 'message': e.message})
# create(request, docid, lib): expects 'contents' (BookInfo as JSON) and
# 'revision' (the revision the client edited) in POST; 'message' is optional.
409 def create(self, request, docid, lib):
411 bi_json = request.POST['contents']
412 revision = request.POST['revision']
# NOTE(review): presence of 'message' is tested on request.POST but the
# value is read from request.PUT - confirm which one is intended.
414 if request.POST.has_key('message'):
415 msg = u"$USER$ " + request.PUT['message']
417 msg = u"$AUTO$ Dublin core update."
# `current` is looked up for the requesting user, `orig` is the document at
# the revision the client supplied; both revisions are reported back in the
# out-of-date conflict response.
419 current = lib.document(docid, request.user.username)
420 orig = lib.document_for_rev(revision)
423 return response.EntityConflict().django_response({
424 "reason": "out-of-date",
425 "provided": orig.revision,
426 "latest": current.revision })
# NOTE(review): the parsed document is bound to `xmldoc`, while the next
# statements assign to and serialize `document`; `parser` is also not among
# the visible imports - confirm both names.
428 xmldoc = parser.WLDocument.from_string(current.data('xml'))
429 document.book_info = dcparser.BookInfo.from_json(bi_json)
# The updated document is written back as UTF-8 XML on behalf of the
# requesting user.
432 ndoc = current.quickwrite('xml', \
433 document.serialize().encode('utf-8'),\
434 message=msg, user=request.user.username)
437 # return the new revision number
441 "previous_revision": current.revision,
442 "revision": ndoc.revision,
443 'timestamp': ndoc.revision.timestamp,
444 "url": reverse("docdc_view", args=[ndoc.id])
# NOTE(review): this test needs `ndoc` to be bound even when the write itself
# failed - confirm it is initialised before the write attempt.
447 if ndoc: lib._rollback()
449 except RevisionNotFound:
450 return response.EntityNotFound().django_response()
# Merge resource (POST only).  Depending on the form's 'type' field the
# user's copy is updated (udoc.update) or the user's changes are shared
# (udoc.share).  Users without the 'explorer.book.can_share' permission get
# a pull request recorded instead of an immediate merge.
452 class MergeHandler(BaseHandler):
453 allowed_methods = ('POST',)
# create(request, form, docid, lib): `form` is a MergeRequestForm supplied
# through validate_form; it provides 'target_revision', 'type' and 'message'.
455 @validate_form(forms.MergeRequestForm, 'POST')
457 def create(self, request, form, docid, lib):
458 """Create a new document revision from the information provided by user"""
460 target_rev = form.cleaned_data['target_revision']
462 doc = lib.document(docid)
463 udoc = doc.take(request.user.username)
# NOTE(review): for 'latest' target_rev becomes the revision object itself,
# while the check below compares it with str(udoc.revision) - confirm that
# the revision type compares equal to its string form, otherwise 'latest'
# would always be reported as out-of-date.
465 if target_rev == 'latest':
466 target_rev = udoc.revision
468 if str(udoc.revision) != target_rev:
469 # user think doesn't know he has an old version
472 # Updating is teorericly ok, but we need would
473 # have to force a refresh. Sharing may be not safe,
474 # 'cause it doesn't always result in update.
476 # In other words, we can't lie about the resource's state
477 # So we should just yield and 'out-of-date' conflict
478 # and let the client ask again with updated info.
480 # NOTE: this could result in a race condition, when there
481 # are 2 instances of the same user editing the same document.
482 # Instance "A" trying to update, and instance "B" always changing
483 # the document right before "A". The anwser to this problem is
484 # for the "A" to request a merge from 'latest' and then
485 # check the parent revisions in response, if he actually
486 # merge from where he thinks he should. If not, the client SHOULD
487 # update his internal state.
488 return response.EntityConflict().django_response({
489 "reason": "out-of-date",
490 "provided": target_rev,
491 "latest": udoc.revision })
493 if not request.user.has_perm('explorer.book.can_share'):
494 # User is not permitted to make a merge, right away
495 # So we instead create a pull request in the database
497 comitter=request.user,
499 source_revision = str(udoc.revision),
501 comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
# The client receives the pull request's status and the URL of its view.
505 return response.RequestAccepted().django_response(\
506 ticket_status=prq.status, \
507 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
# Both operations yield a (success, changed) pair.
# NOTE(review): the two 'type' values are tested with independent `if`
# statements; `success` / `changed` stay unbound for any other value -
# presumably the form restricts 'type', confirm.
509 if form.cleaned_data['type'] == 'update':
510 # update is always performed from the file branch
512 success, changed = udoc.update(request.user.username)
514 if form.cleaned_data['type'] == 'share':
515 success, changed = udoc.share(form.cleaned_data['message'])
518 return response.EntityConflict().django_response({
519 'reason': 'merge-failure',
523 return response.SuccessNoContent().django_response()
# The user's document is re-read so the response reports the new revision.
525 nudoc = udoc.latest()
527 return response.SuccessAllOk().django_response({
# NOTE(review): the key below is spelled "resivion"; it is part of the
# response payload, so clients may depend on it - confirm before renaming.
529 "parent_user_resivion": udoc.revision,
530 "parent_revision": doc.revision,
531 "revision": nudoc.revision,
532 'timestamp': nudoc.revision.timestamp,