1 # -*- encoding: utf-8 -*-
# Module metadata: author, creation timestamp and module description.
# NOTE(review): the first letter of the author name looks mis-encoded
# (possibly meant to be "Ł") -- confirm against version-control history
# before changing the string.
5 __author__= "Ćukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
# NOTE(review): placeholder text; replace with a real description of the
# REST handlers defined in this module.
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
19 from librarian import dcparser
22 from explorer.models import PullRequest, GalleryForDocument
25 import api.forms as forms
26 import api.response as response
27 from api.utils import validate_form, hglibrary, natural_order
28 from api.models import PartCache
# Module-level logger registered under the 'platforma.api' name; the handlers
# in this module report through it.
# NOTE(review): confirm that `logging` is imported at module level.
34 log = logging.getLogger('platforma.api')
38 # Document List Handlers
# GET-only handler that lists the documents of the library.
# LibraryHandler references this class through its `anonymous` attribute.
40 class BasicLibraryHandler(AnonymousBaseHandler):
41 allowed_methods = ('GET',)
# read(): one entry is produced per id yielded by lib.documents(); each entry
# carries
#   'url'  -- reverse() of the 'document_view' route for that id
#   'name' -- the document id itself
# The entries are returned wrapped in a dict under the 'documents' key.
# NOTE(review): `lib` is not a request-derived argument; presumably it is
# injected by a decorator (hglibrary is imported above) -- confirm.
44 def read(self, request, lib):
45 """Return the list of documents."""
47 'url': reverse('document_view', args=[docid]),
48 'name': docid } for docid in lib.documents() ]
50 return {'documents' : document_list}
# Library-level handler accepting GET (document listing) and POST (document
# creation). `anonymous` points at BasicLibraryHandler.
# NOTE(review): presumably piston routes unauthenticated requests to the
# `anonymous` class -- confirm against the piston documentation.
53 class LibraryHandler(BaseHandler):
54 allowed_methods = ('GET', 'POST')
55 anonymous = BasicLibraryHandler
# read(): returns the documents arranged as a tree.
#   * `documents` is a dict keyed by document id (see has_key()/itervalues()).
#   * PartCache supplies distinct (part_id, document_id) pairs.
#   * `document_tree` starts as a shallow copy of `documents`; every part that
#     is a known document is removed from it and appended to the 'parts' list
#     of its parent entry, so only top-level documents remain in the tree.
#   * Each 'parts' list and the final top-level list are ordered with
#     natural_order() applied to the 'name' field.
# Returns {'documents': [<top-level entries>]}.
58 def read(self, request, lib):
59 """Return the list of documents."""
63 for docid in lib.documents():
65 'url': reverse('document_view', args=[docid]),
# NOTE(review): defer('part_id') is combined with values_list('part_id', ...);
# the deferral looks redundant -- confirm it is intentional.
70 parts = PartCache.objects.defer('part_id')\
71 .values_list('part_id', 'document_id').distinct()
73 document_tree = dict(documents)
75 for part, docid in parts:
76 # this way, we won't display broken links
77 if not documents.has_key(part):
78 log.info("NOT FOUND: %s", part)
# NOTE(review): only `part` is checked above; documents[docid] raises KeyError
# if the cache names a parent that lib.documents() did not return.
81 parent = documents[docid]
82 child = documents[part]
84 # not top-level anymore
# NOTE(review): pop() has no default, so a part cached under two different
# documents raises KeyError on its second occurrence.
85 document_tree.pop(part)
86 parent['parts'].append(child)
88 for doc in documents.itervalues():
89 doc['parts'].sort(key=natural_order(lambda d: d['name']))
91 return {'documents': sorted(document_tree.itervalues(),
92 key=natural_order(lambda d: d['name']) ) }
# create(): stores a new document from uploaded text.
#   * The request is validated against forms.DocumentUploadForm (POST data)
#     and the bound form is passed in as `form`.
#   * The text is read from the form's 'ocr_data' field or from the uploaded
#     file request.FILES['ocr_file'] (decoded as UTF-8); a BadRequest response
#     with 'You must pass ocr_data or ocr_file.' covers the remaining case.
#   * With 'generate_dc' set, the text is passed through librarian.wrap_text()
#     together with today's date.
#   * The document id comes from the form's 'bookname' field; the document is
#     created with lib.document_create() and its content is written with
#     quickwrite('xml', ...) under the requesting user's name.
# Responses visible here: EntityCreated on success, InternalError on
# LibraryException, EntityConflict when the document already exists.
# NOTE(review): confirm that `librarian`, `traceback`, LibraryException and
# DocumentAlreadyExists are imported at module level.
94 @validate_form(forms.DocumentUploadForm, 'POST')
96 def create(self, request, form, lib):
97 """Create a new document."""
99 if form.cleaned_data['ocr_data']:
100 data = form.cleaned_data['ocr_data']
102 data = request.FILES['ocr_file'].read().decode('utf-8')
105 return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
107 if form.cleaned_data['generate_dc']:
108 data = librarian.wrap_text(data, unicode(date.today()))
110 docid = form.cleaned_data['bookname']
115 log.info("DOCID %s", docid)
116 doc = lib.document_create(docid)
117 # document created, but no content yet
120 doc = doc.quickwrite('xml', data.encode('utf-8'),
121 '$AUTO$ XML data uploaded.', user=request.user.username)
124 # rollback branch creation
# The original failure is re-raised as LibraryException carrying the
# formatted traceback as its message.
126 raise LibraryException(traceback.format_exc())
128 url = reverse('document_view', args=[doc.id])
130 return response.EntityCreated().django_response(\
134 'revision': doc.revision },
# NOTE(review): the full traceback is sent back to the client below, which
# exposes server internals -- consider logging it and returning a short
# message instead. The bound name `e` is not used.
138 except LibraryException, e:
140 return response.InternalError().django_response(\
141 {'exception': traceback.format_exc()} )
142 except DocumentAlreadyExists:
143 # Document is already there
144 return response.EntityConflict().django_response(\
145 {"reason": "Document %s already exists." % docid})
# GET-only handler exposing the public view of a single document.
# DocumentHandler references this class through its `anonymous` attribute.
150 class BasicDocumentHandler(AnonymousBaseHandler):
151 allowed_methods = ('GET',)
# read(): looks the document up with lib.document(docid); RevisionNotFound is
# handled explicitly. The visible result fields are the HTML, text and
# Dublin Core view URLs (all reversed with doc.id) plus the document's
# current revision under 'public_revision'.
154 def read(self, request, docid, lib):
156 doc = lib.document(docid)
157 except RevisionNotFound:
162 'html_url': reverse('dochtml_view', args=[doc.id]),
163 'text_url': reverse('doctext_view', args=[doc.id]),
164 'dc_url': reverse('docdc_view', args=[doc.id]),
165 'public_revision': doc.revision,
# Per-document handler accepting GET and PUT. `anonymous` points at
# BasicDocumentHandler.
173 class DocumentHandler(BaseHandler):
174 allowed_methods = ('GET', 'PUT')
175 anonymous = BasicDocumentHandler
# read(): returns metadata for both the public document (`doc`) and the
# requesting user's copy (`udoc`, obtained with doc.take(username)).
# All view URLs are reversed with udoc.id; revisions and their timestamps are
# reported separately for the user copy ('user_*') and the public document
# ('public_*'). RevisionNotFound is turned into an EntityNotFound response.
178 def read(self, request, docid, lib):
179 """Read document's meta data"""
# NOTE(review): the message is %-formatted eagerly; passing the values as
# logger arguments would defer formatting until the record is emitted.
180 log.info(u"Read %s (%s)" % (docid, type(docid)) )
182 doc = lib.document(docid)
183 udoc = doc.take(request.user.username)
# NOTE(review): 'exception' carries the exception *class* object (type(e)),
# not a string -- confirm the response emitter can serialise it.
184 except RevisionNotFound, e:
185 return response.EntityNotFound().django_response({
186 'exception': type(e), 'message': e.message,
189 # is_shared = udoc.ancestorof(doc)
190 # is_uptodate = is_shared or shared.ancestorof(document)
194 'html_url': reverse('dochtml_view', args=[udoc.id]),
195 'text_url': reverse('doctext_view', args=[udoc.id]),
196 'dc_url': reverse('docdc_view', args=[udoc.id]),
197 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
198 'merge_url': reverse('docmerge_view', args=[udoc.id]),
199 'user_revision': udoc.revision,
200 'user_timestamp': udoc.revision.timestamp,
201 'public_revision': doc.revision,
202 'public_timestamp': doc.revision.timestamp,
# update(): handler for the PUT method listed in allowed_methods.
# NOTE(review): the docstring below ends mid-word ("display not") --
# complete it once the intended wording is known.
208 def update(self, request, docid, lib):
209 """Update information about the document, like display not"""
# Handler that renders a document's XML as HTML.
214 class DocumentHTMLHandler(BaseHandler):
# NOTE(review): ('GET') is a parenthesised string, not a one-element tuple;
# the other handlers in this module use ('GET',). A membership test against a
# string does substring matching, so this most likely should be ('GET',).
215 allowed_methods = ('GET')
# read(): the optional 'revision' query parameter (default 'latest') selects
# the document: lib.document(docid) for 'latest', otherwise
# lib.document_for_rev(revision). If the resolved document's id differs from
# `docid`, a BadRequest with reason 'name-mismatch' is returned.
# On success the XML payload is passed to librarian.html.transform() with
# is_file=False and parse_dublincore=False and the result is returned.
# EntryNotFound / RevisionNotFound map to EntityNotFound ('not-found');
# librarian.ParseError maps to InternalError ('xml-parse-error').
218 def read(self, request, docid, lib):
219 """Read document as html text"""
221 revision = request.GET.get('revision', 'latest')
223 if revision == 'latest':
224 document = lib.document(docid)
226 document = lib.document_for_rev(revision)
228 if document.id != docid:
229 return response.BadRequest().django_response({'reason': 'name-mismatch',
230 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
232 return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
233 except (EntryNotFound, RevisionNotFound), e:
234 return response.EntityNotFound().django_response({
235 'reason': 'not-found', 'message': e.message})
236 except librarian.ParseError, e:
237 return response.InternalError().django_response({
238 'reason': 'xml-parse-error', 'message': e.message })
# Handler that lists the scan galleries attached to a document.
244 class DocumentGalleryHandler(BaseHandler):
# NOTE(review): ('GET') is a plain string, not a tuple; the other handlers in
# this module use ('GET',) -- most likely the trailing comma is missing.
245 allowed_methods = ('GET')
# read(): for every GalleryForDocument row whose `document` equals `docid`:
#   * the gallery directory is settings.MEDIA_ROOT joined with assoc.subpath;
#     a directory that does not exist is reported with a warning;
#   * a gallery entry {'name': assoc.name, 'pages': []} is built;
#   * directory entries whose names are not `unicode` objects are reported
#     with a warning, and entries whose extension (case-insensitive) is not
#     .png/.jpeg/.jpg are logged as ignored;
#   * each accepted file contributes MEDIA_URL + subpath + '/' + filename,
#     UTF-8 encoded and percent-quoted, to 'pages';
#   * 'pages' is sorted and the gallery is appended to `galleries`.
248 def read(self, request, docid):
249 """Read meta-data about scans for gallery of this document."""
251 from urllib import quote
253 for assoc in GalleryForDocument.objects.filter(document=docid):
254 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
256 if not os.path.isdir(dirpath):
257 log.warn(u"[WARNING]: missing gallery %s", dirpath)
260 gallery = {'name': assoc.name, 'pages': []}
# NOTE(review): the loop variable `file` shadows the Python 2 builtin of the
# same name inside this method.
262 for file in os.listdir(dirpath):
263 if not isinstance(file, unicode):
264 log.warn(u"File %r is gallery %r is not unicode. Ommiting."\
268 name, ext = os.path.splitext(os.path.basename(file))
270 if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
271 log.info(u"Ignoring: %s %s", name, ext)
# NOTE(review): quote() is applied to the whole URL with its default safe
# characters, so a ':' from an absolute MEDIA_URL (one with a scheme) would be
# percent-encoded -- confirm MEDIA_URL is path-only. The trailing ';' on the
# next line is a no-op.
274 url = settings.MEDIA_URL + assoc.subpath + u'/' + file;
275 gallery['pages'].append( quote(url.encode('utf-8')) )
277 gallery['pages'].sort()
278 galleries.append(gallery)
# Pattern for XInclude-style tags that reference another repository document,
# e.g. <xi:include href="wlrepo://some/part" />.
#   * the tag name may carry a namespace prefix ("xi:include" or "include");
#   * group 1 captures the quote character that opens the href value;
#   * the named group "link" captures everything after "wlrepo://" up to the
#     matching closing quote.
# The link body is matched with a negative look-ahead on group 1 instead of
# the former "[^\1]": inside a character class "\1" is the character \x01,
# not a back-reference, so an empty href let the capture swallow the closing
# quote and run on into the following attribute.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>(?:(?!\1)[\s\S])+)\1\s*[^>]*?>"""
# Handler for the raw XML text of a document: GET returns it, POST stores a
# new version.
290 class DocumentTextHandler(BaseHandler):
291 allowed_methods = ('GET', 'POST')
# read(): the optional 'revision' query parameter (default 'latest') selects
# lib.document(docid) or lib.document_for_rev(revision). A resolved document
# whose id differs from `docid` yields BadRequest ('name-mismatch'); otherwise
# the document's 'xml' data is returned. EntryNotFound / RevisionNotFound map
# to an EntityNotFound response.
294 def read(self, request, docid, lib):
295 """Read document as raw text"""
296 revision = request.GET.get('revision', 'latest')
298 if revision == 'latest':
299 document = lib.document(docid)
301 document = lib.document_for_rev(revision)
303 if document.id != docid:
304 return response.BadRequest().django_response({'reason': 'name-mismatch',
305 'message': 'Provided revision is not valid for this document'})
307 # TODO: some finer-grained access control
308 return document.data('xml')
# NOTE(review): 'exception' carries the exception class object (type(e)) --
# confirm the response emitter can serialise it.
309 except (EntryNotFound, RevisionNotFound), e:
310 return response.EntityNotFound().django_response({
311 'exception': type(e), 'message': e.message})
# create(): stores new XML content for the requesting user's document.
#   * POST fields: 'contents' (the XML text), 'revision' (the revision the
#     client edited) and optional 'message' (commit message, prefixed with
#     "$USER$ "; a "$AUTO$" message is used otherwise).
#   * `current` is the user's document, `orig` the document at the supplied
#     revision; an EntityConflict ('out-of-date') response reports both
#     revisions.
#   * wlrepo:// include links are extracted from the text with
#     XINCLUDE_REGEXP; the nested xml_update_action() compares them with the
#     JSON list stored in the 'parts' file, rewrites that file and updates
#     PartCache when they differ, then writes the XML itself.
#   * current.invoke_and_commit() runs the action; the commit message and
#     owner are supplied by the lambda.
# On success a SuccessAllOk response with previous/new revision, timestamp
# and the text-view URL is returned; RevisionNotFound yields a text/plain
# EntityNotFound.
# NOTE(review): request.POST['contents'] / ['revision'] raise if the field is
# missing -- confirm the caller guarantees both.
314 def create(self, request, docid, lib):
316 data = request.POST['contents']
317 revision = request.POST['revision']
319 if request.POST.has_key('message'):
320 msg = u"$USER$ " + request.POST['message']
322 msg = u"$AUTO$ XML content update."
324 current = lib.document(docid, request.user.username)
325 orig = lib.document_for_rev(revision)
328 return response.EntityConflict().django_response({
329 "reason": "out-of-date",
330 "provided_revision": orig.revision,
331 "latest_revision": current.revision })
# NOTE(review): re.finditer() always returns an iterator object, which is
# truthy, so the `or []` fallback in the expression below never applies.
333 # try to find any Xinclude tags
334 includes = [m.groupdict()['link'] for m in (re.finditer(\
335 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
337 log.info("INCLUDES: %s", includes)
339 # TODO: provide useful routines to make this simpler
# xml_update_action(lib, resolve): `resolve` maps a logical name ('parts',
# 'xml') to the path handed to lib._fileopen()/lib._fileadd(). It closes over
# `includes`, `data`, `docid` and `current` from the enclosing call.
# NOTE(review): confirm every handle opened through lib._fileopen() is closed.
340 def xml_update_action(lib, resolve):
342 f = lib._fileopen(resolve('parts'), 'r')
343 stored_includes = json.loads(f.read())
348 if stored_includes != includes:
349 f = lib._fileopen(resolve('parts'), 'w+')
350 f.write(json.dumps(includes))
353 lib._fileadd(resolve('parts'))
355 # update the parts cache
356 PartCache.update_cache(docid, current.owner,\
357 stored_includes, includes)
359 # now that the parts are ok, write xml
360 f = lib._fileopen(resolve('xml'), 'w+')
361 f.write(data.encode('utf-8'))
365 ndoc = current.invoke_and_commit(\
366 xml_update_action, lambda d: (msg, current.owner) )
369 # return the new revision number
370 return response.SuccessAllOk().django_response({
373 "previous_revision": current.revision,
374 "revision": ndoc.revision,
375 'timestamp': ndoc.revision.timestamp,
376 "url": reverse("doctext_view", args=[ndoc.id])
# The commit is rolled back only when a new document revision was produced.
# NOTE(review): confirm `ndoc` is bound on every path that reaches this line.
379 if ndoc: lib._rollback()
381 except RevisionNotFound, e:
382 return response.EntityNotFound(mimetype="text/plain").\
383 django_response(e.message)
387 # Dublin Core handlers
389 # @requires librarian
# Handler for the Dublin Core metadata embedded in a document's XML:
# GET returns the serialised metadata, POST replaces it.
391 class DocumentDublinCoreHandler(BaseHandler):
392 allowed_methods = ('GET', 'POST')
# read(): the optional 'revision' query parameter (default 'latest') selects
# lib.document(docid) or lib.document_for_rev(revision); the result is bound
# to `doc`. The document's 'xml' data is parsed with
# dcparser.BookInfo.from_string() and bookinfo.serialize() is returned.
# EntryNotFound / RevisionNotFound map to an EntityNotFound response.
# NOTE(review): the docstring says "raw text" but the method returns the
# serialised BookInfo -- it looks copied from the text handler.
395 def read(self, request, docid, lib):
396 """Read document as raw text"""
398 revision = request.GET.get('revision', 'latest')
400 if revision == 'latest':
401 doc = lib.document(docid)
403 doc = lib.document_for_rev(revision)
# NOTE(review): BUG -- the fetched object is bound to `doc`; no `document`
# name is assigned in this method, so the check below looks like a NameError
# waiting to happen. It should most likely read `doc.id != docid`.
406 if document.id != docid:
407 return response.BadRequest().django_response({'reason': 'name-mismatch',
408 'message': 'Provided revision is not valid for this document'})
410 bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
411 return bookinfo.serialize()
412 except (EntryNotFound, RevisionNotFound), e:
413 return response.EntityNotFound().django_response({
414 'exception': type(e), 'message': e.message})
# create(): replaces the document's Dublin Core block.
#   * POST fields: 'contents' (JSON for BookInfo.from_json), 'revision' (the
#     revision the client edited) and optional 'message'.
#   * `current` is the user's document, `orig` the document at the supplied
#     revision; an EntityConflict ('out-of-date') response reports both.
#   * The current XML is parsed, its book_info replaced, and the serialised
#     result written with current.quickwrite('xml', ...).
# The visible success payload carries previous/new revision, timestamp and the
# Dublin Core view URL; RevisionNotFound yields an empty EntityNotFound.
417 def create(self, request, docid, lib):
419 bi_json = request.POST['contents']
420 revision = request.POST['revision']
# NOTE(review): BUG -- presence of 'message' is tested on request.POST but the
# value is read from request.PUT; the text handler reads request.POST here.
422 if request.POST.has_key('message'):
423 msg = u"$USER$ " + request.PUT['message']
425 msg = u"$AUTO$ Dublin core update."
427 current = lib.document(docid, request.user.username)
428 orig = lib.document_for_rev(revision)
431 return response.EntityConflict().django_response({
432 "reason": "out-of-date",
433 "provided": orig.revision,
434 "latest": current.revision })
# NOTE(review): BUG -- the parsed tree is bound to `xmldoc`, yet the following
# statements assign to and serialise `document`, which is not bound in this
# method; they should most likely use `xmldoc`. Also confirm that `parser`
# (presumably librarian's parser module) is imported at module level.
436 xmldoc = parser.WLDocument.from_string(current.data('xml'))
437 document.book_info = dcparser.BookInfo.from_json(bi_json)
440 ndoc = current.quickwrite('xml', \
441 document.serialize().encode('utf-8'),\
442 message=msg, user=request.user.username)
445 # return the new revision number
449 "previous_revision": current.revision,
450 "revision": ndoc.revision,
451 'timestamp': ndoc.revision.timestamp,
452 "url": reverse("docdc_view", args=[ndoc.id])
# The write is rolled back only when a new document revision was produced.
455 if ndoc: lib._rollback()
457 except RevisionNotFound:
458 return response.EntityNotFound().django_response()
460 class MergeHandler(BaseHandler):
461 allowed_methods = ('POST',)
463 @validate_form(forms.MergeRequestForm, 'POST')
465 def create(self, request, form, docid, lib):
466 """Create a new document revision from the information provided by user"""
468 target_rev = form.cleaned_data['target_revision']
470 doc = lib.document(docid)
471 udoc = doc.take(request.user.username)
473 if target_rev == 'latest':
474 target_rev = udoc.revision
476 if str(udoc.revision) != target_rev:
477 # the user doesn't know he has an old version
480 # Updating is theoretically OK, but we would
481 # have to force a refresh. Sharing may not be safe,
482 # because it doesn't always result in an update.
484 # In other words, we can't lie about the resource's state
485 # So we should just yield and 'out-of-date' conflict
486 # and let the client ask again with updated info.
488 # NOTE: this could result in a race condition, when there
489 # are 2 instances of the same user editing the same document.
490 # Instance "A" trying to update, and instance "B" always changing
491 # the document right before "A". The answer to this problem is
492 # for "A" to request a merge from 'latest' and then
493 # check the parent revisions in the response, to see if he actually
494 # merged from where he thinks he should. If not, the client SHOULD
495 # update his internal state.
496 return response.EntityConflict().django_response({
497 "reason": "out-of-date",
498 "provided": target_rev,
499 "latest": udoc.revision })
501 if not request.user.has_perm('explorer.book.can_share'):
502 # User is not permitted to make a merge, right away
503 # So we instead create a pull request in the database
505 comitter=request.user,
507 source_revision = str(udoc.revision),
509 comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
513 return response.RequestAccepted().django_response(\
514 ticket_status=prq.status, \
515 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
517 if form.cleaned_data['type'] == 'update':
518 # update is always performed from the file branch
520 success, changed = udoc.update(request.user.username)
522 if form.cleaned_data['type'] == 'share':
523 success, changed = udoc.share(form.cleaned_data['message'])
526 return response.EntityConflict().django_response({
527 'reason': 'merge-failure',
531 return response.SuccessNoContent().django_response()
533 nudoc = udoc.latest()
535 return response.SuccessAllOk().django_response({
537 "parent_user_resivion": udoc.revision,
538 "parent_revision": doc.revision,
539 "revision": nudoc.revision,
540 'timestamp': nudoc.revision.timestamp,