1 # -*- encoding: utf-8 -*-
5 __author__= "Ćukasz Rekucki"
6 __date__ = "$2009-09-25 15:49:50$"
7 __doc__ = "Module documentation."
9 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from datetime import date
14 from django.core.urlresolvers import reverse
15 from django.utils import simplejson as json
19 from librarian import dcparser
22 from explorer.models import PullRequest, GalleryForDocument
25 import api.forms as forms
26 import api.response as response
27 from api.utils import validate_form, hglibrary, natural_order
28 from api.models import PartCache
# Module-level logger used by the handlers in this file; messages go to the
# 'platforma.api' logger.
34 log = logging.getLogger('platforma.api')
38 # Document List Handlers
# Read-only listing of the library's documents. LibraryHandler registers this
# class as its `anonymous` handler.
40 class BasicLibraryHandler(AnonymousBaseHandler):
# Only GET is accepted.
41 allowed_methods = ('GET',)
# NOTE(review): nothing visible here supplies `lib`; presumably the `hglibrary`
# decorator imported from api.utils injects it -- confirm.
44 def read(self, request, lib):
45 """Return the list of documents."""
# One entry per id returned by lib.documents(): the 'document_view' URL for
# that id, and the id itself as the name.
47 'url': reverse('document_view', args=[docid]),
48 'name': docid } for docid in lib.documents() ]
# The list is wrapped in a dict under the 'documents' key.
50 return {'documents' : document_list}
53 class LibraryHandler(BaseHandler):
54 allowed_methods = ('GET', 'POST')
55 anonymous = BasicLibraryHandler
58 def read(self, request, lib):
59 """Return the list of documents."""
63 for docid in lib.documents():
64 docid = docid.decode('utf-8')
66 'url': reverse('document_view', args=[docid]),
71 parts = PartCache.objects.defer('part_id')\
72 .values_list('part_id', 'document_id').distinct()
74 document_tree = dict(documents)
76 for part, docid in parts:
77 # this way, we won't display broken links
78 if not documents.has_key(part):
79 log.info("NOT FOUND: %s", part)
82 parent = documents[docid]
83 child = documents[part]
85 # not top-level anymore
86 document_tree.pop(part)
87 parent['parts'].append(child)
89 for doc in documents.itervalues():
90 doc['parts'].sort(key=natural_order(lambda d: d['name']))
92 return {'documents': sorted(document_tree.itervalues(),
93 key=natural_order(lambda d: d['name']) ) }
95 @validate_form(forms.DocumentUploadForm, 'POST')
97 def create(self, request, form, lib):
98 """Create a new document."""
100 if form.cleaned_data['ocr_data']:
101 data = form.cleaned_data['ocr_data']
103 data = request.FILES['ocr_file'].read().decode('utf-8')
105 if form.cleaned_data['generate_dc']:
106 data = librarian.wrap_text(data, unicode(date.today()))
108 docid = form.cleaned_data['bookname']
113 log.info("DOCID %s", docid)
114 doc = lib.document_create(docid)
115 # document created, but no content yet
118 doc = doc.quickwrite('xml', data.encode('utf-8'),
119 '$AUTO$ XML data uploaded.', user=request.user.username)
121 # rollback branch creation
123 raise LibraryException("Exception occured:" + repr(e))
125 url = reverse('document_view', args=[doc.id])
127 return response.EntityCreated().django_response(\
131 'revision': doc.revision },
135 except LibraryException, e:
136 return response.InternalError().django_response(\
137 {'exception': repr(e) })
138 except DocumentAlreadyExists:
139 # Document is already there
140 return response.EntityConflict().django_response(\
141 {"reason": "Document %s already exists." % docid})
# Read-only metadata for a single document. DocumentHandler registers this
# class as its `anonymous` handler.
146 class BasicDocumentHandler(AnonymousBaseHandler):
147 allowed_methods = ('GET',)
# NOTE(review): nothing visible here supplies `lib`; presumably the `hglibrary`
# decorator imported from api.utils injects it -- confirm.
150 def read(self, request, docid, lib):
# Look the document up by id; a RevisionNotFound from the lookup is handled
# separately.
152 doc = lib.document(docid)
153 except RevisionNotFound:
# Links to the HTML, text and Dublin Core views of the document, plus its
# revision reported as 'public_revision'.
158 'html_url': reverse('dochtml_view', args=[doc.id]),
159 'text_url': reverse('doctext_view', args=[doc.id]),
160 'dc_url': reverse('docdc_view', args=[doc.id]),
161 'public_revision': doc.revision,
# Metadata handler for a single document (GET and PUT), with
# BasicDocumentHandler as the `anonymous` handler.
169 class DocumentHandler(BaseHandler):
170 allowed_methods = ('GET', 'PUT')
171 anonymous = BasicDocumentHandler
# NOTE(review): nothing visible here supplies `lib`; presumably the `hglibrary`
# decorator imported from api.utils injects it -- confirm.
174 def read(self, request, docid, lib):
175 """Read document's meta data"""
176 log.info("Read %s", docid)
# `doc` is the document looked up by id; `udoc` is what doc.take() returns for
# the requesting user's name. Both revisions are reported below.
178 doc = lib.document(docid)
179 udoc = doc.take(request.user.username)
180 except RevisionNotFound, e:
# NOTE(review): 'exception' is given the exception class object itself, not
# its name -- confirm the response layer can serialize it.
181 return response.EntityNotFound().django_response({
182 'exception': type(e), 'message': e.message})
184 # is_shared = udoc.ancestorof(doc)
185 # is_uptodate = is_shared or shared.ancestorof(document)
# All links are built from the user's copy (udoc.id); revisions and timestamps
# are given for both the user's copy and the document looked up by id.
189 'html_url': reverse('dochtml_view', args=[udoc.id]),
190 'text_url': reverse('doctext_view', args=[udoc.id]),
191 'dc_url': reverse('docdc_view', args=[udoc.id]),
192 'gallery_url': reverse('docgallery_view', args=[udoc.id]),
193 'merge_url': reverse('docmerge_view', args=[udoc.id]),
194 'user_revision': udoc.revision,
195 'user_timestamp': udoc.revision.timestamp,
196 'public_revision': doc.revision,
197 'public_timestamp': doc.revision.timestamp,
# Only the signature and docstring of update() are visible here.
203 def update(self, request, docid, lib):
204 """Update information about the document, like display not"""
209 class DocumentHTMLHandler(BaseHandler):
210 allowed_methods = ('GET')
213 def read(self, request, docid, lib):
214 """Read document as html text"""
216 revision = request.GET.get('revision', 'latest')
218 if revision == 'latest':
219 document = lib.document(docid)
221 document = lib.document_for_rev(revision)
223 if document.id != docid:
224 return response.BadRequest().django_response({'reason': 'name-mismatch',
225 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
227 return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
228 except (EntryNotFound, RevisionNotFound), e:
229 return response.EntityNotFound().django_response({
230 'exception': type(e), 'message': e.message})
236 from django.core.files.storage import FileSystemStorage
238 class DocumentGalleryHandler(BaseHandler):
239 allowed_methods = ('GET')
241 def read(self, request, docid):
242 """Read meta-data about scans for gallery of this document."""
245 for assoc in GalleryForDocument.objects.filter(document=docid):
246 dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
248 if not os.path.isdir(dirpath):
249 log.info(u"[WARNING]: missing gallery %s", dirpath)
252 gallery = {'name': assoc.name, 'pages': []}
254 for file in sorted(os.listdir(dirpath), key=natural_order()):
256 name, ext = os.path.splitext(os.path.basename(file))
258 if ext.lower() not in ['.png', '.jpeg', '.jpg']:
259 log.info("Ignoring: %s %s", name, ext)
262 url = settings.MEDIA_URL + assoc.subpath + u'/' + file.decode('utf-8');
263 gallery['pages'].append(url)
265 galleries.append(gallery)
# Matches an <include ...> tag (optionally namespace-prefixed) whose href is a
# quoted wlrepo:// URL, and captures the text after "wlrepo://" as the `link`
# group.
# NOTE(review): inside a character class `\1` is a numeric character escape,
# not a backreference, so `[^\1]` does not mean "anything but the opening
# quote". The match still ends at the closing quote only because the lazy `+?`
# is followed by the real backreference `\1`.
273 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
# Raw XML access to a document: GET returns the XML, POST stores new XML
# together with the list of parts it includes.
277 class DocumentTextHandler(BaseHandler):
278 allowed_methods = ('GET', 'POST')
# NOTE(review): nothing visible here supplies `lib`; presumably the `hglibrary`
# decorator imported from api.utils injects it -- confirm.
281 def read(self, request, docid, lib):
282 """Read document as raw text"""
# The optional 'revision' query parameter defaults to 'latest'.
283 revision = request.GET.get('revision', 'latest')
285 if revision == 'latest':
286 document = lib.document(docid)
288 document = lib.document_for_rev(revision)
# A revision that belongs to a different document is rejected as a bad request.
290 if document.id != docid:
291 return response.BadRequest().django_response({'reason': 'name-mismatch',
292 'message': 'Provided revision is not valid for this document'})
294 # TODO: some finer-grained access control
295 return document.data('xml')
296 except (EntryNotFound, RevisionNotFound), e:
297 return response.EntityNotFound().django_response({
298 'exception': type(e), 'message': e.message})
301 def create(self, request, docid, lib):
# The new XML and the revision the client edited come from the POST body.
303 data = request.POST['contents']
304 revision = request.POST['revision']
# Commit message: the client's text prefixed with "$USER$ "; an automatic
# "$AUTO$" message is the other visible choice.
306 if request.POST.has_key('message'):
307 msg = u"$USER$ " + request.POST['message']
309 msg = u"$AUTO$ XML content update."
# `current` is the requesting user's copy of the document; `orig` is the
# document at the revision the client sent.
311 current = lib.document(docid, request.user.username)
312 orig = lib.document_for_rev(revision)
# Conflict response reporting both the provided and the latest revision.
315 return response.EntityConflict().django_response({
316 "reason": "out-of-date",
317 "provided_revision": orig.revision,
318 "latest_revision": current.revision })
# NOTE(review): re.finditer always returns an iterator, so the `or []`
# fallback below never takes effect.
320 # try to find any Xinclude tags
321 includes = [m.groupdict()['link'] for m in (re.finditer(\
322 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
324 log.info("INCLUDES: %s", includes)
326 # TODO: provide useful routines to make this simpler
# Passed to current.invoke_and_commit below; `resolve` maps an entry name
# ('parts', 'xml') to the path given to lib._fileopen.
327 def xml_update_action(lib, resolve):
# The previously stored include list is read from the 'parts' entry as JSON.
329 f = lib._fileopen(resolve('parts'), 'r')
330 stored_includes = json.loads(f.read())
# The 'parts' entry is rewritten only when the include list changed.
335 if stored_includes != includes:
336 f = lib._fileopen(resolve('parts'), 'w+')
337 f.write(json.dumps(includes))
340 lib._fileadd(resolve('parts'))
342 # update the parts cache
343 PartCache.update_cache(docid, current.owner,\
344 stored_includes, includes)
346 # now that the parts are ok, write xml
347 f = lib._fileopen(resolve('xml'), 'w+')
348 f.write(data.encode('utf-8'))
# The second argument supplies the commit message and the owner of the user's
# copy.
352 ndoc = current.invoke_and_commit(\
353 xml_update_action, lambda d: (msg, current.owner) )
356 # return the new revision number
357 return response.SuccessAllOk().django_response({
360 "previous_revision": current.revision,
361 "revision": ndoc.revision,
362 'timestamp': ndoc.revision.timestamp,
363 "url": reverse("doctext_view", args=[ndoc.id])
# Roll back only when a new revision object was actually produced.
366 if ndoc: lib._rollback()
# An unknown revision becomes a plain-text EntityNotFound response.
368 except RevisionNotFound, e:
369 return response.EntityNotFound(mimetype="text/plain").\
370 django_response(e.message)
374 # Dublin Core handlers
376 # @requires librarian
378 class DocumentDublinCoreHandler(BaseHandler):
379 allowed_methods = ('GET', 'POST')
382 def read(self, request, docid, lib):
383 """Read document as raw text"""
385 revision = request.GET.get('revision', 'latest')
387 if revision == 'latest':
388 doc = lib.document(docid)
390 doc = lib.document_for_rev(revision)
393 if document.id != docid:
394 return response.BadRequest().django_response({'reason': 'name-mismatch',
395 'message': 'Provided revision is not valid for this document'})
397 bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
398 return bookinfo.serialize()
399 except (EntryNotFound, RevisionNotFound), e:
400 return response.EntityNotFound().django_response({
401 'exception': type(e), 'message': e.message})
404 def create(self, request, docid, lib):
406 bi_json = request.POST['contents']
407 revision = request.POST['revision']
409 if request.POST.has_key('message'):
410 msg = u"$USER$ " + request.PUT['message']
412 msg = u"$AUTO$ Dublin core update."
414 current = lib.document(docid, request.user.username)
415 orig = lib.document_for_rev(revision)
418 return response.EntityConflict().django_response({
419 "reason": "out-of-date",
420 "provided": orig.revision,
421 "latest": current.revision })
423 xmldoc = parser.WLDocument.from_string(current.data('xml'))
424 document.book_info = dcparser.BookInfo.from_json(bi_json)
427 ndoc = current.quickwrite('xml', \
428 document.serialize().encode('utf-8'),\
429 message=msg, user=request.user.username)
432 # return the new revision number
436 "previous_revision": current.revision,
437 "revision": ndoc.revision,
438 'timestamp': ndoc.revision.timestamp,
439 "url": reverse("docdc_view", args=[ndoc.id])
442 if ndoc: lib._rollback()
444 except RevisionNotFound:
445 return response.EntityNotFound().django_response()
447 class MergeHandler(BaseHandler):
448 allowed_methods = ('POST',)
450 @validate_form(forms.MergeRequestForm, 'POST')
452 def create(self, request, form, docid, lib):
453 """Create a new document revision from the information provided by user"""
455 target_rev = form.cleaned_data['target_revision']
457 doc = lib.document(docid)
458 udoc = doc.take(request.user.username)
460 if target_rev == 'latest':
461 target_rev = udoc.revision
463 if str(udoc.revision) != target_rev:
464 # the user doesn't know that he has an old version
467 # Updating is theoretically ok, but we would
468 # have to force a refresh. Sharing may not be safe,
469 # because it doesn't always result in an update.
471 # In other words, we can't lie about the resource's state.
472 # So we should just yield an 'out-of-date' conflict
473 # and let the client ask again with updated info.
475 # NOTE: this could result in a race condition when there
476 # are 2 instances of the same user editing the same document:
477 # instance "A" trying to update, and instance "B" always changing
478 # the document right before "A". The answer to this problem is
479 # for "A" to request a merge from 'latest' and then
480 # check the parent revisions in the response to see whether he actually
481 # merged from where he thinks he should. If not, the client SHOULD
482 # update his internal state.
483 return response.EntityConflict().django_response({
484 "reason": "out-of-date",
485 "provided": target_rev,
486 "latest": udoc.revision })
488 if not request.user.has_perm('explorer.book.can_share'):
489 # User is not permitted to make a merge, right away
490 # So we instead create a pull request in the database
492 comitter=request.user,
494 source_revision = str(udoc.revision),
496 comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
500 return response.RequestAccepted().django_response(\
501 ticket_status=prq.status, \
502 ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
504 if form.cleaned_data['type'] == 'update':
505 # update is always performed from the file branch
507 success, changed = udoc.update(request.user.username)
509 if form.cleaned_data['type'] == 'share':
510 success, changed = udoc.share(form.cleaned_data['message'])
513 return response.EntityConflict().django_response({
514 'reason': 'merge-failure',
518 return response.SuccessNoContent().django_response()
520 nudoc = udoc.latest()
522 return response.SuccessAllOk().django_response({
524 "parent_user_resivion": udoc.revision,
525 "parent_revision": doc.revision,
526 "revision": nudoc.revision,
527 'timestamp': nudoc.revision.timestamp,