aef1e9c1a6864e06a5e0e6a812809becfc721880
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12
13 from datetime import date
14
15 from django.core.urlresolvers import reverse
16 from django.db import IntegrityError
17
18 import librarian
19 import librarian.html
20
21 from wlrepo import *
22 from api.models import PullRequest
23 from explorer.models import GalleryForDocument
24
25 # internal imports
26 import api.forms as forms
27 import api.response as response
28 from api.utils import validate_form, hglibrary, natural_order
29 from api.models import PartCache, PullRequest
30
31 #
32 import settings
33
34
def is_prq(username):
    """Return True when `username` carries the ``$prq-`` pull-request prefix."""
    marker = '$prq-'
    return username[:len(marker)] == marker
37
def prq_for_user(username):
    """Look up the pull request behind a ``$prq-<id>`` pseudo-user name.

    Returns the matching PullRequest, or None when the text after the
    5-character prefix is not an integer or no pull request has that id.
    """
    try:
        # everything after the 5-character '$prq-' prefix must be the id
        return PullRequest.objects.get(id=int(username[5:]))
    except (TypeError, ValueError, PullRequest.DoesNotExist):
        # Malformed name or unknown id.  Other failures (for example
        # database errors) are real problems and are left to propagate
        # instead of being silently turned into "no such request".
        return None
43
def check_user(request, user):
    """Yield an access-denied response if `request.user` may not act as `user`.

    This is a generator: it yields at most one response and yields nothing
    when access is allowed.  Callers loop over it and return the first item.

    - `user` is a pull-request pseudo-user: needs the 'api.view_prq'
      permission.
    - `user` differs from the requesting user's name: needs the
      'api.view_other_document' permission.
    - otherwise no permission is required.
    """
    current = request.user
    log.info("user: %r, perm: %r" % (current, current.get_all_permissions()) )

    if is_prq(user):
        # pull request
        required = 'api.view_prq'
        message = "You don't have enough priviliges to view pull requests."
    elif current.username != user:
        # other users
        required = 'api.view_other_document'
        message = "You don't have enough priviliges to view other people's document."
    else:
        # own document, nothing to verify
        return

    if not current.has_perm(required):
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': message
        })
61
62 #
63 # Document List Handlers
64 #
65 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only document listing built on AnonymousBaseHandler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        The result is a dict with a single 'documents' key holding one
        {'url', 'name'} entry per id reported by `lib.documents()`.
        """
        listing = []
        for name in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[name]),
                'name': name,
            })
        return {'documents': listing}
76         
77 #
78 # This handler controls the document collection
79 #
class LibraryHandler(BaseHandler):
    """List the document collection (GET) and add documents to it (POST)."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Every document gets a {'url', 'name', 'parts'} entry.  A document
        that PartCache lists as a part of another one is nested in that
        document's 'parts' list and removed from the top level.  Both
        levels are sorted by name using `natural_order`.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # starts out holding every document; parts are removed below
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # a cache row may also reference a parent that is not in the
            # library; skip it instead of failing with KeyError
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            # not top-level anymore; the same part can show up under more
            # than one parent, so it may have been removed already
            document_tree.pop(part, None)
            documents[docid]['parts'].append(documents[part])

        for doc in documents.itervalues():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.itervalues(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The content comes from the 'ocr_data' form field or, when that is
        empty, from the uploaded 'ocr_file' (decoded as UTF-8).  When
        'generate_dc' is set the text is passed through
        `librarian.wrap_text` together with today's date.

        Responds with EntityCreated on success, BadRequest when no content
        was supplied, EntityConflict when the document already exists and
        InternalError for other library failures.
        """
        data = form.cleaned_data['ocr_data']

        if not data:
            # .get() so that a missing upload gives a 400 response rather
            # than an unhandled KeyError
            upload = request.FILES.get('ocr_file')
            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            data = upload.read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    import traceback
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(\
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        except DocumentAlreadyExists:
            # Handled before LibraryException on purpose: if this class
            # derives from LibraryException (not visible here -- TODO
            # confirm in wlrepo) the generic clause would shadow it.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document '%s' already exists." % docid
            })
        except LibraryException:
            import traceback
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
176
177 #
178 # Document Handlers
179 #
180 class BasicDocumentHandler(AnonymousBaseHandler):
181     allowed_methods = ('GET',)
182
183     @hglibrary
184     def read(self, request, docid, lib):
185         try:    
186             doc = lib.document(docid)
187         except RevisionNotFound:
188             return rc.NOT_FOUND
189
190         result = {
191             'name': doc.id,
192             'html_url': reverse('dochtml_view', args=[doc.id]),
193             'text_url': reverse('doctext_view', args=[doc.id]),
194             'dc_url': reverse('docdc_view', args=[doc.id]),
195             'public_revision': doc.revision,
196         }
197
198         return result
199
200 #
201 # Document Meta Data
202 #
203 class DocumentHandler(BaseHandler):
204     allowed_methods = ('GET', 'PUT')
205     anonymous = BasicDocumentHandler
206
207     @validate_form(forms.DocumentRetrieveForm, 'GET')
208     @hglibrary
209     def read(self, request, form, docid, lib):
210         """Read document's meta data"""       
211         log.info(u"User '%s' wants to %s(%s) as %s" % \
212             (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
213
214         user = form.cleaned_data['user'] or request.user.username
215         rev = form.cleaned_data['revision'] or 'latest'
216
217         for error in check_user(request, user):
218             return error
219             
220         try:
221             doc = lib.document(docid, user, rev=rev)
222         except RevisionMismatch, e:
223             # the document exists, but the revision is bad
224             return response.EntityNotFound().django_response({
225                 'reason': 'revision-mismatch',
226                 'message': e.message,
227                 'docid': docid,
228                 'user': user,
229             })
230         except RevisionNotFound, e:
231             # the user doesn't have this document checked out
232             # or some other weird error occured
233             # try to do the checkout
234             try:
235                 if user == request.user.username:
236                     mdoc = lib.document(docid)
237                     doc = mdoc.take(user)
238                 elif is_prq(user):
239                     prq = prq_for_user(user)
240                     # commiter's document
241                     prq_doc = lib.document_for_rev(prq.source_revision)
242                     doc = prq_doc.take(user)
243                 else:
244                     return response.EntityNotFound().django_response({
245                         'reason': 'document-not-found',
246                         'message': e.message,
247                         'docid': docid,
248                         'user': user,
249                     })
250             except RevisionNotFound, e:
251                 return response.EntityNotFound().django_response({
252                     'reason': 'document-not-found',
253                     'message': e.message,
254                     'docid': docid,
255                     'user': user
256                 })
257
258         return {
259             'name': doc.id,
260             'user': user,
261             'html_url': reverse('dochtml_view', args=[doc.id]),
262             'text_url': reverse('doctext_view', args=[doc.id]),
263             # 'dc_url': reverse('docdc_view', args=[doc.id]),
264             'gallery_url': reverse('docgallery_view', args=[doc.id]),
265             'merge_url': reverse('docmerge_view', args=[doc.id]),
266             'revision': doc.revision,
267             'timestamp': doc.revision.timestamp,
268             # 'public_revision': doc.revision,
269             # 'public_timestamp': doc.revision.timestamp,
270         }   
271
272     
273 #    @hglibrary
274 #    def update(self, request, docid, lib):
275 #        """Update information about the document, like display not"""
276 #        return
277 #
278 #
279 #
280 class DocumentHTMLHandler(BaseHandler):
281     allowed_methods = ('GET')
282
283     @validate_form(forms.DocumentRetrieveForm, 'GET')
284     @hglibrary
285     def read(self, request, form, docid, lib, stylesheet='partial'):
286         """Read document as html text"""
287         try:
288             revision = form.cleaned_data['revision']
289             user = form.cleaned_data['user'] or request.user.username
290             document = lib.document_for_rev(revision)
291
292             if document.id != docid:
293                 return response.BadRequest().django_response({
294                     'reason': 'name-mismatch',
295                     'message': 'Provided revision is not valid for this document'
296                 })
297
298             if document.owner != user:
299                 return response.BadRequest().django_response({
300                     'reason': 'user-mismatch',
301                     'message': "Provided revision doesn't belong to user %s" % user
302                 })
303
304             for error in check_user(request, user):
305                 return error
306
307             return librarian.html.transform(document.data('xml'), is_file=False, \
308                 parse_dublincore=False, stylesheet=stylesheet,\
309                 options={
310                     "with-paths": 'boolean(1)',                    
311                 })
312                 
313         except (EntryNotFound, RevisionNotFound), e:
314             return response.EntityNotFound().django_response({
315                 'reason': 'not-found', 'message': e.message})
316         except librarian.ParseError, e:
317             return response.InternalError().django_response({
318                 'reason': 'xml-parse-error', 'message': e.message })
319
320 #
321 # Image Gallery
322 #
323
class DocumentGalleryHandler(BaseHandler):
    """List the scan images attached to a document."""
    # a real one-element tuple, like the other handlers in this module;
    # ('GET') without the comma is just the string 'GET'
    allowed_methods = ('GET',)


    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        Returns a list with one {'name', 'pages'} dict per
        GalleryForDocument row of `docid` whose directory exists under
        MEDIA_ROOT.  'pages' holds one URL-quoted entry per directory
        item: the image URL for .png/.jpeg/.jpg files, and the
        'missing.png' placeholder for names that are not valid UTF-8 or
        have another extension.
        """
        from urllib import quote

        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # reset for every entry, so a rejected file can neither
                # reuse the previous file's URL nor hit an unbound name
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
                            % (filename, dirpath) )
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in [u'.png', u'.jpeg', u'.jpg']:
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        # previously the URL was assigned right after being
                        # cleared, so this filter never took effect
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append( quote(url.encode('utf-8')) )

#            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
369
370
371
372 #
373 # Dublin Core handlers
374 #
375 # @requires librarian
376 #
377 #class DocumentDublinCoreHandler(BaseHandler):
378 #    allowed_methods = ('GET', 'POST')
379 #
380 #    @hglibrary
381 #    def read(self, request, docid, lib):
382 #        """Read document as raw text"""
383 #        try:
384 #            revision = request.GET.get('revision', 'latest')
385 #
386 #            if revision == 'latest':
387 #                doc = lib.document(docid)
388 #            else:
389 #                doc = lib.document_for_rev(revision)
390 #
391 #
392 #            if document.id != docid:
393 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
394 #                    'message': 'Provided revision is not valid for this document'})
395 #
396 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
397 #            return bookinfo.serialize()
398 #        except (EntryNotFound, RevisionNotFound), e:
399 #            return response.EntityNotFound().django_response({
400 #                'exception': type(e), 'message': e.message})
401 #
402 #    @hglibrary
403 #    def create(self, request, docid, lib):
404 #        try:
405 #            bi_json = request.POST['contents']
406 #            revision = request.POST['revision']
407 #
408 #            if request.POST.has_key('message'):
409 #                msg = u"$USER$ " + request.PUT['message']
410 #            else:
411 #                msg = u"$AUTO$ Dublin core update."
412 #
413 #            current = lib.document(docid, request.user.username)
414 #            orig = lib.document_for_rev(revision)
415 #
416 #            if current != orig:
417 #                return response.EntityConflict().django_response({
418 #                        "reason": "out-of-date",
419 #                        "provided": orig.revision,
420 #                        "latest": current.revision })
421 #
422 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
423 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
424 #
425 #            # zapisz
426 #            ndoc = current.quickwrite('xml', \
427 #                document.serialize().encode('utf-8'),\
428 #                message=msg, user=request.user.username)
429 #
430 #            try:
431 #                # return the new revision number
432 #                return {
433 #                    "document": ndoc.id,
434 #                    "subview": "dc",
435 #                    "previous_revision": current.revision,
436 #                    "revision": ndoc.revision,
437 #                    'timestamp': ndoc.revision.timestamp,
438 #                    "url": reverse("docdc_view", args=[ndoc.id])
439 #                }
440 #            except Exception, e:
441 #                if ndoc: lib._rollback()
442 #                raise e
443 #        except RevisionNotFound:
444 #            return response.EntityNotFound().django_response()
445
class MergeHandler(BaseHandler):
    """Merge between a user's copy of a document and the shared copy."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The form's 'revision' must be the latest revision of the user's
        document, otherwise EntityConflict ('out-of-date') is returned.
        The form's 'type' selects the operation:

        - 'update': `base_doc.update()` is called for the requesting user.
        - 'share': `base_doc.share()` is called; users lacking the
          'api.share_document' permission get a PullRequest created or
          refreshed instead and a RequestAccepted response.

        Any other type yields BadRequest.  A successful merge responds
        with the new and previous revisions of both the user's and the
        shared document; a merge with nothing to do responds with
        {"result": "no-op"}.
        """
        revision = form.cleaned_data['revision']
        merge_type = form.cleaned_data['type']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_rev(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            user_doc_new = base_doc.update(request.user.username)

            if user_doc_new == user_doc:
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update your branch to the latest version."
                })

            if base_doc.parentof(doc) or base_doc.has_parent_from(doc):
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.share_document'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # blank line between the old and the new comment
                        # (this used to be the literal text 'u\n\n')
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(\
                        ticket_status=prq.status, \
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user version is the same
            user_doc_new = base_doc

        else:
            # without this branch the names used in the response below
            # would be unbound for an unexpected type
            return response.BadRequest().django_response({
                "reason": "unknown-merge-type",
                "message": "Merge type must be 'update' or 'share'."
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "result": "success",
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })