06358618308efe14535293be9c6a676ac280d988
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12
13 import re
14 from datetime import date
15
16 from django.core.urlresolvers import reverse
17 from django.utils import simplejson as json
18 from django.db import IntegrityError
19
20 import librarian
21 import librarian.html
22 from librarian import dcparser, parser
23
24 from wlrepo import *
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
27
28 # internal imports
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache, PullRequest
33
34 #
35 import settings
36
37
def is_prq(username):
    """Tell whether `username` names a pull-request pseudo-user.

    Such names start with the ``$prq-`` prefix.
    """
    prefix = '$prq-'
    return username[:len(prefix)] == prefix
40
def prq_for_user(username):
    """Return the PullRequest addressed by a ``$prq-<id>`` pseudo-user name.

    The text after the five-character ``$prq-`` prefix is parsed as the
    pull request id.

    Returns None when that text is not an integer (or `username` cannot be
    sliced at all) and when no PullRequest with that id exists.  Any other
    error, e.g. a database failure, propagates to the caller.
    """
    try:
        prq_id = int(username[len('$prq-'):])
    except (TypeError, ValueError):
        # not of the form "$prq-<number>"
        return None

    try:
        return PullRequest.objects.get(id=prq_id)
    except PullRequest.DoesNotExist:
        return None
46
def check_user(request, user):
    """Yield an access-denied response if the requester may not act as `user`.

    This is a generator yielding at most one response:

    * `user` is a pull-request pseudo-user and the requester lacks the
      ``api.view_prq`` permission;
    * `user` is somebody else and the requester lacks the
      ``api.view_other_document`` permission.

    Nothing is yielded when access is allowed.
    """
    requester = request.user
    log.info("user: %r, perm: %r" % (requester, requester.get_all_permissions()) )

    if is_prq(user):
        # pull request
        needed_perm = 'api.view_prq'
        denial = "You don't have enough priviliges to view pull requests."
    elif requester.username != user:
        # other users
        needed_perm = 'api.view_other_document'
        denial = "You don't have enough priviliges to view other people's document."
    else:
        # the requester's own data: nothing to check
        needed_perm = None
        denial = None

    if needed_perm is not None and not requester.has_perm(needed_perm):
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': denial
        })
64
65 #
66 # Document List Handlers
67 #
68 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only, flat listing of the library's documents."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Each entry carries the document's ``url`` and ``name``.
        """
        listing = []
        for docid in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })
        return {'documents': listing}
79         
80 #
81 # This handler controls the document collection
82 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: listing (GET) and upload (POST)."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents, with parts nested under parents.

        Every document reported by the library gets an entry with ``url``,
        ``name`` and ``parts``.  For each (part, document) pair stored in
        PartCache the part's entry is appended to the parent's ``parts`` and
        removed from the top level.  Pairs naming a part or a parent that the
        library does not report are logged and skipped.  Both levels are
        sorted with `natural_order` on the name.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # shallow copy: initially every document is a top-level one
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # the cache may equally reference a parent that is gone
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            # not top-level anymore; a part listed under several parents
            # has already been removed, hence the default
            document_tree.pop(part, None)
            documents[docid]['parts'].append(documents[part])

        for doc in documents.values():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.values(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The text is taken from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8).  With
        ``generate_dc`` set, the text is first passed through
        `librarian.wrap_text` together with today's date.

        Responses:

        * BadRequest -- neither ``ocr_data`` nor ``ocr_file`` was sent;
        * EntityCreated -- body holds the new document's url, name, revision;
        * EntityConflict -- the library reported DocumentAlreadyExists;
        * InternalError -- any other LibraryException, including a failed
          write of the content (the library is rolled back in that case).
        """
        import traceback

        data = form.cleaned_data['ocr_data']

        if not data:
            upload = request.FILES.get('ocr_file')
            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            data = upload.read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        except DocumentAlreadyExists:
            # Document is already there.
            # NOTE(review): handled before LibraryException so that it stays
            # reachable should DocumentAlreadyExists be a subclass of it --
            # confirm against wlrepo.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
179
180 #
181 # Document Handlers
182 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only view of a document's public meta data."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the name, view URLs and revision of document `docid`.

        Answers with an EntityNotFound response when the library raises
        RevisionNotFound for the document.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound as e:
            # built with this module's own response helper; piston's `rc`
            # is not among the names imported explicitly here
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'message': e.message,
                'docid': docid,
            })

        result = {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }

        return result
202
203 #
204 # Document Meta Data
205 #
class DocumentHandler(BaseHandler):
    """Handler for the meta data of a single document."""
    allowed_methods = ('GET', 'PUT')
    anonymous = BasicDocumentHandler

    @validate_form(forms.DocumentRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib):
        """Read document's meta data.

        The document is looked up for ``user`` (form field, defaulting to
        the requesting user) at ``revision`` (defaulting to ``'latest'``).
        When the library raises RevisionNotFound, a checkout is attempted:
        from the main document for the requester's own name, or from the
        pull request's source revision for a ``$prq-<id>`` name.

        Returns a dict with the document's name, user, view URLs, revision
        and timestamp, or an error response:

        * whatever `check_user` yields when access is denied;
        * EntityNotFound with reason ``revision-mismatch`` or
          ``document-not-found``.
        """
        log.info(u"User '%s' wants to %s(%s) as %s" % \
            (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )

        user = form.cleaned_data['user'] or request.user.username
        rev = form.cleaned_data['revision'] or 'latest'

        for error in check_user(request, user):
            return error

        def not_found(reason, message):
            # common payload of every "not found" answer of this method
            return response.EntityNotFound().django_response({
                'reason': reason,
                'message': message,
                'docid': docid,
                'user': user,
            })

        try:
            doc = lib.document(docid, user, rev=rev)
        except RevisionMismatch as e:
            # the document exists, but the revision is bad
            return not_found('revision-mismatch', e.message)
        except RevisionNotFound as e:
            # the user doesn't have this document checked out
            # or some other weird error occured
            # try to do the checkout
            try:
                if user == request.user.username:
                    mdoc = lib.document(docid)
                    doc = mdoc.take(user)
                elif is_prq(user):
                    prq = prq_for_user(user)
                    if prq is None:
                        # malformed id or no such pull request: there is
                        # no source revision to check out from
                        return not_found('document-not-found', e.message)
                    # commiter's document
                    prq_doc = lib.document_for_rev(prq.source_revision)
                    doc = prq_doc.take(user)
                else:
                    return not_found('document-not-found', e.message)
            except RevisionNotFound as checkout_error:
                return not_found('document-not-found', checkout_error.message)

        return {
            'name': doc.id,
            'user': user,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            # 'dc_url': reverse('docdc_view', args=[doc.id]),
            'gallery_url': reverse('docgallery_view', args=[doc.id]),
            'merge_url': reverse('docmerge_view', args=[doc.id]),
            'revision': doc.revision,
            'timestamp': doc.revision.timestamp,
            # 'public_revision': doc.revision,
            # 'public_timestamp': doc.revision.timestamp,
        }
274
275     
276 #    @hglibrary
277 #    def update(self, request, docid, lib):
278 #        """Update information about the document, like display not"""
279 #        return
280 #
281 #
282 #
class DocumentHTMLHandler(BaseHandler):
    """Handler rendering a document revision as HTML."""
    # one-element tuple: a bare ('GET') is just the string 'GET'
    allowed_methods = ('GET',)

    @validate_form(forms.DocumentRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib, stylesheet='partial'):
        """Read document as html text.

        The ``revision`` form field selects the document revision; it must
        belong to document `docid` and to ``user`` (form field, defaulting
        to the requesting user).  The XML is transformed by
        `librarian.html.transform` using `stylesheet`.

        Error responses:

        * BadRequest -- reason ``name-mismatch`` or ``user-mismatch``;
        * whatever `check_user` yields when access is denied;
        * EntityNotFound -- EntryNotFound / RevisionNotFound was raised;
        * InternalError -- librarian.ParseError was raised.
        """
        try:
            revision = form.cleaned_data['revision']
            user = form.cleaned_data['user'] or request.user.username
            document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({
                    'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'
                })

            if document.owner != user:
                return response.BadRequest().django_response({
                    'reason': 'user-mismatch',
                    'message': "Provided revision doesn't belong to user %s" % user
                })

            for error in check_user(request, user):
                return error

            return librarian.html.transform(document.data('xml'), is_file=False,
                parse_dublincore=False, stylesheet=stylesheet,
                options={
                    "with-paths": 'boolean(1)',
                })

        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'reason': 'not-found', 'message': e.message})
        except librarian.ParseError as e:
            return response.InternalError().django_response({
                'reason': 'xml-parse-error', 'message': e.message })
322
323 #
324 # Image Gallery
325 #
326
class DocumentGalleryHandler(BaseHandler):
    """Handler listing the scan galleries associated with a document."""
    # one-element tuple: a bare ('GET') is just the string 'GET'
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        Returns a list with one ``{'name': ..., 'pages': [...]}`` dict per
        GalleryForDocument row of `docid` whose directory exists under
        ``settings.MEDIA_ROOT``; missing directories are logged and skipped.

        ``pages`` holds one URL-quoted, UTF-8 encoded URL per directory
        entry, in `os.listdir` order.  Entries whose name is not valid UTF-8
        or whose extension is not .png/.jpeg/.jpg are logged and represented
        by the ``missing.png`` placeholder URL.
        """
        from urllib import quote

        galleries = []
        image_extensions = (u'.png', u'.jpeg', u'.jpg')

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # reset per entry, so nothing leaks from the previous one
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
                            % (filename, dirpath) )
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in image_extensions:
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append( quote(url.encode('utf-8')) )

#            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
372
373 #
374 # Document Text View
375 #
376
# Matches an (optionally namespace-prefixed) <include ...> tag whose href
# attribute, quoted with " or ', holds a wlrepo:// URL.  The part of the URL
# after "wlrepo://" is captured as the named group "link".
# NOTE(review): inside a character class "\1" is not a backreference but the
# character chr(1), so "[^\1]" accepts practically any character; the capture
# ends at the closing quote only thanks to the lazy "+?" followed by the real
# "\1" backreference.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
378 #
379 #
380 #
381
class DocumentTextHandler(BaseHandler):
    """Handler reading (GET) and updating (POST) the XML text of a document."""
    allowed_methods = ('GET', 'POST')

    @validate_form(forms.TextRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib):
        """Read document as raw text.

        The ``revision`` form field selects the document revision; it must
        belong to document `docid` and to ``user`` (form field, defaulting
        to the requesting user).  Without ``part`` the whole XML is returned;
        otherwise the text `part_as_text` gives for that part.

        Error responses:

        * BadRequest -- reason ``name-mismatch`` or ``user-mismatch``;
        * whatever `check_user` yields when access is denied;
        * EntityNotFound -- the part is missing, the XML doesn't parse, or
          EntryNotFound / RevisionNotFound was raised.
        """
        try:
            revision = form.cleaned_data['revision']
            part = form.cleaned_data['part']
            user = form.cleaned_data['user'] or request.user.username

            document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({
                    'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'
                })

            if document.owner != user:
                return response.BadRequest().django_response({
                    'reason': 'user-mismatch',
                    'message': "Provided revision doesn't belong to user %s" % user
                })

            for error in check_user(request, user):
                return error

            if not part:
                return document.data('xml')

            xdoc = parser.WLDocument.from_string(document.data('xml'),
                parse_dublincore=False)
            ptext = xdoc.part_as_text(part)

            if ptext is None:
                return response.EntityNotFound().django_response({
                      'reason': 'no-part-in-document'
                })

            return ptext
        except librarian.ParseError as e:
            # NOTE(review): 'exception' carries the class object itself --
            # confirm django_response() is able to serialize it
            return response.EntityNotFound().django_response({
                'reason': 'invalid-document-state',
                'exception': type(e),
                'message': e.message
            })
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'reason': 'not-found',
                'exception': type(e), 'message': e.message
            })

    @validate_form(forms.TextUpdateForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Commit new text of the document to the requester's own copy.

        The new text is the ``contents`` form field or, when that key is
        absent, the current XML with the ``chunks`` merged in.  The wlrepo://
        links found by XINCLUDE_REGEXP are stored in the document's ``parts``
        file (and PartCache is updated) whenever they differ from the stored
        ones, then the XML is written and everything is committed with the
        given ``message``.

        Responses:

        * SuccessAllOk -- with the new and the previous revision;
        * AccessDenied -- ``user`` is not the requesting user;
        * EntityConflict -- ``revision`` is not the user's current one
          (``out-of-date``) or chunks failed to merge (``invalid-chunks``);
        * EntityNotFound -- RevisionNotFound was raised.
        """
        try:
            revision = form.cleaned_data['revision']
            msg = form.cleaned_data['message']
            user = form.cleaned_data['user'] or request.user.username

            # do not allow changing not owned documents
            # (for now... )
            if user != request.user.username:
                return response.AccessDenied().django_response({
                    'reason': 'insufficient-priviliges',
                })

            current = lib.document(docid, user)
            orig = lib.document_for_rev(revision)

            if current != orig:
                return response.EntityConflict().django_response({
                        "reason": "out-of-date",
                        "provided_revision": orig.revision,
                        "latest_revision": current.revision })

            if 'contents' in form.cleaned_data:
                data = form.cleaned_data['contents']
            else:
                chunks = form.cleaned_data['chunks']
                xdoc = parser.WLDocument.from_string(current.data('xml'))
                errors = xdoc.merge_chunks(chunks)

                if len(errors):
                    return response.EntityConflict().django_response({
                            "reason": "invalid-chunks",
                            "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
                    })

                data = xdoc.serialize()

            # try to find any Xinclude tags
            includes = [m.group('link') for m in
                re.finditer(XINCLUDE_REGEXP, data, flags=re.UNICODE)]

            log.info("INCLUDES: %s", includes)

            # TODO: provide useful routines to make this simpler
            def xml_update_action(lib, resolve):
                # best effort: any failure to read or parse the stored list
                # is treated as "nothing stored"
                try:
                    f = lib._fileopen(resolve('parts'), 'r')
                    try:
                        stored_includes = json.loads(f.read())
                    finally:
                        f.close()
                except Exception:
                    stored_includes = []

                if stored_includes != includes:
                    f = lib._fileopen(resolve('parts'), 'w+')
                    try:
                        f.write(json.dumps(includes))
                    finally:
                        f.close()

                    lib._fileadd(resolve('parts'))

                    # update the parts cache
                    PartCache.update_cache(docid, current.owner,
                        stored_includes, includes)

                # now that the parts are ok, write xml
                f = lib._fileopen(resolve('xml'), 'w+')
                try:
                    f.write(data.encode('utf-8'))
                finally:
                    f.close()

            ndoc = current.invoke_and_commit(
                xml_update_action, lambda d: (msg, user) )

            try:
                # return the new revision number
                return response.SuccessAllOk().django_response({
                    "document": ndoc.id,
                    "user": user,
                    "subview": "xml",
                    "previous_revision": current.revision,
                    "revision": ndoc.revision,
                    'timestamp': ndoc.revision.timestamp,
                    "url": reverse("doctext_view", args=[ndoc.id])
                })
            except Exception:
                if ndoc: lib._rollback()
                # bare raise keeps the original traceback
                raise
        except RevisionNotFound as e:
            return response.EntityNotFound(mimetype="text/plain").\
                django_response(e.message)
529
530
531 #
532 # Dublin Core handlers
533 #
534 # @requires librarian
535 #
536 #class DocumentDublinCoreHandler(BaseHandler):
537 #    allowed_methods = ('GET', 'POST')
538 #
539 #    @hglibrary
540 #    def read(self, request, docid, lib):
541 #        """Read document as raw text"""
542 #        try:
543 #            revision = request.GET.get('revision', 'latest')
544 #
545 #            if revision == 'latest':
546 #                doc = lib.document(docid)
547 #            else:
548 #                doc = lib.document_for_rev(revision)
549 #
550 #
551 #            if document.id != docid:
552 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
553 #                    'message': 'Provided revision is not valid for this document'})
554 #
555 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
556 #            return bookinfo.serialize()
557 #        except (EntryNotFound, RevisionNotFound), e:
558 #            return response.EntityNotFound().django_response({
559 #                'exception': type(e), 'message': e.message})
560 #
561 #    @hglibrary
562 #    def create(self, request, docid, lib):
563 #        try:
564 #            bi_json = request.POST['contents']
565 #            revision = request.POST['revision']
566 #
567 #            if request.POST.has_key('message'):
568 #                msg = u"$USER$ " + request.PUT['message']
569 #            else:
570 #                msg = u"$AUTO$ Dublin core update."
571 #
572 #            current = lib.document(docid, request.user.username)
573 #            orig = lib.document_for_rev(revision)
574 #
575 #            if current != orig:
576 #                return response.EntityConflict().django_response({
577 #                        "reason": "out-of-date",
578 #                        "provided": orig.revision,
579 #                        "latest": current.revision })
580 #
581 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
582 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
583 #
584 #            # zapisz
585 #            ndoc = current.quickwrite('xml', \
586 #                document.serialize().encode('utf-8'),\
587 #                message=msg, user=request.user.username)
588 #
589 #            try:
590 #                # return the new revision number
591 #                return {
592 #                    "document": ndoc.id,
593 #                    "subview": "dc",
594 #                    "previous_revision": current.revision,
595 #                    "revision": ndoc.revision,
596 #                    'timestamp': ndoc.revision.timestamp,
597 #                    "url": reverse("docdc_view", args=[ndoc.id])
598 #                }
599 #            except Exception, e:
600 #                if ndoc: lib._rollback()
601 #                raise e
602 #        except RevisionNotFound:
603 #            return response.EntityNotFound().django_response()
604
class MergeHandler(BaseHandler):
    """Handler for merging between a user's copy and the shared document."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        ``revision`` must be the latest revision of the user's document.
        The ``type`` form field selects the operation:

        * ``update`` -- update the user's document (``base_doc.update``);
          the shared document stays as it is;
        * ``share`` -- publish the user's document (``base_doc.share``).
          Requesters lacking the ``api.share_document`` permission get a
          PullRequest created or refreshed instead, answered with
          RequestAccepted.

        Responses:

        * SuccessAllOk -- revisions and timestamps of the user's and the
          shared document, before and after the operation;
        * EntityConflict -- ``revision`` is out of date, or storing the pull
          request raised IntegrityError;
        * BadRequest -- the branch is not up to date, has unresolved
          conflict marks, or ``type`` is not one of the above.
        """
        revision = form.cleaned_data['revision']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_rev(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        merge_type = form.cleaned_data['type']

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            user_doc_new = base_doc.update(request.user.username)

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update yout branch to the latest version."
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.share_document'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # a blank line separates the appended message
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(
                        ticket_status=prq.status,
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user wersion is the same
            user_doc_new = base_doc

        else:
            # NOTE(review): the form presumably restricts `type` already;
            # this guards the summary below against unset results
            return response.BadRequest().django_response({
                "reason": "unknown-merge-type",
                "message": "Unsupported merge type: %r" % (merge_type,)
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })