2b1edb2164d29ef5c893127e8ec6655c1a2ebfee
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12
13 import re
14 from datetime import date
15
16 from django.core.urlresolvers import reverse
17 from django.utils import simplejson as json
18 from django.db import IntegrityError
19
20 import librarian
21 import librarian.html
22 from librarian import dcparser, parser
23
24 from wlrepo import *
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
27
28 # internal imports
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache
33
34 #
35 import settings
36
37
def is_prq(username):
    """Return True when *username* carries the ``$prq-`` prefix.

    The rest of this module treats such names as pull-request
    pseudo-users rather than regular accounts.
    """
    has_prq_prefix = username.startswith('$prq-')
    return has_prq_prefix
40
def check_user(request, user):
    """Yield an access-denied response if the requester may not view *user*'s data.

    This is a generator. It yields at most one ``response.AccessDenied``
    Django response and yields nothing when access is allowed, so callers
    loop over it and return the first item they receive.

    Two cases are checked:
      * *user* is a pull-request name (see ``is_prq``): the requester needs
        the ``api.pullrequest.can_view`` permission;
      * *user* differs from the requester's own username: the requester
        needs the ``api.document.can_view_other`` permission.
    """
    requester = request.user
    log.info("user: %r, perm: %r" % (requester, requester.get_all_permissions()) )

    if is_prq(user):
        # pull request
        permission = 'api.pullrequest.can_view'
        message = "You don't have enough priviliges to view pull requests."
    elif requester.username != user:
        # other users
        permission = 'api.document.can_view_other'
        message = "You don't have enough priviliges to view other people's document."
    else:
        # the requester's own data: nothing to check
        return

    if requester.has_perm(permission):
        return

    yield response.AccessDenied().django_response({
        'reason': 'access-denied',
        'message': message
    })
58
59 #
60 # Document List Handlers
61 #
62 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only document listing; LibraryHandler uses it as its anonymous handler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        Every id reported by ``lib.documents()`` becomes a dict holding the
        document's view URL and its name; the result is wrapped as
        ``{'documents': [...]}``.
        """
        listing = []
        for docid in lib.documents():
            entry = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            }
            listing.append(entry)

        return {'documents': listing}
73         
74 #
# This handler controls the document collection
76 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: list it (GET) or add to it (POST)."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents, nested by part relationship.

        One entry (``url``, ``name``, ``parts``) is built for every id
        returned by ``lib.documents()``.  Each ``(part_id, document_id)``
        pair found in ``PartCache`` moves the part's entry out of the top
        level and into its parent's ``parts`` list.  Both the top level and
        every ``parts`` list are ordered with ``natural_order`` on ``name``.

        Returns ``{'documents': [top-level entries]}``.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # shallow copy: same entry dicts, but entries can be removed from the
        # top level without losing the ability to look them up in `documents`
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # a cache row may also point at a parent that no longer exists;
            # skip it instead of failing the whole listing with a KeyError
            parent = documents.get(docid)
            if parent is None:
                log.info("NOT FOUND: %s", docid)
                continue

            child = documents[part]

            # not top-level anymore; the same part can be listed under more
            # than one parent, so it may have been removed already
            document_tree.pop(part, None)
            parent['parts'].append(child)

        for doc in documents.itervalues():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.itervalues(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The content comes from the form's ``ocr_data`` field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8).  When
        ``generate_dc`` is set the text is passed through
        ``librarian.wrap_text`` together with today's date.  The document is
        created under the library lock and its XML written with
        ``quickwrite``; if the write fails the creation is rolled back.

        Returns an EntityCreated response on success, BadRequest when no
        content was supplied, EntityConflict when the document already
        exists and InternalError for other library failures.
        """
        import traceback

        data = None
        if form.cleaned_data['ocr_data']:
            data = form.cleaned_data['ocr_data']
        else:
            # .get(): a missing upload has to reach the BadRequest below
            # instead of blowing up with a KeyError
            upload = request.FILES.get('ocr_file')
            if upload is not None:
                data = upload.read().decode('utf-8')

        if data is None:
            return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet

                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(\
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        # NOTE(review): checked before LibraryException in case
        # DocumentAlreadyExists derives from it (not visible from this file);
        # the order is harmless if the two are unrelated.
        except DocumentAlreadyExists:
            # Document is already there
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
174
175 #
176 # Document Handlers
177 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document meta data; DocumentHandler uses it as its anonymous handler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return URLs and the revision of the document *docid*.

        The document is fetched with ``lib.document(docid)`` (no user given).
        Returns a dict with ``name``, ``html_url``, ``text_url``, ``dc_url``
        and ``public_revision``, or an EntityNotFound response when the
        library raises RevisionNotFound.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # `rc` is never imported in this module, so the previous
            # `rc.NOT_FOUND` could only raise NameError; answer the same way
            # the other handlers in this file do.
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'docid': docid,
            })

        result = {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }

        return result
197
198 #
199 # Document Meta Data
200 #
201 class DocumentHandler(BaseHandler):
202     allowed_methods = ('GET', 'PUT')
203     anonymous = BasicDocumentHandler
204
205     @validate_form(forms.DocumentRetrieveForm, 'GET')
206     @hglibrary
207     def read(self, request, form, docid, lib):
208         """Read document's meta data"""       
209         log.info(u"User '%s' wants to %s(%s) as %s" % \
210             (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
211
212         user = form.cleaned_data['user'] or request.user.username
213         rev = form.cleaned_data['revision'] or 'latest'
214
215         for error in check_user(request, user):
216             return error
217             
218         try:
219             doc = lib.document(docid, user, rev=rev)
220         except RevisionMismatch, e:
221             # the document exists, but the revision is bad
222             return response.EntityNotFound().django_response({
223                 'reason': 'revision-mismatch',
224                 'message': e.message,
225                 'docid': docid,
226                 'user': user,
227             })
228         except RevisionNotFound, e:
229             # the user doesn't have this document checked out
230             # or some other weird error occured
231             # try to do the checkout
232             if is_prq(user) or (user == request.user.username):
233                 try:
234                     mdoc = lib.document(docid)
235                     doc = mdoc.take(user)
236
237                     if is_prq(user):
238                         # source revision, should probably change
239                         # but there are no changes yet, so...
240                         pass
241                     
242                 except RevisionNotFound, e:
243                     return response.EntityNotFound().django_response({
244                         'reason': 'document-not-found',
245                         'message': e.message,
246                         'docid': docid
247                     })
248             else:
249                 return response.EntityNotFound().django_response({
250                     'reason': 'document-not-found',
251                     'message': e.message,
252                     'docid': docid,
253                     'user': user,
254                 })
255
256         return {
257             'name': doc.id,
258             'user': user,
259             'html_url': reverse('dochtml_view', args=[doc.id]),
260             'text_url': reverse('doctext_view', args=[doc.id]),
261             # 'dc_url': reverse('docdc_view', args=[doc.id]),
262             'gallery_url': reverse('docgallery_view', args=[doc.id]),
263             'merge_url': reverse('docmerge_view', args=[doc.id]),
264             'revision': doc.revision,
265             'timestamp': doc.revision.timestamp,
266             # 'public_revision': doc.revision,
267             # 'public_timestamp': doc.revision.timestamp,
268         }   
269
270     
271 #    @hglibrary
272 #    def update(self, request, docid, lib):
273 #        """Update information about the document, like display not"""
274 #        return
275 #
276 #
277 #
278 class DocumentHTMLHandler(BaseHandler):
279     allowed_methods = ('GET')
280
281     @validate_form(forms.DocumentRetrieveForm, 'GET')
282     @hglibrary
283     def read(self, request, form, docid, lib, stylesheet='partial'):
284         """Read document as html text"""
285         try:
286             revision = form.cleaned_data['revision']
287             user = form.cleaned_data['user'] or request.user.username
288             document = lib.document_for_rev(revision)
289
290             if document.id != docid:
291                 return response.BadRequest().django_response({
292                     'reason': 'name-mismatch',
293                     'message': 'Provided revision is not valid for this document'
294                 })
295
296             if document.owner != user:
297                 return response.BadRequest().django_response({
298                     'reason': 'user-mismatch',
299                     'message': "Provided revision doesn't belong to user %s" % user
300                 })
301
302             for error in check_user(request, user):
303                 return error
304
305             return librarian.html.transform(document.data('xml'), is_file=False, \
306                 parse_dublincore=False, stylesheet=stylesheet,\
307                 options={
308                     "with-paths": 'boolean(1)',                    
309                 })
310                 
311         except (EntryNotFound, RevisionNotFound), e:
312             return response.EntityNotFound().django_response({
313                 'reason': 'not-found', 'message': e.message})
314         except librarian.ParseError, e:
315             return response.InternalError().django_response({
316                 'reason': 'xml-parse-error', 'message': e.message })
317
318 #
319 # Image Gallery
320 #
321
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries attached to a document."""
    # a real one-element tuple: ('GET') is just the string 'GET'
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every ``GalleryForDocument`` row of *docid* whose directory
        exists under ``settings.MEDIA_ROOT``, the directory entries are
        listed in sorted order and turned into URL-quoted page addresses.
        Entries whose name cannot be decoded as UTF-8, or whose extension is
        not .png/.jpeg/.jpg, are represented by the ``missing.png``
        placeholder.  Galleries whose directory is missing are skipped.

        Returns a list of ``{'name': ..., 'pages': [...]}`` dicts.
        """
        from urllib import quote

        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in sorted(os.listdir(dirpath)):
                # start every entry without a URL, so a rejected file can
                # never inherit the URL computed for the previous one
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
                            % (filename, dirpath) )
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in (u'.png', u'.jpeg', u'.jpg'):
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        # leave url unset so the placeholder is used; it used
                        # to be overwritten with the real path right away
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append( quote(url.encode('utf-8')) )

            galleries.append(gallery)

        return galleries
367
368 #
369 # Document Text View
370 #
371
# Matches an <include ...> tag (optionally namespace-prefixed) whose href
# attribute is a quoted wlrepo:// address; the part of the address after
# "wlrepo://" is captured as the named group "link".
# NOTE(review): inside a character class "\1" is not a back-reference but the
# octal escape for chr(1), so "[^\1]+?" accepts practically any character.
# The match still ends at the closing quote because the quantifier is lazy
# and is followed by the real back-reference "\1".
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
373 #
374 #
375 #
376
377 class DocumentTextHandler(BaseHandler):
378     allowed_methods = ('GET', 'POST')
379
380     @validate_form(forms.TextRetrieveForm, 'GET')
381     @hglibrary
382     def read(self, request, form, docid, lib):
383         """Read document as raw text"""        
384         try:
385             revision = form.cleaned_data['revision']
386             part = form.cleaned_data['part']
387             user = form.cleaned_data['user'] or request.user.username            
388             
389             document = lib.document_for_rev(revision)
390             
391             if document.id != docid:
392                 return response.BadRequest().django_response({
393                     'reason': 'name-mismatch',
394                     'message': 'Provided revision is not valid for this document'
395                 })
396
397             if document.owner != user:
398                 return response.BadRequest().django_response({
399                     'reason': 'user-mismatch',
400                     'message': "Provided revision doesn't belong to user %s" % user
401                 })
402
403             for error in check_user(request, user):
404                 return error
405             
406             if not part:                
407                 return document.data('xml')
408             
409             xdoc = parser.WLDocument.from_string(document.data('xml'),\
410                 parse_dublincore=False)
411             ptext = xdoc.part_as_text(part)
412
413             if ptext is None:
414                 return response.EntityNotFound().django_response({
415                       'reason': 'no-part-in-document'                     
416                 })
417
418             return ptext
419         except librarian.ParseError, e:
420             return response.EntityNotFound().django_response({
421                 'reason': 'invalid-document-state',
422                 'exception': type(e),
423                 'message': e.message
424             })
425         except (EntryNotFound, RevisionNotFound), e:
426             return response.EntityNotFound().django_response({
427                 'reason': 'not-found',
428                 'exception': type(e), 'message': e.message
429             })   
430
431     @validate_form(forms.TextUpdateForm, 'POST')
432     @hglibrary
433     def create(self, request, form, docid, lib):
434         try:
435             revision = form.cleaned_data['revision']
436             msg = form.cleaned_data['message']
437             user = form.cleaned_data['user'] or request.user.username
438
439             # do not allow changing not owned documents
440             # (for now... )
441             
442             
443             if user != request.user.username:
444                 return response.AccessDenied().django_response({
445                     'reason': 'insufficient-priviliges',
446                 })
447             
448             current = lib.document(docid, user)
449             orig = lib.document_for_rev(revision)
450
451             if current != orig:
452                 return response.EntityConflict().django_response({
453                         "reason": "out-of-date",
454                         "provided_revision": orig.revision,
455                         "latest_revision": current.revision })
456             
457             if form.cleaned_data.has_key('contents'):
458                 data = form.cleaned_data['contents']
459             else:                               
460                 chunks = form.cleaned_data['chunks']
461                 xdoc = parser.WLDocument.from_string(current.data('xml'))
462                 errors = xdoc.merge_chunks(chunks)
463
464                 if len(errors):
465                     return response.EntityConflict().django_response({
466                             "reason": "invalid-chunks",
467                             "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
468                     })
469
470                 data = xdoc.serialize()
471
472             # try to find any Xinclude tags
473             includes = [m.groupdict()['link'] for m in (re.finditer(\
474                 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
475
476             log.info("INCLUDES: %s", includes)
477
478             # TODO: provide useful routines to make this simpler
479             def xml_update_action(lib, resolve):
480                 try:
481                     f = lib._fileopen(resolve('parts'), 'r')
482                     stored_includes = json.loads(f.read())
483                     f.close()
484                 except:
485                     stored_includes = []
486                 
487                 if stored_includes != includes:
488                     f = lib._fileopen(resolve('parts'), 'w+')
489                     f.write(json.dumps(includes))
490                     f.close()
491
492                     lib._fileadd(resolve('parts'))
493
494                     # update the parts cache
495                     PartCache.update_cache(docid, current.owner,\
496                         stored_includes, includes)
497
498                 # now that the parts are ok, write xml
499                 f = lib._fileopen(resolve('xml'), 'w+')
500                 f.write(data.encode('utf-8'))
501                 f.close()
502
503             ndoc = None
504             ndoc = current.invoke_and_commit(\
505                 xml_update_action, lambda d: (msg, user) )
506
507             try:
508                 # return the new revision number
509                 return response.SuccessAllOk().django_response({
510                     "document": ndoc.id,
511                     "user": user,
512                     "subview": "xml",
513                     "previous_revision": current.revision,
514                     "revision": ndoc.revision,
515                     'timestamp': ndoc.revision.timestamp,
516                     "url": reverse("doctext_view", args=[ndoc.id])
517                 })
518             except Exception, e:
519                 if ndoc: lib._rollback()
520                 raise e        
521         except RevisionNotFound, e:
522             return response.EntityNotFound(mimetype="text/plain").\
523                 django_response(e.message)
524
525
526 #
527 # Dublin Core handlers
528 #
529 # @requires librarian
530 #
531 #class DocumentDublinCoreHandler(BaseHandler):
532 #    allowed_methods = ('GET', 'POST')
533 #
534 #    @hglibrary
535 #    def read(self, request, docid, lib):
536 #        """Read document as raw text"""
537 #        try:
538 #            revision = request.GET.get('revision', 'latest')
539 #
540 #            if revision == 'latest':
541 #                doc = lib.document(docid)
542 #            else:
543 #                doc = lib.document_for_rev(revision)
544 #
545 #
546 #            if document.id != docid:
547 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
548 #                    'message': 'Provided revision is not valid for this document'})
549 #
550 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
551 #            return bookinfo.serialize()
552 #        except (EntryNotFound, RevisionNotFound), e:
553 #            return response.EntityNotFound().django_response({
554 #                'exception': type(e), 'message': e.message})
555 #
556 #    @hglibrary
557 #    def create(self, request, docid, lib):
558 #        try:
559 #            bi_json = request.POST['contents']
560 #            revision = request.POST['revision']
561 #
562 #            if request.POST.has_key('message'):
563 #                msg = u"$USER$ " + request.PUT['message']
564 #            else:
565 #                msg = u"$AUTO$ Dublin core update."
566 #
567 #            current = lib.document(docid, request.user.username)
568 #            orig = lib.document_for_rev(revision)
569 #
570 #            if current != orig:
571 #                return response.EntityConflict().django_response({
572 #                        "reason": "out-of-date",
573 #                        "provided": orig.revision,
574 #                        "latest": current.revision })
575 #
576 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
577 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
578 #
579 #            # zapisz
580 #            ndoc = current.quickwrite('xml', \
581 #                document.serialize().encode('utf-8'),\
582 #                message=msg, user=request.user.username)
583 #
584 #            try:
585 #                # return the new revision number
586 #                return {
587 #                    "document": ndoc.id,
588 #                    "subview": "dc",
589 #                    "previous_revision": current.revision,
590 #                    "revision": ndoc.revision,
591 #                    'timestamp': ndoc.revision.timestamp,
592 #                    "url": reverse("docdc_view", args=[ndoc.id])
593 #                }
594 #            except Exception, e:
595 #                if ndoc: lib._rollback()
596 #                raise e
597 #        except RevisionNotFound:
598 #            return response.EntityNotFound().django_response()
599
class MergeHandler(BaseHandler):
    """Synchronize a user's copy of a document with the shared one."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The submitted revision must be the latest one of the user's document
        (otherwise EntityConflict "out-of-date").  The form's ``type`` field
        selects the operation:

          * ``update`` - ``update`` is called on the user's document for the
            requesting user; the shared document is left as it is.
          * ``share``  - the user's document is shared; it has to be up to
            date and free of conflict marks.  Users without the
            ``api.document.can_share`` permission get a pull request
            created (or refreshed) instead and a RequestAccepted response.

        Returns a SuccessAllOk response carrying the user's and the shared
        revision both before ("parent") and after the operation, so the
        client can tell which of its views needs refreshing.
        """
        revision = form.cleaned_data['revision']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_rev(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        merge_type = form.cleaned_data['type']

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            changed, clean = base_doc.update(request.user.username)

            # update user document; when nothing changed the user's document
            # is simply the one that was sent in (this used to be left
            # unbound, which crashed the response below)
            if changed:
                user_doc_new = user_doc.latest()
            else:
                user_doc_new = user_doc

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update yout branch to the latest version."
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.document.can_share'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # u'\n\n', not 'u\n\n': the latter glued a literal
                        # "u" onto the stored comment
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(\
                        ticket_status=prq.status, \
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed; otherwise the shared document
            # is unchanged (this used to be left unbound)
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user wersion is the same
            user_doc_new = base_doc

        else:
            # neither branch applies, so there is nothing to report
            return response.BadRequest().django_response({
                "reason": "unknown-merge-type",
                "message": "Merge type must be either 'update' or 'share'."
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            # the revision the client sent, i.e. the state before this
            # operation (mirrors shared_parent_* below)
            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })