Podstawowy DiffHandler.
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from django.http import HttpResponse
13
14 import re
15 from datetime import date
16
17 from django.core.urlresolvers import reverse
18 from django.utils import simplejson as json
19 from django.db import IntegrityError
20
21 import librarian
22 import librarian.html
23 import difflib
24 from librarian import dcparser, parser
25
26 from wlrepo import *
27 from api.models import PullRequest
28 from explorer.models import GalleryForDocument
29
30 # internal imports
31 import api.forms as forms
32 import api.response as response
33 from api.utils import validate_form, hglibrary, natural_order
34 from api.models import PartCache, PullRequest
35
36 #
37 import settings
38
39
def is_prq(username):
    """Tell whether *username* names a pull-request pseudo-user.

    Such names are recognised by their ``$prq-`` prefix.
    """
    prefix = '$prq-'
    return username[:len(prefix)] == prefix
42
def prq_for_user(username):
    """Return the PullRequest referenced by a ``$prq-<id>`` pseudo-user name.

    The id is parsed from the characters following the five-character
    prefix.  Returns None when that suffix is not an integer or when no
    pull request with that id exists.
    """
    try:
        return PullRequest.objects.get(id=int(username[5:]))
    except (TypeError, ValueError, PullRequest.DoesNotExist):
        # Only the expected "bad name / unknown id" cases map to None;
        # anything else (e.g. a database failure) is allowed to propagate
        # instead of being silently swallowed.
        return None
48
def check_user(request, user):
    """Yield an access-denied response if the requester may not view *user*'s data.

    This is a generator: it yields at most one response and nothing at all
    when access is allowed, so callers loop over it and return the first
    item they get.
    """
    requester = request.user
    log.info("user: %r, perm: %r" % (requester, requester.get_all_permissions()))

    if is_prq(user):
        # pull request pseudo-user
        permission = 'api.view_prq'
        message = "You don't have enough priviliges to view pull requests."
    elif requester.username != user:
        # another user's document
        permission = 'api.view_other_document'
        message = "You don't have enough priviliges to view other people's document."
    else:
        # the requester's own document: nothing to check
        return

    if not requester.has_perm(permission):
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': message
        })
66
67 #
68 # Document List Handlers
69 #
70 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """GET-only document listing used as the anonymous variant of LibraryHandler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return a flat list with the name and view URL of every document."""
        entries = []
        for docid in lib.documents():
            entries.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })
        return {'documents': entries}
81         
82 #
# This handler controls the document collection
84 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: list documents and upload new ones."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the documents as a tree built from the cached part relations.

        Every document in the library gets an entry with ``url``, ``name``
        and ``parts``.  A document that PartCache records as a part of
        another document is nested in its parent's ``parts`` and removed from
        the top level.  Both levels are sorted with ``natural_order`` by name.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # Shallow copy: it shares the entry dicts with `documents`, but
        # entries can be dropped from the top level without losing them.
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # a cache row may also point at a parent that is not in the library
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            parent = documents[docid]
            child = documents[part]

            # Not top-level anymore.  The same part can be listed under more
            # than one parent, so it may already have been removed.
            document_tree.pop(part, None)
            parent['parts'].append(child)

        for doc in documents.itervalues():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.itervalues(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document from the ``ocr_data`` field or the ``ocr_file`` upload.

        Returns an entity-created response with the new document's url, name
        and revision; a bad-request response when no content was supplied; a
        conflict response when the document already exists; an internal-error
        response (carrying a traceback) for other library failures.
        """
        import traceback

        data = form.cleaned_data['ocr_data']

        if not data:
            upload = request.FILES.get('ocr_file')
            if upload is None:
                # neither inline data nor an uploaded file was provided
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            data = upload.read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        except DocumentAlreadyExists:
            # Handled before LibraryException so that this branch stays
            # reachable even if DocumentAlreadyExists derives from it.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
181
182 #
183 # Document Handlers
184 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """GET-only document view used as the anonymous variant of DocumentHandler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the name, view URLs and revision of the document *docid*.

        Responds with an entity-not-found response when the library raises
        RevisionNotFound for the document.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # `rc` is never imported by name in this module, so answer with
            # the response helper the other handlers here rely on.
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'docid': docid,
            })

        result = {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }

        return result
204
205
206 class DiffHandler(BaseHandler):
207     allowed_methods = ('GET',)
208     
209     @hglibrary
210     def read(self, request, source_revision, target_revision, lib):
211         '''Return diff between source_revision and target_revision)'''
212         source_document = lib.document_for_rev(source_revision)
213         target_document = lib.document_for_rev(target_revision)
214         print source_document,
215         print target_document
216         diff = difflib.unified_diff(
217             source_document.data('xml').splitlines(True),
218             target_document.data('xml').splitlines(True),
219             'source',
220             'target')
221         
222         return ''.join(list(diff))
223
224
225 #
226 # Document Meta Data
227 #
228 class DocumentHandler(BaseHandler):
229     allowed_methods = ('GET', 'PUT')
230     anonymous = BasicDocumentHandler
231
232     @validate_form(forms.DocumentRetrieveForm, 'GET')
233     @hglibrary
234     def read(self, request, form, docid, lib):
235         """Read document's meta data"""       
236         log.info(u"User '%s' wants to %s(%s) as %s" % \
237             (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
238
239         user = form.cleaned_data['user'] or request.user.username
240         rev = form.cleaned_data['revision'] or 'latest'
241
242         for error in check_user(request, user):
243             return error
244             
245         try:
246             doc = lib.document(docid, user, rev=rev)
247         except RevisionMismatch, e:
248             # the document exists, but the revision is bad
249             return response.EntityNotFound().django_response({
250                 'reason': 'revision-mismatch',
251                 'message': e.message,
252                 'docid': docid,
253                 'user': user,
254             })
255         except RevisionNotFound, e:
256             # the user doesn't have this document checked out
257             # or some other weird error occured
258             # try to do the checkout
259             try:
260                 if user == request.user.username:
261                     mdoc = lib.document(docid)
262                     doc = mdoc.take(user)
263                 elif is_prq(user):
264                     prq = prq_for_user(user)
265                     # commiter's document
266                     prq_doc = lib.document_for_rev(prq.source_revision)
267                     doc = prq_doc.take(user)
268                 else:
269                     return response.EntityNotFound().django_response({
270                         'reason': 'document-not-found',
271                         'message': e.message,
272                         'docid': docid,
273                         'user': user,
274                     })
275             except RevisionNotFound, e:
276                 return response.EntityNotFound().django_response({
277                     'reason': 'document-not-found',
278                     'message': e.message,
279                     'docid': docid,
280                     'user': user
281                 })
282
283         return {
284             'name': doc.id,
285             'user': user,
286             'html_url': reverse('dochtml_view', args=[doc.id]),
287             'text_url': reverse('doctext_view', args=[doc.id]),
288             # 'dc_url': reverse('docdc_view', args=[doc.id]),
289             'gallery_url': reverse('docgallery_view', args=[doc.id]),
290             'merge_url': reverse('docmerge_view', args=[doc.id]),
291             'revision': doc.revision,
292             'timestamp': doc.revision.timestamp,
293             # 'public_revision': doc.revision,
294             # 'public_timestamp': doc.revision.timestamp,
295         }   
296
297     
298 #    @hglibrary
299 #    def update(self, request, docid, lib):
300 #        """Update information about the document, like display not"""
301 #        return
302 #
303 #
304 #
305 class DocumentHTMLHandler(BaseHandler):
306     allowed_methods = ('GET')
307
308     @validate_form(forms.DocumentRetrieveForm, 'GET')
309     @hglibrary
310     def read(self, request, form, docid, lib, stylesheet='partial'):
311         """Read document as html text"""
312         try:
313             revision = form.cleaned_data['revision']
314             user = form.cleaned_data['user'] or request.user.username
315             document = lib.document_for_rev(revision)
316
317             if document.id != docid:
318                 return response.BadRequest().django_response({
319                     'reason': 'name-mismatch',
320                     'message': 'Provided revision is not valid for this document'
321                 })
322
323             if document.owner != user:
324                 return response.BadRequest().django_response({
325                     'reason': 'user-mismatch',
326                     'message': "Provided revision doesn't belong to user %s" % user
327                 })
328
329             for error in check_user(request, user):
330                 return error
331
332             return librarian.html.transform(document.data('xml'), is_file=False, \
333                 parse_dublincore=False, stylesheet=stylesheet,\
334                 options={
335                     "with-paths": 'boolean(1)',                    
336                 })
337                 
338         except (EntryNotFound, RevisionNotFound), e:
339             return response.EntityNotFound().django_response({
340                 'reason': 'not-found', 'message': e.message})
341         except librarian.ParseError, e:
342             return response.InternalError().django_response({
343                 'reason': 'xml-parse-error', 'message': e.message })
344
345 #
346 # Image Gallery
347 #
348
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries associated with a document."""
    # A one-element tuple needs the trailing comma: ('GET') is just the
    # string 'GET'.
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        Returns a list of ``{'name': ..., 'pages': [...]}`` dicts, one for
        each GalleryForDocument row of *docid* whose directory exists under
        MEDIA_ROOT.  ``pages`` holds URL-quoted image URLs in the order
        ``os.listdir`` reports the files.  An entry that cannot be used
        (file name not decodable as UTF-8, or not a .png/.jpeg/.jpg file) is
        represented by the ``missing.png`` placeholder.
        """
        from urllib import quote

        image_extensions = (u'.png', u'.jpeg', u'.jpg')
        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # Decided afresh for every entry, so a rejected file can
                # neither reuse the previous entry's URL nor hit an unbound
                # name.
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting.",
                            filename, dirpath)
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in image_extensions:
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append( quote(url.encode('utf-8')) )

            # NOTE: pages are deliberately left unsorted here
            # (gallery['pages'].sort() was disabled).
            galleries.append(gallery)

        return galleries
394
395 #
396 # Document Text View
397 #
398
# Matches an (optionally namespace-prefixed) <include ...> tag whose href
# attribute holds a wlrepo:// address.  Group 1 captures the opening quote,
# which the closing quote must repeat; the named group "link" captures the
# address after the "wlrepo://" scheme.
# NOTE(review): inside the character class "[^\1]" the "\1" is a numeric
# character escape, not a backreference; it is the non-greedy "+?" followed
# by the real backreference that ends "link" at the closing quote.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
400 #
401 #
402 #
403
404 class DocumentTextHandler(BaseHandler):
405     allowed_methods = ('GET', 'POST')
406
407     @validate_form(forms.TextRetrieveForm, 'GET')
408     @hglibrary
409     def read(self, request, form, docid, lib):
410         """Read document as raw text"""        
411         try:
412             revision = form.cleaned_data['revision']
413             part = form.cleaned_data['part']
414             user = form.cleaned_data['user'] or request.user.username            
415             
416             document = lib.document_for_rev(revision)
417             
418             if document.id != docid:
419                 return response.BadRequest().django_response({
420                     'reason': 'name-mismatch',
421                     'message': 'Provided revision is not valid for this document'
422                 })
423
424             if document.owner != user:
425                 return response.BadRequest().django_response({
426                     'reason': 'user-mismatch',
427                     'message': "Provided revision doesn't belong to user %s" % user
428                 })
429
430             for error in check_user(request, user):
431                 return error
432             
433             if not part:                
434                 return document.data('xml')
435             
436             xdoc = parser.WLDocument.from_string(document.data('xml'),\
437                 parse_dublincore=False)
438             ptext = xdoc.part_as_text(part)
439
440             if ptext is None:
441                 return response.EntityNotFound().django_response({
442                       'reason': 'no-part-in-document'                     
443                 })
444
445             return ptext
446         except librarian.ParseError, e:
447             return response.EntityNotFound().django_response({
448                 'reason': 'invalid-document-state',
449                 'exception': type(e),
450                 'message': e.message
451             })
452         except (EntryNotFound, RevisionNotFound), e:
453             return response.EntityNotFound().django_response({
454                 'reason': 'not-found',
455                 'exception': type(e), 'message': e.message
456             })   
457
458     @validate_form(forms.TextUpdateForm, 'POST')
459     @hglibrary
460     def create(self, request, form, docid, lib):
461         try:
462             revision = form.cleaned_data['revision']
463             msg = form.cleaned_data['message']
464             user = form.cleaned_data['user'] or request.user.username
465
466             # do not allow changing not owned documents
467             # (for now... )
468             
469             
470             if user != request.user.username:
471                 return response.AccessDenied().django_response({
472                     'reason': 'insufficient-priviliges',
473                 })
474             
475             current = lib.document(docid, user)
476             orig = lib.document_for_rev(revision)
477
478             if current != orig:
479                 return response.EntityConflict().django_response({
480                         "reason": "out-of-date",
481                         "provided_revision": orig.revision,
482                         "latest_revision": current.revision })
483             
484             if form.cleaned_data.has_key('contents'):
485                 data = form.cleaned_data['contents']
486             else:                               
487                 chunks = form.cleaned_data['chunks']
488                 xdoc = parser.WLDocument.from_string(current.data('xml'))
489                 errors = xdoc.merge_chunks(chunks)
490
491                 if len(errors):
492                     return response.EntityConflict().django_response({
493                             "reason": "invalid-chunks",
494                             "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
495                     })
496
497                 data = xdoc.serialize()
498
499             # try to find any Xinclude tags
500             includes = [m.groupdict()['link'] for m in (re.finditer(\
501                 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
502
503             log.info("INCLUDES: %s", includes)
504
505             # TODO: provide useful routines to make this simpler
506             def xml_update_action(lib, resolve):
507                 try:
508                     f = lib._fileopen(resolve('parts'), 'r')
509                     stored_includes = json.loads(f.read())
510                     f.close()
511                 except:
512                     stored_includes = []
513                 
514                 if stored_includes != includes:
515                     f = lib._fileopen(resolve('parts'), 'w+')
516                     f.write(json.dumps(includes))
517                     f.close()
518
519                     lib._fileadd(resolve('parts'))
520
521                     # update the parts cache
522                     PartCache.update_cache(docid, current.owner,\
523                         stored_includes, includes)
524
525                 # now that the parts are ok, write xml
526                 f = lib._fileopen(resolve('xml'), 'w+')
527                 f.write(data.encode('utf-8'))
528                 f.close()
529
530             ndoc = None
531             ndoc = current.invoke_and_commit(\
532                 xml_update_action, lambda d: (msg, user) )
533
534             try:
535                 # return the new revision number
536                 return response.SuccessAllOk().django_response({
537                     "document": ndoc.id,
538                     "user": user,
539                     "subview": "xml",
540                     "previous_revision": current.revision,
541                     "revision": ndoc.revision,
542                     'timestamp': ndoc.revision.timestamp,
543                     "url": reverse("doctext_view", args=[ndoc.id])
544                 })
545             except Exception, e:
546                 if ndoc: lib._rollback()
547                 raise e        
548         except RevisionNotFound, e:
549             return response.EntityNotFound(mimetype="text/plain").\
550                 django_response(e.message)
551
552
553 #
554 # Dublin Core handlers
555 #
556 # @requires librarian
557 #
558 #class DocumentDublinCoreHandler(BaseHandler):
559 #    allowed_methods = ('GET', 'POST')
560 #
561 #    @hglibrary
562 #    def read(self, request, docid, lib):
563 #        """Read document as raw text"""
564 #        try:
565 #            revision = request.GET.get('revision', 'latest')
566 #
567 #            if revision == 'latest':
568 #                doc = lib.document(docid)
569 #            else:
570 #                doc = lib.document_for_rev(revision)
571 #
572 #
573 #            if document.id != docid:
574 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
575 #                    'message': 'Provided revision is not valid for this document'})
576 #
577 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
578 #            return bookinfo.serialize()
579 #        except (EntryNotFound, RevisionNotFound), e:
580 #            return response.EntityNotFound().django_response({
581 #                'exception': type(e), 'message': e.message})
582 #
583 #    @hglibrary
584 #    def create(self, request, docid, lib):
585 #        try:
586 #            bi_json = request.POST['contents']
587 #            revision = request.POST['revision']
588 #
589 #            if request.POST.has_key('message'):
590 #                msg = u"$USER$ " + request.PUT['message']
591 #            else:
592 #                msg = u"$AUTO$ Dublin core update."
593 #
594 #            current = lib.document(docid, request.user.username)
595 #            orig = lib.document_for_rev(revision)
596 #
597 #            if current != orig:
598 #                return response.EntityConflict().django_response({
599 #                        "reason": "out-of-date",
600 #                        "provided": orig.revision,
601 #                        "latest": current.revision })
602 #
603 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
604 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
605 #
606 #            # zapisz
607 #            ndoc = current.quickwrite('xml', \
608 #                document.serialize().encode('utf-8'),\
609 #                message=msg, user=request.user.username)
610 #
611 #            try:
612 #                # return the new revision number
613 #                return {
614 #                    "document": ndoc.id,
615 #                    "subview": "dc",
616 #                    "previous_revision": current.revision,
617 #                    "revision": ndoc.revision,
618 #                    'timestamp': ndoc.revision.timestamp,
619 #                    "url": reverse("docdc_view", args=[ndoc.id])
620 #                }
621 #            except Exception, e:
622 #                if ndoc: lib._rollback()
623 #                raise e
624 #        except RevisionNotFound:
625 #            return response.EntityNotFound().django_response()
626
class MergeHandler(BaseHandler):
    """Update a user's copy from the shared document, or share the user's changes."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The form's ``revision`` must be the latest revision of the user's
        copy, otherwise an out-of-date conflict is returned.  Depending on
        the form's ``type``:

        * ``update``: the user's copy is updated; the shared document is
          left as it is.
        * ``share``: the user's copy must be up to date and free of conflict
          marks.  A requester without the ``api.share_document`` permission
          gets a pull request created (or refreshed) and a request-accepted
          response; otherwise the changes are shared right away.

        On success the response carries the new and parent revisions and
        timestamps of both the user's and the shared document.
        """
        revision = form.cleaned_data['revision']
        merge_type = form.cleaned_data['type']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_rev(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            user_doc_new = base_doc.update(request.user.username)

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update yout branch to the latest version."
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.share_document'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # blank line between the old and the appended comment
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(
                        ticket_status=prq.status,
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user version is the same
            user_doc_new = base_doc

        else:
            # Without this branch an unexpected type would fail below on
            # unbound names.  NOTE(review): the form presumably restricts
            # 'type' already; confirm.
            return response.BadRequest().django_response({
                "reason": "unknown-merge-type",
                "message": "Unsupported merge type: %s" % merge_type
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })
730             "shared_parent_timestamp": doc.revision.timestamp,
731         })