c84cab8f105b9b1c9e03c3d01bfad7bbf2d7b528
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from django.http import HttpResponse
13
14 from datetime import date
15
16 from django.core.urlresolvers import reverse
17 from django.db import IntegrityError
18
19 import librarian
20 import librarian.html
21 import difflib
22 from librarian import dcparser, parser
23
24 from wlrepo import *
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
27
28 # internal imports
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache, PullRequest
33
34 #
35 import settings
36
37
def is_prq(username):
    """Tell whether ``username`` carries the ``$prq-`` pull-request prefix."""
    prq_prefix = '$prq-'
    return username.startswith(prq_prefix)
40
def prq_for_user(username):
    """Return the PullRequest encoded in a ``$prq-<id>`` username, or None.

    The part of ``username`` after the 5-character ``$prq-`` prefix is
    parsed as an integer primary key.  ``None`` is returned when that part
    is not a number or when no pull request with that id exists.  Any other
    failure (e.g. a database error) is propagated instead of being silently
    swallowed.
    """
    try:
        prq_id = int(username[len('$prq-'):])
    except (TypeError, ValueError):
        # not a string, or nothing numeric after the prefix
        return None

    try:
        return PullRequest.objects.get(id=prq_id)
    except PullRequest.DoesNotExist:
        return None
46
def check_user(request, user):
    """Yield an access-denied response when ``request.user`` may not act as ``user``.

    This is a generator: it yields at most one response and yields nothing
    when access is allowed.  Callers in this file iterate over it and return
    the first item, if any.

    - A ``$prq-`` name requires the ``api.view_prq`` permission.
    - Any other name different from the requesting user's own requires
      ``api.view_other_document``.
    """
    viewer = request.user
    log.info("user: %r, perm: %r" % (viewer, viewer.get_all_permissions()) )

    if is_prq(user):
        # pull request
        needed_perm = 'api.view_prq'
        denial = "You don't have enough priviliges to view pull requests."
    elif viewer.username != user:
        # other users
        needed_perm = 'api.view_other_document'
        denial = "You don't have enough priviliges to view other people's document."
    else:
        # the requesting user's own name: nothing to check
        return

    if not viewer.has_perm(needed_perm):
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': denial
        })
64
65 #
66 # Document List Handlers
67 #
68 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only document listing (used as the anonymous variant of LibraryHandler)."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return every document of ``lib`` as a flat list of url/name entries."""
        entries = []
        for docid in lib.documents():
            entries.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })
        return {'documents': entries}
79         
80 #
# This handler controls the document collection
82 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: list documents, create new ones."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the documents as a tree.

        Every document of ``lib`` gets an entry with ``url``, ``name`` and
        ``parts``.  Each (part_id, document_id) pair from ``PartCache``
        nests the part's entry under the document's ``parts`` and removes
        the part from the top level.  Pairs referring to documents that are
        not in the library are logged and skipped.  Both the top level and
        every ``parts`` list are sorted with ``natural_order`` on the name.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # shallow copy: same entry dicts, but entries can be dropped from
        # the top level without losing them from `documents`
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # a cached parent that is gone from the library would otherwise
            # abort the whole listing with a KeyError
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            parent = documents[docid]
            child = documents[part]

            # not top-level anymore; the part may already have been removed
            # if it is listed under more than one document
            document_tree.pop(part, None)
            parent['parts'].append(child)

        for doc in documents.values():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.values(),
            key=natural_order(lambda d: d['name']))}

    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document from ``ocr_data`` or an uploaded ``ocr_file``.

        Responses:
        - BadRequest when neither ``ocr_data`` nor ``ocr_file`` is supplied,
        - EntityCreated with the url, name and revision on success,
        - EntityConflict when the library raises DocumentAlreadyExists,
        - InternalError (with a traceback) on any other LibraryException.
        """
        data = form.cleaned_data['ocr_data']

        if not data:
            upload = request.FILES.get('ocr_file')
            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            data = upload.read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    import traceback
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body={
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision},
                    url=url)
            finally:
                lock.release()
        # NOTE(review): handled before LibraryException so this branch stays
        # reachable if DocumentAlreadyExists is a LibraryException subclass
        # (its definition is not visible in this file).
        except DocumentAlreadyExists:
            # Document is already there
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            import traceback
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
179
180 #
181 # Document Handlers
182 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document meta data (used as the anonymous variant of DocumentHandler)."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the name, view urls and revision of document ``docid``.

        Responds with EntityNotFound when the library raises
        RevisionNotFound for ``docid``.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # `rc` (piston.utils) is not imported in this module, so use the
            # same response helper as the other handlers here.
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'docid': docid,
            })

        result = {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }

        return result
202
203
class DiffHandler(BaseHandler):
    """Serve a textual diff between two revisions."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, source_revision, target_revision, lib):
        """Return a unified diff between source_revision and target_revision.

        The 'xml' data of both revisions is compared line by line with
        ``difflib.unified_diff`` and returned as one string, using the
        labels ``source`` and ``target``.
        """
        try:
            source_document = lib.document_for_rev(source_revision)
            target_document = lib.document_for_rev(target_revision)
        except (EntryNotFound, RevisionNotFound):
            # NOTE(review): assumes document_for_rev raises these, as
            # DocumentHTMLHandler does for the same call -- confirm.
            return response.EntityNotFound().django_response({
                'reason': 'not-found'})

        # debug output goes to the module logger, not to stdout
        log.debug("Diff: %s -> %s", source_document, target_document)

        diff = difflib.unified_diff(
            source_document.data('xml').splitlines(True),
            target_document.data('xml').splitlines(True),
            'source',
            'target')

        return ''.join(diff)
221
222
223 #
224 # Document Meta Data
225 #
226 class DocumentHandler(BaseHandler):
227     allowed_methods = ('GET', 'PUT')
228     anonymous = BasicDocumentHandler
229
230     @validate_form(forms.DocumentRetrieveForm, 'GET')
231     @hglibrary
232     def read(self, request, form, docid, lib):
233         """Read document's meta data"""       
234         log.info(u"User '%s' wants to %s(%s) as %s" % \
235             (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
236
237         user = form.cleaned_data['user'] or request.user.username
238         rev = form.cleaned_data['revision'] or 'latest'
239
240         for error in check_user(request, user):
241             return error
242             
243         try:
244             doc = lib.document(docid, user, rev=rev)
245         except RevisionMismatch, e:
246             # the document exists, but the revision is bad
247             return response.EntityNotFound().django_response({
248                 'reason': 'revision-mismatch',
249                 'message': e.message,
250                 'docid': docid,
251                 'user': user,
252             })
253         except RevisionNotFound, e:
254             # the user doesn't have this document checked out
255             # or some other weird error occured
256             # try to do the checkout
257             try:
258                 if user == request.user.username:
259                     mdoc = lib.document(docid)
260                     doc = mdoc.take(user)
261                 elif is_prq(user):
262                     prq = prq_for_user(user)
263                     # commiter's document
264                     prq_doc = lib.document_for_rev(prq.source_revision)
265                     doc = prq_doc.take(user)
266                 else:
267                     return response.EntityNotFound().django_response({
268                         'reason': 'document-not-found',
269                         'message': e.message,
270                         'docid': docid,
271                         'user': user,
272                     })
273             except RevisionNotFound, e:
274                 return response.EntityNotFound().django_response({
275                     'reason': 'document-not-found',
276                     'message': e.message,
277                     'docid': docid,
278                     'user': user
279                 })
280
281         return {
282             'name': doc.id,
283             'user': user,
284             'html_url': reverse('dochtml_view', args=[doc.id]),
285             'text_url': reverse('doctext_view', args=[doc.id]),
286             # 'dc_url': reverse('docdc_view', args=[doc.id]),
287             'gallery_url': reverse('docgallery_view', args=[doc.id]),
288             'merge_url': reverse('docmerge_view', args=[doc.id]),
289             'revision': doc.revision,
290             'timestamp': doc.revision.timestamp,
291             # 'public_revision': doc.revision,
292             # 'public_timestamp': doc.revision.timestamp,
293         }   
294
295     
296 #    @hglibrary
297 #    def update(self, request, docid, lib):
298 #        """Update information about the document, like display not"""
299 #        return
300 #
301 #
302 #
303 class DocumentHTMLHandler(BaseHandler):
304     allowed_methods = ('GET')
305
306     @validate_form(forms.DocumentRetrieveForm, 'GET')
307     @hglibrary
308     def read(self, request, form, docid, lib, stylesheet='partial'):
309         """Read document as html text"""
310         try:
311             revision = form.cleaned_data['revision']
312             user = form.cleaned_data['user'] or request.user.username
313             document = lib.document_for_rev(revision)
314
315             if document.id != docid:
316                 return response.BadRequest().django_response({
317                     'reason': 'name-mismatch',
318                     'message': 'Provided revision is not valid for this document'
319                 })
320
321             if document.owner != user:
322                 return response.BadRequest().django_response({
323                     'reason': 'user-mismatch',
324                     'message': "Provided revision doesn't belong to user %s" % user
325                 })
326
327             for error in check_user(request, user):
328                 return error
329
330             return librarian.html.transform(document.data('xml'), is_file=False, \
331                 parse_dublincore=False, stylesheet=stylesheet,\
332                 options={
333                     "with-paths": 'boolean(1)',                    
334                 })
335                 
336         except (EntryNotFound, RevisionNotFound), e:
337             return response.EntityNotFound().django_response({
338                 'reason': 'not-found', 'message': e.message})
339         except librarian.ParseError, e:
340             return response.InternalError().django_response({
341                 'reason': 'xml-parse-error', 'message': e.message })
342
343 #
344 # Image Gallery
345 #
346
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries associated with a document."""
    # one-element tuple; a bare ('GET') is just the string 'GET'
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        Returns one ``{'name', 'pages'}`` dict per GalleryForDocument row
        whose directory exists under ``settings.MEDIA_ROOT``; missing
        directories are logged and skipped.  ``pages`` holds one url-quoted
        entry per directory item: the media url of the file for
        .png/.jpeg/.jpg files, and the ``missing.png`` placeholder for any
        other file or for a file name that cannot be decoded as UTF-8.
        Pages are listed in ``os.listdir`` order.
        """
        from urllib import quote

        image_extensions = (u'.png', u'.jpeg', u'.jpg')
        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # reset for every entry, so a skipped file never reuses the
                # url computed for the previous one
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting.",
                            filename, dirpath)
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in image_extensions:
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append(quote(url.encode('utf-8')))

#            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
392
393
394
395 #
396 # Dublin Core handlers
397 #
398 # @requires librarian
399 #
400 #class DocumentDublinCoreHandler(BaseHandler):
401 #    allowed_methods = ('GET', 'POST')
402 #
403 #    @hglibrary
404 #    def read(self, request, docid, lib):
405 #        """Read document as raw text"""
406 #        try:
407 #            revision = request.GET.get('revision', 'latest')
408 #
409 #            if revision == 'latest':
410 #                doc = lib.document(docid)
411 #            else:
412 #                doc = lib.document_for_rev(revision)
413 #
414 #
415 #            if document.id != docid:
416 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
417 #                    'message': 'Provided revision is not valid for this document'})
418 #
419 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
420 #            return bookinfo.serialize()
421 #        except (EntryNotFound, RevisionNotFound), e:
422 #            return response.EntityNotFound().django_response({
423 #                'exception': type(e), 'message': e.message})
424 #
425 #    @hglibrary
426 #    def create(self, request, docid, lib):
427 #        try:
428 #            bi_json = request.POST['contents']
429 #            revision = request.POST['revision']
430 #
431 #            if request.POST.has_key('message'):
432 #                msg = u"$USER$ " + request.PUT['message']
433 #            else:
434 #                msg = u"$AUTO$ Dublin core update."
435 #
436 #            current = lib.document(docid, request.user.username)
437 #            orig = lib.document_for_rev(revision)
438 #
439 #            if current != orig:
440 #                return response.EntityConflict().django_response({
441 #                        "reason": "out-of-date",
442 #                        "provided": orig.revision,
443 #                        "latest": current.revision })
444 #
445 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
446 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
447 #
448 #            # zapisz
449 #            ndoc = current.quickwrite('xml', \
450 #                document.serialize().encode('utf-8'),\
451 #                message=msg, user=request.user.username)
452 #
453 #            try:
454 #                # return the new revision number
455 #                return {
456 #                    "document": ndoc.id,
457 #                    "subview": "dc",
458 #                    "previous_revision": current.revision,
459 #                    "revision": ndoc.revision,
460 #                    'timestamp': ndoc.revision.timestamp,
461 #                    "url": reverse("docdc_view", args=[ndoc.id])
462 #                }
463 #            except Exception, e:
464 #                if ndoc: lib._rollback()
465 #                raise e
466 #        except RevisionNotFound:
467 #            return response.EntityNotFound().django_response()
468
class MergeHandler(BaseHandler):
    """Merge a user's document with the shared one ('update' or 'share')."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The form supplies ``revision``, ``type`` and ``message``.

        - ``update``: calls ``update()`` on the user's document with the
          requesting user's name; the shared document is left as is.
        - ``share``: calls ``share()`` on the user's document.  Users
          without the ``api.share_document`` permission get a PullRequest
          created (or their pending one updated) instead.

        Responses:
        - EntityConflict ('out-of-date') when ``revision`` is not the latest
          revision of the user's document,
        - SuccessAllOk with ``result: no-op`` when there is nothing to do,
        - BadRequest for a non fast-forward share, unresolved conflict
          marks or an unknown ``type``,
        - RequestAccepted when a pull request was recorded,
        - EntityConflict ('request-already-exist') on IntegrityError,
        - SuccessAllOk with the new and previous revisions otherwise.
        """
        revision = form.cleaned_data['revision']
        merge_type = form.cleaned_data['type']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_rev(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            user_doc_new = base_doc.update(request.user.username)

            if user_doc_new == user_doc:
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update your branch to the latest version."
                })

            if base_doc.parentof(doc) or base_doc.has_parent_from(doc):
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.share_document'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # separate the appended message with a blank line
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(
                        ticket_status=prq.status,
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]))
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user version is the same
            user_doc_new = base_doc

        else:
            # NOTE(review): MergeRequestForm presumably restricts `type`;
            # this guards against falling through with user_doc_new and
            # doc_new unbound if it does not.
            return response.BadRequest().django_response({
                "reason": "unknown-merge-type",
                "message": "Merge type must be 'update' or 'share'."
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "result": "success",
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })