562ed1b3bd830e527066d0bca053fe3d35d9e101
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
# Module-level logger shared by every handler defined in this file.
log = logging.getLogger('platforma.api.library')

# Module metadata.  The author's name starts with the Polish letter
# U+0141 ("L with stroke"); the file declares utf-8 encoding.
__author__ = "Łukasz Rekucki"
__date__ = "$2009-09-25 15:49:50$"
__doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from django.http import HttpResponse
13
14 from datetime import date
15
16 from django.core.urlresolvers import reverse
17 from django.db import IntegrityError
18
19 import librarian
20 import librarian.html
21 import difflib
22 from librarian import dcparser, parser
23
24 from wlrepo import *
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
27
28 # internal imports
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache, PullRequest
33
34 #
35 import settings
36
37
def is_prq(username):
    """Tell whether *username* names a pull-request pseudo-user.

    Such names are recognised purely by their literal ``$prq-`` prefix.
    """
    prefix = '$prq-'
    return username.startswith(prefix)
40
def prq_for_user(username):
    """Look up the pull request behind a ``$prq-<id>`` pseudo-username.

    The first five characters of *username* (the length of the ``$prq-``
    prefix) are dropped and the remainder is parsed as the integer primary
    key of a ``PullRequest``.

    Returns the matching ``PullRequest``, or ``None`` when the suffix is not
    an integer or no row with that id exists.  Any other failure (for
    instance a database error) now propagates instead of being silently
    turned into ``None``.
    """
    try:
        prq_id = int(username[5:])
    except (TypeError, ValueError):
        # Not sliceable, or the suffix is not a number.
        return None

    try:
        return PullRequest.objects.get(id=prq_id)
    except PullRequest.DoesNotExist:
        return None
46
def check_user(request, user):
    """Yield an access-denied response when the requester may not act as *user*.

    This is a generator that yields at most one item, so callers can write
    ``for error in check_user(...): return error``.

    * If *user* is a pull-request pseudo-user, the requester needs the
      ``api.view_prq`` permission.
    * If *user* differs from the requester's own username, the requester
      needs ``api.view_other_document``.
    * Otherwise nothing is yielded.
    """
    log.info("user: %r, perm: %r" % (request.user, request.user.get_all_permissions()) )

    if is_prq(user):
        # pull request
        required = 'api.view_prq'
        message = "You don't have enough priviliges to view pull requests."
    elif request.user.username != user:
        # other users
        required = 'api.view_other_document'
        message = "You don't have enough priviliges to view other people's document."
    else:
        # The requester is looking at their own document: nothing to check.
        return

    if not request.user.has_perm(required):
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': message
        })
64
65 #
66 # Document List Handlers
67 #
68 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only listing of the library, used as the anonymous handler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents.

        The result is ``{'documents': [...]}`` where every entry carries the
        document id as ``name`` and the reversed ``document_view`` URL as
        ``url``.
        """
        listing = []
        for docid in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })
        return {'documents': listing}
79         
80 #
81 # This handler controls the document collection
82 #
class LibraryHandler(BaseHandler):
    """Document collection endpoint: list documents (GET), upload one (POST)."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents, nested by their parts.

        Every document from ``lib.documents()`` becomes a dict with ``url``,
        ``name`` and ``parts``.  The ``(part_id, document_id)`` pairs read
        from ``PartCache`` attach the part's dict to its parent's ``parts``
        list and remove the part from the top level.  Pairs that mention a
        document the library does not know are logged and skipped.

        Returns ``{'documents': [...]}`` with the top-level documents, both
        that list and every ``parts`` list sorted with ``natural_order`` on
        the name.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # Shallow copy: starts with every document; parts are removed below
        # so only top-level documents remain.
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # A cached relation may also point at a parent the library no
            # longer has; skip it instead of failing with KeyError.
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            documents[docid]['parts'].append(documents[part])

            # not top-level anymore; the default keeps this safe when the
            # same part is listed under more than one parent
            document_tree.pop(part, None)

        for doc in documents.values():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.values(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document.

        The content comes from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8).  If
        ``generate_dc`` is set the text is passed through
        ``librarian.wrap_text`` with today's date.  The document is created
        under the library lock and the data is written as its ``xml`` part;
        if that write fails the creation is rolled back.

        Responses:
          * ``EntityCreated`` with ``url``, ``name`` and ``revision``;
          * ``BadRequest`` when neither ``ocr_data`` nor ``ocr_file`` is given;
          * ``EntityConflict`` when the document already exists;
          * ``InternalError`` carrying a traceback for other library errors.
        """
        import traceback

        data = form.cleaned_data['ocr_data']
        if not data:
            # Look the upload up with .get(): indexing request.FILES would
            # raise before the "nothing supplied" answer could be sent.
            upload = request.FILES.get('ocr_file')
            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            data = upload.read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body={
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision},
                    url=url)
            finally:
                lock.release()
        except DocumentAlreadyExists:
            # Handled before LibraryException on purpose: if this exception
            # derives from LibraryException (not visible from this file) the
            # generic handler would otherwise shadow it.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
179
180 #
181 # Document Handlers
182 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document metadata, used as the anonymous handler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the public metadata of document *docid*.

        The result holds the document ``name``, the reversed HTML, text and
        Dublin Core view URLs, and the revision as ``public_revision``.
        When the library raises ``RevisionNotFound`` an ``EntityNotFound``
        response is returned instead.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound as e:
            # Built with this module's own response helpers; the previous
            # code referenced a name that is not imported in this file.
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'message': e.message,
                'docid': docid,
            })

        return {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }
202
203
class DiffHandler(BaseHandler):
    """Endpoint producing a textual diff between two revisions."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, source_revision, target_revision, lib):
        """Return diff between source_revision and target_revision.

        Both revisions are resolved with ``lib.document_for_rev``; their
        ``xml`` data is split into lines (line endings kept) and compared
        with ``difflib.unified_diff`` using the labels ``source`` and
        ``target``.  The diff is returned as one string.
        """
        source_document = lib.document_for_rev(source_revision)
        target_document = lib.document_for_rev(target_revision)

        # Diagnostics go through the module logger rather than stdout.
        log.debug("diff: %s -> %s", source_document, target_document)

        diff = difflib.unified_diff(
            source_document.data('xml').splitlines(True),
            target_document.data('xml').splitlines(True),
            'source',
            'target')

        return ''.join(diff)
221
222
223 #
224 # Document Meta Data
225 #
class DocumentHandler(BaseHandler):
    """Metadata endpoint for a document as seen by a particular user."""
    allowed_methods = ('GET', 'PUT')
    anonymous = BasicDocumentHandler

    def _not_found(self, docid, user, message):
        """Build the 'document-not-found' response shared by read()."""
        return response.EntityNotFound().django_response({
            'reason': 'document-not-found',
            'message': message,
            'docid': docid,
            'user': user,
        })

    @validate_form(forms.DocumentRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib):
        """Read document's meta data.

        ``user`` defaults to the requesting user and ``revision`` to
        ``'latest'``.  Access is checked with ``check_user`` first.  If the
        requested user has no copy of the document (``RevisionNotFound``) a
        checkout is attempted: from the main document for the requester
        themself, or from the pull request's source revision for a
        ``$prq-`` pseudo-user.

        Returns a dict with ``name``, ``user``, the HTML/text/gallery/merge
        URLs, ``revision`` and ``timestamp``; or an ``EntityNotFound``
        response (reason ``revision-mismatch`` or ``document-not-found``);
        or whatever error response ``check_user`` yields.
        """
        log.info(u"User '%s' wants to %s(%s) as %s",
            request.user.username, docid,
            form.cleaned_data['revision'], form.cleaned_data['user'])

        user = form.cleaned_data['user'] or request.user.username
        rev = form.cleaned_data['revision'] or 'latest'

        for error in check_user(request, user):
            return error

        try:
            doc = lib.document(docid, user, rev=rev)
        except RevisionMismatch as e:
            # the document exists, but the revision is bad
            return response.EntityNotFound().django_response({
                'reason': 'revision-mismatch',
                'message': e.message,
                'docid': docid,
                'user': user,
            })
        except RevisionNotFound as e:
            # the user doesn't have this document checked out
            # or some other weird error occurred
            # try to do the checkout
            try:
                if user == request.user.username:
                    mdoc = lib.document(docid)
                    doc = mdoc.take(user)
                elif is_prq(user):
                    prq = prq_for_user(user)
                    if prq is None:
                        # prq_for_user() returns None for an unknown or
                        # malformed pull request id.
                        return self._not_found(docid, user,
                            "No such pull request: %s" % user)
                    # committer's document
                    prq_doc = lib.document_for_rev(prq.source_revision)
                    doc = prq_doc.take(user)
                else:
                    return self._not_found(docid, user, e.message)
            except RevisionNotFound as checkout_error:
                return self._not_found(docid, user, checkout_error.message)

        return {
            'name': doc.id,
            'user': user,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            # 'dc_url': reverse('docdc_view', args=[doc.id]),
            'gallery_url': reverse('docgallery_view', args=[doc.id]),
            'merge_url': reverse('docmerge_view', args=[doc.id]),
            'revision': doc.revision,
            'timestamp': doc.revision.timestamp,
            # 'public_revision': doc.revision,
            # 'public_timestamp': doc.revision.timestamp,
        }
294
295     
296 #    @hglibrary
297 #    def update(self, request, docid, lib):
298 #        """Update information about the document, like display not"""
299 #        return
300 #
301 #
302 #
class DocumentHTMLHandler(BaseHandler):
    """Endpoint rendering one document revision as HTML."""
    # One-element tuple: without the trailing comma this is just the
    # string 'GET'.
    allowed_methods = ('GET',)

    @validate_form(forms.DocumentRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib, stylesheet='partial'):
        """Read document as html text.

        The revision from the form is resolved with
        ``lib.document_for_rev``.  It must belong to *docid* and be owned by
        the requested user (default: the requester); then ``check_user``
        must raise no objection.  The XML is transformed with
        ``librarian.html.transform``.

        NOTE(review): the ``stylesheet`` argument is accepted but not used;
        the transform is always called with ``stylesheet='full'``.

        Returns the transform result, or a ``BadRequest`` (name/user
        mismatch), ``EntityNotFound`` (unknown entry or revision) or
        ``InternalError`` (validation or parse error) response.
        """
        try:
            revision = form.cleaned_data['revision']
            user = form.cleaned_data['user'] or request.user.username
            document = lib.document_for_rev(revision)

            if document.id != docid:
                return response.BadRequest().django_response({
                    'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'
                })

            if document.owner != user:
                return response.BadRequest().django_response({
                    'reason': 'user-mismatch',
                    'message': "Provided revision doesn't belong to user %s" % user
                })

            for error in check_user(request, user):
                return error

            return librarian.html.transform(document.data('xml'), is_file=False,
                parse_dublincore=False, stylesheet='full',
                options={
                    "with-paths": 'boolean(1)',
                })

        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'reason': 'not-found', 'message': e.message})
        except librarian.ValidationError as e:
            return response.InternalError().django_response({
                'reason': 'xml-non-valid', 'message': e.message })
        except librarian.ParseError as e:
            return response.InternalError().django_response({
                'reason': 'xml-parse-error', 'message': e.message })
345
346 #
347 # Image Gallery
348 #
349
class DocumentGalleryHandler(BaseHandler):
    """Endpoint listing the scan galleries attached to a document."""
    # One-element tuple: without the trailing comma this is just the
    # string 'GET'.
    allowed_methods = ('GET',)

    def _page_url(self, assoc, dirpath, filename):
        """Return the media URL for one directory entry of a gallery.

        Entries whose name cannot be decoded as UTF-8, or whose extension
        is not .png/.jpeg/.jpg, get the ``missing.png`` placeholder URL.
        """
        missing = settings.MEDIA_URL + u'/missing.png'

        if not isinstance(filename, unicode):
            try:
                filename = filename.decode('utf-8')
            except UnicodeDecodeError:
                log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
                    % (filename, dirpath) )
                return missing

        name, ext = os.path.splitext(os.path.basename(filename))

        if ext.lower() not in (u'.png', u'.jpeg', u'.jpg'):
            log.warn(u"Ignoring: %s %s", name, ext)
            return missing

        return settings.MEDIA_URL + assoc.subpath + u'/' + filename

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every ``GalleryForDocument`` row of *docid* whose directory
        (``MEDIA_ROOT`` joined with the row's ``subpath``) exists, the
        directory is listed and each entry becomes a URL-quoted page URL.
        Missing directories are logged and skipped.

        Returns a list of ``{'name': ..., 'pages': [...]}`` dicts, with
        ``pages`` sorted.
        """
        from urllib import quote

        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            # The URL is computed afresh for every entry, so a bad file name
            # can never reuse the previous entry's URL.
            pages = [
                quote(self._page_url(assoc, dirpath, entry).encode('utf-8'))
                for entry in os.listdir(dirpath)
            ]
            pages.sort()

            galleries.append({'name': assoc.name, 'pages': pages})

        return galleries
395
396
397
398 #
399 # Dublin Core handlers
400 #
401 # @requires librarian
402 #
403 #class DocumentDublinCoreHandler(BaseHandler):
404 #    allowed_methods = ('GET', 'POST')
405 #
406 #    @hglibrary
407 #    def read(self, request, docid, lib):
408 #        """Read document as raw text"""
409 #        try:
410 #            revision = request.GET.get('revision', 'latest')
411 #
412 #            if revision == 'latest':
413 #                doc = lib.document(docid)
414 #            else:
415 #                doc = lib.document_for_rev(revision)
416 #
417 #
418 #            if document.id != docid:
419 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
420 #                    'message': 'Provided revision is not valid for this document'})
421 #
422 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
423 #            return bookinfo.serialize()
424 #        except (EntryNotFound, RevisionNotFound), e:
425 #            return response.EntityNotFound().django_response({
426 #                'exception': type(e), 'message': e.message})
427 #
428 #    @hglibrary
429 #    def create(self, request, docid, lib):
430 #        try:
431 #            bi_json = request.POST['contents']
432 #            revision = request.POST['revision']
433 #
434 #            if request.POST.has_key('message'):
435 #                msg = u"$USER$ " + request.PUT['message']
436 #            else:
437 #                msg = u"$AUTO$ Dublin core update."
438 #
439 #            current = lib.document(docid, request.user.username)
440 #            orig = lib.document_for_rev(revision)
441 #
442 #            if current != orig:
443 #                return response.EntityConflict().django_response({
444 #                        "reason": "out-of-date",
445 #                        "provided": orig.revision,
446 #                        "latest": current.revision })
447 #
448 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
449 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
450 #
451 #            # zapisz
452 #            ndoc = current.quickwrite('xml', \
453 #                document.serialize().encode('utf-8'),\
454 #                message=msg, user=request.user.username)
455 #
456 #            try:
457 #                # return the new revision number
458 #                return {
459 #                    "document": ndoc.id,
460 #                    "subview": "dc",
461 #                    "previous_revision": current.revision,
462 #                    "revision": ndoc.revision,
463 #                    'timestamp': ndoc.revision.timestamp,
464 #                    "url": reverse("docdc_view", args=[ndoc.id])
465 #                }
466 #            except Exception, e:
467 #                if ndoc: lib._rollback()
468 #                raise e
469 #        except RevisionNotFound:
470 #            return response.EntityNotFound().django_response()
471
class MergeHandler(BaseHandler):
    """Endpoint for updating a user's branch or sharing it."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The form's ``revision`` must be the latest revision of the user's
        document, otherwise an ``EntityConflict`` (``out-of-date``) is
        returned.  Then, depending on the form's ``type``:

        * ``update`` -- calls ``update()`` on the user's document for the
          requesting user; answers ``no-op`` when nothing changed.
        * ``share`` -- requires the user's document to be up to date, to have
          something to share and to carry no conflict marks.  Users without
          the ``api.share_document`` permission get a pull request created
          (or their pending one refreshed) and a ``RequestAccepted``
          response; others have the document shared right away.
        * anything else -- ``BadRequest``.

        On success the response lists the user's and the shared document's
        revisions and timestamps, before and after the operation.
        """
        revision = form.cleaned_data['revision']
        merge_type = form.cleaned_data['type']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_rev(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            user_doc_new = base_doc.update(request.user.username)

            if user_doc_new == user_doc:
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update your branch to the latest version."
                })

            if not base_doc.would_share():
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.share_document'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # A blank line separates the old comment from the
                        # newly supplied message.
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(
                        ticket_status=prq.status,
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user version is the same
            user_doc_new = base_doc

        else:
            # Without this branch the summary below would fail on names that
            # were never assigned.
            return response.BadRequest().django_response({
                "reason": "unknown-merge-type",
                "message": "Merge type must be 'update' or 'share'."
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "result": "success",
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })