Merge branch 'master' of stigma.nowoczesnapolska.org.pl:platforma
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from django.http import HttpResponse
13
14 from datetime import date
15
16 from django.core.urlresolvers import reverse
17 from django.db import IntegrityError
18
19 import librarian
20 import librarian.html
21 import difflib
22 from librarian import dcparser, parser
23
24 from wlrepo import *
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
27
28 # internal imports
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache, PullRequest
33
34 from pygments import highlight
35 from pygments.lexers import DiffLexer
36 from pygments.formatters import HtmlFormatter
37
38 #
39 import settings
40
41
def is_prq(username):
    """Tell whether *username* is a pull-request pseudo-user name.

    Such names are recognised purely by their literal ``'$prq-'`` prefix.
    """
    prq_prefix = '$prq-'
    return username.startswith(prq_prefix)
44
def prq_for_user(username):
    """Return the PullRequest referenced by a ``'$prq-<id>'`` user name.

    The numeric id is whatever follows the first five characters (the
    length of the ``'$prq-'`` prefix tested by ``is_prq``).

    Returns None when the id part is not an integer or when no pull
    request with that id exists.  Other failures (e.g. database errors)
    are no longer swallowed and propagate to the caller.
    """
    # Parse the id on its own so a malformed name never reaches the database.
    try:
        prq_id = int(username[5:])
    except (TypeError, ValueError):
        return None

    try:
        return PullRequest.objects.get(id=prq_id)
    except PullRequest.DoesNotExist:
        return None
50
def check_user(request, user):
    """Yield an access-denied response if the requester may not view as *user*.

    This is a generator: it yields at most one response and yields nothing
    when access is allowed, so callers can write
    ``for error in check_user(...): return error``.

    - a pull-request name (see ``is_prq``) requires ``api.view_prq``;
    - any name other than the requester's own requires
      ``api.view_other_document``;
    - the requester's own name is always allowed.
    """
    requester = request.user
    log.info("user: %r, perm: %r" % (requester, requester.get_all_permissions()) )

    if is_prq(user):
        # pull request
        denied = not requester.has_perm('api.view_prq')
        message = "You don't have enough priviliges to view pull requests."
    elif requester.username != user:
        # other users
        denied = not requester.has_perm('api.view_other_document')
        message = "You don't have enough priviliges to view other people's document."
    else:
        denied = False
        message = None

    if denied:
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': message
        })
68
69 #
70 # Document List Handlers
71 #
72 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only document listing; used as LibraryHandler's anonymous handler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """List every document in the library with its view URL.

        `lib` is presumably injected by the @hglibrary decorator.
        """
        listing = []
        for docid in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })
        return {'documents': listing}
83         
84 #
# This handler controls the document collection
86 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: list documents, upload new ones."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the documents as a tree, nested by the PartCache relation.

        Every document from ``lib.documents()`` becomes a dict with ``url``,
        ``name`` and ``parts``.  A document that PartCache lists as a part of
        another one is moved under that parent's ``parts`` and removed from
        the top level.  Both levels are sorted with ``natural_order`` on the
        name.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # Shallow copy: same entry dicts, but top-level membership can be
        # changed without touching the lookup table.
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # A stale cache row may also point at a parent that is gone.
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            documents[docid]['parts'].append(documents[part])

            # not top-level anymore; the default keeps a part that is listed
            # under several parents from raising KeyError on the second pop
            document_tree.pop(part, None)

        for doc in documents.itervalues():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.itervalues(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document from uploaded text.

        The text comes from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8).  When
        ``generate_dc`` is set the text is wrapped with
        ``librarian.wrap_text``.  The document id is the ``bookname`` field.

        Returns:
            EntityCreated with url/name/revision on success;
            BadRequest when neither input is supplied or the file is not
            valid UTF-8;
            EntityConflict when the document already exists;
            InternalError (with a traceback) on other library failures.
        """
        import traceback

        if form.cleaned_data['ocr_data']:
            data = form.cleaned_data['ocr_data']
        else:
            # .get() so that a missing upload becomes a 400 instead of a
            # KeyError raised by request.FILES[...]
            ocr_file = request.FILES.get('ocr_file')
            if ocr_file is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            try:
                data = ocr_file.read().decode('utf-8')
            except UnicodeDecodeError:
                return response.BadRequest().django_response('ocr_file must be UTF-8 encoded.')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        except DocumentAlreadyExists:
            # Document is already there.
            # NOTE(review): handled before LibraryException in case
            # DocumentAlreadyExists derives from it (not visible in this
            # file); otherwise this clause would be unreachable.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
183
184 #
185 # Document Handlers
186 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document metadata; used as DocumentHandler's anonymous handler."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the name, view URLs and revision of document *docid*.

        Responds with EntityNotFound when the library raises
        RevisionNotFound for the document.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # `rc` is not imported explicitly in this module, so the former
            # `rc.NOT_FOUND` appears to raise NameError; use the response
            # helper the other handlers use.
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'message': 'Document not found.',
                'docid': docid,
            })

        result = {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }

        return result
206
207
class DiffHandler(BaseHandler):
    """Render the difference between two versions of a document as HTML."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return a highlighted unified diff for document *docid*.

        The "source" side is ``lib.document(docid)``; the "target" side is
        the document at the ``revision`` GET parameter.  Returns an empty
        string when no revision is given, otherwise the diff rendered by
        pygments with the ``pastie`` CSS class.
        """
        revision = request.GET.get('revision')
        if not revision:
            return ''
        source_document = lib.document(docid)
        target_document = lib.document_for_revision(revision)
        # was a debug `print`; request handlers should not write to stdout
        log.debug("diff: %r -> %r", source_document, target_document)

        diff = difflib.unified_diff(
            source_document.data('xml').splitlines(True),
            target_document.data('xml').splitlines(True),
            'source',
            'target')

        s = ''.join(diff)
        return highlight(s, DiffLexer(), HtmlFormatter(cssclass="pastie"))
229
230
231 #
232 # Document Meta Data
233 #
234 class DocumentHandler(BaseHandler):
235     allowed_methods = ('GET', 'PUT')
236     anonymous = BasicDocumentHandler
237
238     @validate_form(forms.DocumentRetrieveForm, 'GET')
239     @hglibrary
240     def read(self, request, form, docid, lib):
241         """Read document's meta data"""       
242         log.info(u"User '%s' wants to edit %s(%s) as %s" % \
243             (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
244
245         user = form.cleaned_data['user'] or request.user.username
246         rev = form.cleaned_data['revision'] or 'latest'
247
248         for error in check_user(request, user):
249             return error
250             
251         try:
252             doc = lib.document(docid, user, rev=rev)
253         except RevisionMismatch, e:
254             # the document exists, but the revision is bad
255             return response.EntityNotFound().django_response({
256                 'reason': 'revision-mismatch',
257                 'message': e.message,
258                 'docid': docid,
259                 'user': user,
260             })
261         except RevisionNotFound, e:
262             # the user doesn't have this document checked out
263             # or some other weird error occured
264             # try to do the checkout
265             try:
266                 if user == request.user.username:
267                     mdoc = lib.document(docid)
268                     doc = mdoc.take(user)
269                 elif is_prq(user):
270                     prq = prq_for_user(user)
271                     # commiter's document
272                     prq_doc = lib.document_for_revision(prq.source_revision)
273                     doc = prq_doc.take(user)
274                 else:
275                     return response.EntityNotFound().django_response({
276                         'reason': 'document-not-found',
277                         'message': e.message,
278                         'docid': docid,
279                         'user': user,
280                     })
281             except RevisionNotFound, e:
282                 return response.EntityNotFound().django_response({
283                     'reason': 'document-not-found',
284                     'message': e.message,
285                     'docid': docid,
286                     'user': user
287                 })
288
289         return {
290             'name': doc.id,
291             'user': user,
292             'html_url': reverse('dochtml_view', args=[doc.id]),
293             'text_url': reverse('doctext_view', args=[doc.id]),
294             # 'dc_url': reverse('docdc_view', args=[doc.id]),
295             'gallery_url': reverse('docgallery_view', args=[doc.id]),
296             'merge_url': reverse('docmerge_view', args=[doc.id]),
297             'revision': doc.revision,
298             'timestamp': doc.revision.timestamp,
299             # 'public_revision': doc.revision,
300             # 'public_timestamp': doc.revision.timestamp,
301         }   
302
303     
304 #    @hglibrary
305 #    def update(self, request, docid, lib):
306 #        """Update information about the document, like display not"""
307 #        return
308 #
309 #
310 #
311 class DocumentHTMLHandler(BaseHandler):
312     allowed_methods = ('GET')
313
314     @validate_form(forms.DocumentRetrieveForm, 'GET')
315     @hglibrary
316     def read(self, request, form, docid, lib, stylesheet='partial'):
317         """Read document as html text"""
318         try:
319             revision = form.cleaned_data['revision']
320             user = form.cleaned_data['user'] or request.user.username
321             document = lib.document_for_revision(revision)
322
323             if document.id != docid:
324                 return response.BadRequest().django_response({
325                     'reason': 'name-mismatch',
326                     'message': 'Provided revision is not valid for this document'
327                 })
328
329             if document.owner != user:
330                 return response.BadRequest().django_response({
331                     'reason': 'user-mismatch',
332                     'message': "Provided revision doesn't belong to user %s" % user
333                 })
334
335             for error in check_user(request, user):
336                 return error
337
338             return librarian.html.transform(document.data('xml'), is_file=False, \
339                 parse_dublincore=False, stylesheet='full',\
340                 options={
341                     "with-paths": 'boolean(1)',                    
342                 })
343                 
344         except (EntryNotFound, RevisionNotFound), e:
345             return response.EntityNotFound().django_response({
346                 'reason': 'not-found', 'message': e.message})
347         except librarian.ValidationError, e:
348             return response.InternalError().django_response({
349                 'reason': 'xml-non-valid', 'message': e.message or u''})
350         except librarian.ParseError, e:
351             return response.InternalError().django_response({
352                 'reason': 'xml-parse-error', 'message': e.message or u'' })
353
354 #
355 # Image Gallery
356 #
357
class DocumentGalleryHandler(BaseHandler):
    """List the scan galleries attached to a document."""
    # One-element tuple: the former ('GET') was just the string 'GET'.
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every GalleryForDocument row of *docid* whose directory exists
        under MEDIA_ROOT, returns ``{'name': ..., 'pages': [...]}`` where
        pages is a sorted list of URL-quoted image URLs.  Directory entries
        that are not .png/.jpeg/.jpg files, or whose name cannot be decoded
        as UTF-8, are represented by the ``missing.png`` placeholder.

        NOTE(review): the original assigned ``url = None`` for such entries
        but then overwrote it unconditionally, so non-images were listed
        with their real URL and an undecodable name hit an unbound or stale
        ``url``.  The placeholder behaviour follows the evident data flow;
        confirm that skipping those entries was not what was wanted.
        """
        galleries = []
        from urllib import quote

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # reset per entry so nothing leaks from the previous iteration
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
                            % (filename, dirpath) )
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in [u'.png', u'.jpeg', u'.jpg']:
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append( quote(url.encode('utf-8')) )

            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
403
404
405
406 #
407 # Dublin Core handlers
408 #
409 # @requires librarian
410 #
411 #class DocumentDublinCoreHandler(BaseHandler):
412 #    allowed_methods = ('GET', 'POST')
413 #
414 #    @hglibrary
415 #    def read(self, request, docid, lib):
416 #        """Read document as raw text"""
417 #        try:
418 #            revision = request.GET.get('revision', 'latest')
419 #
420 #            if revision == 'latest':
421 #                doc = lib.document(docid)
422 #            else:
423 #                doc = lib.document_for_revision(revision)
424 #
425 #
426 #            if document.id != docid:
427 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
428 #                    'message': 'Provided revision is not valid for this document'})
429 #
430 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
431 #            return bookinfo.serialize()
432 #        except (EntryNotFound, RevisionNotFound), e:
433 #            return response.EntityNotFound().django_response({
434 #                'exception': type(e), 'message': e.message})
435 #
436 #    @hglibrary
437 #    def create(self, request, docid, lib):
438 #        try:
439 #            bi_json = request.POST['contents']
440 #            revision = request.POST['revision']
441 #
442 #            if request.POST.has_key('message'):
443 #                msg = u"$USER$ " + request.PUT['message']
444 #            else:
445 #                msg = u"$AUTO$ Dublin core update."
446 #
447 #            current = lib.document(docid, request.user.username)
448 #            orig = lib.document_for_revision(revision)
449 #
450 #            if current != orig:
451 #                return response.EntityConflict().django_response({
452 #                        "reason": "out-of-date",
453 #                        "provided": orig.revision,
454 #                        "latest": current.revision })
455 #
456 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
457 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
458 #
459 #            # zapisz
460 #            ndoc = current.quickwrite('xml', \
461 #                document.serialize().encode('utf-8'),\
462 #                message=msg, user=request.user.username)
463 #
464 #            try:
465 #                # return the new revision number
466 #                return {
467 #                    "document": ndoc.id,
468 #                    "subview": "dc",
469 #                    "previous_revision": current.revision,
470 #                    "revision": ndoc.revision,
471 #                    'timestamp': ndoc.revision.timestamp,
472 #                    "url": reverse("docdc_view", args=[ndoc.id])
473 #                }
474 #            except Exception, e:
475 #                if ndoc: lib._rollback()
476 #                raise e
477 #        except RevisionNotFound:
478 #            return response.EntityNotFound().django_response()
479
class MergeHandler(BaseHandler):
    """Synchronise a user's copy of a document with the shared one."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The ``type`` form field selects the operation:

        - ``'update'``: ``base_doc.update()`` is called for the requesting
          user; a "no-op" result is returned when nothing changed.
        - ``'share'``: ``base_doc.share()`` is called.  Users without the
          ``api.share_document`` permission get a PullRequest created (or
          their pending one updated) and a RequestAccepted response instead.

        Returns EntityConflict when the provided revision is not the latest
        one of the user's document, BadRequest for a branch that is not up
        to date, has unresolved conflict marks, or for an unknown ``type``,
        and otherwise a SuccessAllOk response with the old and new
        revisions of both the user's and the shared document.
        """
        revision = form.cleaned_data['revision']

        # fetch the main branch document
        doc = lib.document(docid)

        # fetch the base document
        user_doc = lib.document_for_revision(revision)
        base_doc = user_doc.latest()

        if base_doc != user_doc:
            return response.EntityConflict().django_response({
                "reason": "out-of-date",
                "provided": str(user_doc.revision),
                "latest": str(base_doc.revision)
            })

        merge_type = form.cleaned_data['type']

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            user_doc_new = base_doc.update(request.user.username)

            if user_doc_new == user_doc:
                return response.SuccessAllOk().django_response({
                    "result": "no-op"
                })

            # shared document is the same
            doc_new = doc

        elif merge_type == 'share':
            if not base_doc.up_to_date():
                return response.BadRequest().django_response({
                    "reason": "not-fast-forward",
                    "message": "You must first update your branch to the latest version."
                })

            answer, info = base_doc.would_share()

            if not answer:
                return response.SuccessAllOk().django_response({
                    "result": "no-op", "message": info
                })

            # check for unresolved conflicts
            if base_doc.has_conflict_marks():
                return response.BadRequest().django_response({
                    "reason": "unresolved-conflicts",
                    "message": "There are unresolved conflicts in your file. Fix them, and try again."
                })

            if not request.user.has_perm('api.share_document'):
                # User is not permitted to make a merge, right away
                # So we instead create a pull request in the database
                try:
                    prq, created = PullRequest.objects.get_or_create(
                        comitter = request.user,
                        document = docid,
                        status = "N",
                        defaults = {
                            'source_revision': str(base_doc.revision),
                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
                        }
                    )

                    # there can't be 2 pending request from same user
                    # for the same document
                    if not created:
                        prq.source_revision = str(base_doc.revision)
                        # separator was 'u\n\n' (misplaced unicode prefix),
                        # which appended a literal "u" to the comment
                        prq.comment = prq.comment + u'\n\n' + (form.cleaned_data['message'] or u'')
                        prq.save()

                    return response.RequestAccepted().django_response(
                        ticket_status=prq.status,
                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
                except IntegrityError:
                    return response.EntityConflict().django_response({
                        'reason': 'request-already-exist'
                    })

            changed = base_doc.share(form.cleaned_data['message'])

            # update shared version if needed
            if changed:
                doc_new = doc.latest()
            else:
                doc_new = doc

            # the user version is the same
            user_doc_new = base_doc

        else:
            # Without this branch an unexpected type left user_doc_new and
            # doc_new unbound and the response below raised UnboundLocalError.
            return response.BadRequest().django_response({
                "reason": "invalid-type",
                "message": "Merge type must be 'update' or 'share'."
            })

        # The client can compare parent_revision to revision
        # to see if he needs to update user's view
        # Same goes for shared view

        return response.SuccessAllOk().django_response({
            "result": "success",
            "name": user_doc_new.id,
            "user": user_doc_new.owner,

            "revision": user_doc_new.revision,
            'timestamp': user_doc_new.revision.timestamp,

            "parent_revision": user_doc.revision,
            "parent_timestamp": user_doc.revision.timestamp,

            "shared_revision": doc_new.revision,
            "shared_timestamp": doc_new.revision.timestamp,

            "shared_parent_revision": doc.revision,
            "shared_parent_timestamp": doc.revision.timestamp,
        })
596             "shared_parent_timestamp": doc.revision.timestamp,
597         })