Style diffa.
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 __author__= "Ɓukasz Rekucki"
8 __date__ = "$2009-09-25 15:49:50$"
9 __doc__ = "Module documentation."
10
11 from piston.handler import BaseHandler, AnonymousBaseHandler
12 from django.http import HttpResponse
13
14 from datetime import date
15
16 from django.core.urlresolvers import reverse
17 from django.db import IntegrityError
18
19 import librarian
20 import librarian.html
21 import difflib
22 from librarian import dcparser, parser
23
24 from wlrepo import *
25 from api.models import PullRequest
26 from explorer.models import GalleryForDocument
27
28 # internal imports
29 import api.forms as forms
30 import api.response as response
31 from api.utils import validate_form, hglibrary, natural_order
32 from api.models import PartCache, PullRequest
33
34 from pygments import highlight
35 from pygments.lexers import DiffLexer
36 from pygments.formatters import HtmlFormatter
37
38 #
39 import settings
40
41
def is_prq(username):
    """Tell whether ``username`` names a pull-request pseudo-user.

    Such names are recognised purely by their ``$prq-`` prefix.
    """
    prq_prefix = '$prq-'
    return username.startswith(prq_prefix)
44
def prq_for_user(username):
    """Return the PullRequest referenced by a ``$prq-<id>`` user name.

    The part of ``username`` after the first five characters (the length of
    the ``$prq-`` prefix) is parsed as the integer primary key.

    Returns ``None`` when the id cannot be parsed or when no pull request
    with that id exists. Other errors (e.g. database failures) are no
    longer silently swallowed and propagate to the caller.
    """
    # Parse the id first, so a malformed name never reaches the database.
    try:
        prq_id = int(username[5:])
    except (TypeError, ValueError):
        return None

    try:
        return PullRequest.objects.get(id=prq_id)
    except PullRequest.DoesNotExist:
        return None
50
def check_user(request, user):
    """Yield an access-denied response if the requester may not act as ``user``.

    This is a generator: it yields at most one response and yields nothing
    when access is allowed. Callers iterate over it and return the first
    item they get.

    Viewing a pull-request pseudo-user requires the ``api.view_prq``
    permission; viewing any other user than oneself requires
    ``api.view_other_document``.
    """
    requester = request.user
    log.info("user: %r, perm: %r" % (requester, requester.get_all_permissions()) )

    if is_prq(user):
        # pull request
        required_perm = 'api.view_prq'
        denial = "You don't have enough priviliges to view pull requests."
    elif requester.username != user:
        # other users
        required_perm = 'api.view_other_document'
        denial = "You don't have enough priviliges to view other people's document."
    else:
        # the requester's own documents need no extra permission
        return

    if requester.has_perm(required_perm):
        return

    yield response.AccessDenied().django_response({
        'reason': 'access-denied',
        'message': denial
    })
68
69 #
70 # Document List Handlers
71 #
72 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only document listing; used as ``LibraryHandler.anonymous``."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return a flat list of all documents in the library.

        Each entry holds the document's name and the URL of its
        ``document_view``.
        """
        listing = []
        for name in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[name]),
                'name': name,
            })
        return {'documents': listing}
83         
84 #
85 # This handler controls the document collection
86 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: listing and creating documents."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the documents as a tree of top-level documents and parts.

        Every document in the library gets an entry with ``url``, ``name``
        and ``parts``. The (part, document) pairs stored in ``PartCache``
        are used to nest a part's entry under its parent; a nested entry is
        removed from the top level. Both the top level and every ``parts``
        list are sorted with ``natural_order`` applied to the name.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': [],
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # Shallow copy: the entries are shared with ``documents``, so parts
        # appended below are also visible through the tree.
        document_tree = dict(documents)

        for part, docid in parts:
            # Skip relations pointing at documents that are not in the
            # library - this way, we won't display broken links (and a
            # missing parent does not end in a KeyError).
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            # Not top-level anymore. The default keeps a part that is listed
            # under more than one parent from raising on the second removal.
            document_tree.pop(part, None)
            documents[docid]['parts'].append(documents[part])

        by_name = natural_order(lambda d: d['name'])

        for doc in documents.values():
            doc['parts'].sort(key=by_name)

        return {'documents': sorted(document_tree.values(), key=by_name)}

    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document from uploaded text.

        The text is taken from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` decoded as UTF-8. When
        ``generate_dc`` is set, the text is passed through
        ``librarian.wrap_text`` together with today's date.

        The document is created while holding the library lock. If writing
        the content fails, ``lib._rollback()`` is called and the failure is
        reported as an internal error.

        Returns an ``EntityCreated`` response with the document's url, name
        and revision; ``BadRequest`` when no usable text was supplied;
        ``EntityConflict`` when the document already exists.
        """
        import traceback

        data = form.cleaned_data['ocr_data']
        if not data:
            # A missing upload used to surface as a KeyError (HTTP 500).
            upload = request.FILES.get('ocr_file')
            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
            try:
                data = upload.read().decode('utf-8')
            except UnicodeDecodeError:
                return response.BadRequest().django_response('ocr_file must be UTF-8 encoded text.')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body={
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision,
                    },
                    url=url)
            finally:
                lock.release()
        # NOTE(review): handled before LibraryException in case
        # DocumentAlreadyExists is one of its subclasses (wlrepo is not
        # visible here - confirm); in that case the conflict response would
        # otherwise be unreachable.
        except DocumentAlreadyExists:
            # Document is already there
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except LibraryException:
            # NOTE(review): this sends the traceback to the client.
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
183
184 #
185 # Document Handlers
186 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document meta data; used as ``DocumentHandler.anonymous``."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the name, view URLs and revision of document ``docid``.

        The document is fetched with ``lib.document(docid)`` (no user).
        Returns an ``EntityNotFound`` response when the library raises
        ``RevisionNotFound``.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound as e:
            # The original returned ``rc.NOT_FOUND``, but ``rc`` is never
            # imported in this module; use the same not-found response as
            # the other handlers.
            return response.EntityNotFound().django_response({
                'reason': 'document-not-found',
                'message': e.message,
                'docid': docid,
            })

        return {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            'dc_url': reverse('docdc_view', args=[doc.id]),
            'public_revision': doc.revision,
        }
206
207
class DiffHandler(BaseHandler):
    """Handler rendering the difference between two document versions."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return an HTML-highlighted diff of ``docid`` against a revision.

        The source is ``lib.document(docid)``; the target is the document
        for the ``revision`` GET parameter. Their 'xml' data are compared
        with ``difflib.unified_diff`` and the result is rendered by pygments
        with the ``pastie`` CSS class.

        Returns an empty string when no revision is given, and an
        ``EntityNotFound`` response when either document cannot be loaded.
        """
        revision = request.GET.get('revision')
        if not revision:
            return ''

        try:
            source_document = lib.document(docid)
            target_document = lib.document_for_revision(revision)
            source_lines = source_document.data('xml').splitlines(True)
            target_lines = target_document.data('xml').splitlines(True)
        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'reason': 'not-found', 'message': e.message})

        # Replaces a debug ``print`` statement that wrote to stdout.
        log.debug("Diff: %r -> %r", source_document, target_document)

        diff = difflib.unified_diff(
            source_lines,
            target_lines,
            'source',
            'target')

        return highlight(''.join(diff), DiffLexer(), HtmlFormatter(cssclass="pastie"))
229
230
231 #
232 # Document Meta Data
233 #
class DocumentHandler(BaseHandler):
    """Handler for the meta data of a single document."""
    allowed_methods = ('GET', 'PUT')
    anonymous = BasicDocumentHandler

    @validate_form(forms.DocumentRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib):
        """Read document's meta data.

        The document is looked up for ``user`` (form field, defaulting to
        the requesting user) at ``revision`` (form field, defaulting to
        'latest'). ``check_user`` decides whether the requester may view
        that user's document.

        When the lookup raises ``RevisionNotFound``, a checkout is
        attempted: for the requester's own name the main document is taken;
        for a pull-request pseudo-user the document at the request's source
        revision is taken.

        Returns a dict with the document's name, user, view URLs, revision
        and timestamp, or an access-denied / not-found response.
        """
        log.info(u"User '%s' wants to edit %s(%s) as %s",
            request.user.username, docid,
            form.cleaned_data['revision'], form.cleaned_data['user'])

        user = form.cleaned_data['user'] or request.user.username
        rev = form.cleaned_data['revision'] or 'latest'

        for error in check_user(request, user):
            return error

        try:
            doc = lib.document(docid, user, rev=rev)
        except RevisionMismatch as e:
            # the document exists, but the revision is bad
            return response.EntityNotFound().django_response({
                'reason': 'revision-mismatch',
                'message': e.message,
                'docid': docid,
                'user': user,
            })
        except RevisionNotFound as e:
            # the user doesn't have this document checked out
            # or some other weird error occurred
            # try to do the checkout
            not_found = {
                'reason': 'document-not-found',
                'message': e.message,
                'docid': docid,
                'user': user,
            }
            try:
                if user == request.user.username:
                    doc = lib.document(docid).take(user)
                elif is_prq(user):
                    prq = prq_for_user(user)
                    if prq is None:
                        # Unknown pull request; without this guard the
                        # attribute access below fails on None.
                        return response.EntityNotFound().django_response(not_found)
                    # committer's document
                    doc = lib.document_for_revision(prq.source_revision).take(user)
                else:
                    return response.EntityNotFound().django_response(not_found)
            except RevisionNotFound as checkout_error:
                not_found['message'] = checkout_error.message
                return response.EntityNotFound().django_response(not_found)

        return {
            'name': doc.id,
            'user': user,
            'html_url': reverse('dochtml_view', args=[doc.id]),
            'text_url': reverse('doctext_view', args=[doc.id]),
            # 'dc_url': reverse('docdc_view', args=[doc.id]),
            'gallery_url': reverse('docgallery_view', args=[doc.id]),
            'merge_url': reverse('docmerge_view', args=[doc.id]),
            'revision': doc.revision,
            'timestamp': doc.revision.timestamp,
            # 'public_revision': doc.revision,
            # 'public_timestamp': doc.revision.timestamp,
        }
302
303     
304 #    @hglibrary
305 #    def update(self, request, docid, lib):
306 #        """Update information about the document, like display not"""
307 #        return
308 #
309 #
310 #
class DocumentHTMLHandler(BaseHandler):
    """Handler rendering a document revision as HTML."""
    # Was ``('GET')``, which is just the string 'GET', not a tuple.
    allowed_methods = ('GET',)

    @validate_form(forms.DocumentRetrieveForm, 'GET')
    @hglibrary
    def read(self, request, form, docid, lib, stylesheet='partial'):
        """Read document as html text.

        The document is loaded for the ``revision`` form field. The request
        is rejected with ``BadRequest`` when that revision belongs to a
        different document or to a different user than ``user`` (form
        field, defaulting to the requesting user). ``check_user`` then
        decides whether the requester may view that user's document.

        Returns the output of ``librarian.html.transform`` for the
        document's 'xml' data, or a not-found / internal-error response
        when loading or transforming fails.
        """
        try:
            revision = form.cleaned_data['revision']
            user = form.cleaned_data['user'] or request.user.username
            document = lib.document_for_revision(revision)

            if document.id != docid:
                return response.BadRequest().django_response({
                    'reason': 'name-mismatch',
                    'message': 'Provided revision is not valid for this document'
                })

            if document.owner != user:
                return response.BadRequest().django_response({
                    'reason': 'user-mismatch',
                    'message': "Provided revision doesn't belong to user %s" % user
                })

            for error in check_user(request, user):
                return error

            # NOTE(review): the ``stylesheet`` argument is not used - 'full'
            # is always passed. Left unchanged so the output stays the
            # same; confirm which one is intended.
            return librarian.html.transform(document.data('xml'), is_file=False,
                parse_dublincore=False, stylesheet='full',
                options={
                    "with-paths": 'boolean(1)',
                })

        except (EntryNotFound, RevisionNotFound) as e:
            return response.EntityNotFound().django_response({
                'reason': 'not-found', 'message': e.message})
        except librarian.ValidationError as e:
            return response.InternalError().django_response({
                'reason': 'xml-non-valid', 'message': e.message or u''})
        except librarian.ParseError as e:
            return response.InternalError().django_response({
                'reason': 'xml-parse-error', 'message': e.message or u'' })
353
354 #
355 # Image Gallery
356 #
357
class DocumentGalleryHandler(BaseHandler):
    """Handler listing the scan galleries attached to a document."""
    # Was ``('GET')``, which is just the string 'GET', not a tuple.
    allowed_methods = ('GET',)

    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        For every ``GalleryForDocument`` row of ``docid`` whose directory
        exists under ``settings.MEDIA_ROOT``, returns a dict with the
        gallery's ``name`` and a sorted list of URL-quoted page URLs.

        Files whose extension is not .png/.jpeg/.jpg are skipped. A file
        whose name cannot be decoded as UTF-8 is represented by the
        ``missing.png`` placeholder URL. (The original code intended both
        cases but overwrote the marker for ignored files and could read an
        unbound or stale ``url`` for undecodable names.)
        """
        from urllib import quote

        image_extensions = (u'.png', u'.jpeg', u'.jpg')
        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warning(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warning(u"File %r in gallery %r is not unicode. Ommiting.",
                            filename, dirpath)
                        filename = None

                if filename is None:
                    # keep a page slot for the file we could not name
                    url = settings.MEDIA_URL + u'/missing.png'
                else:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() not in image_extensions:
                        log.warning(u"Ignoring: %s %s", name, ext)
                        continue

                    url = settings.MEDIA_URL + assoc.subpath + u'/' + filename

                gallery['pages'].append(quote(url.encode('utf-8')))

            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries
403
404
405
406 #
407 # Dublin Core handlers
408 #
409 # @requires librarian
410 #
411 #class DocumentDublinCoreHandler(BaseHandler):
412 #    allowed_methods = ('GET', 'POST')
413 #
414 #    @hglibrary
415 #    def read(self, request, docid, lib):
416 #        """Read document as raw text"""
417 #        try:
418 #            revision = request.GET.get('revision', 'latest')
419 #
420 #            if revision == 'latest':
421 #                doc = lib.document(docid)
422 #            else:
423 #                doc = lib.document_for_revision(revision)
424 #
425 #
426 #            if document.id != docid:
427 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
428 #                    'message': 'Provided revision is not valid for this document'})
429 #
430 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
431 #            return bookinfo.serialize()
432 #        except (EntryNotFound, RevisionNotFound), e:
433 #            return response.EntityNotFound().django_response({
434 #                'exception': type(e), 'message': e.message})
435 #
436 #    @hglibrary
437 #    def create(self, request, docid, lib):
438 #        try:
439 #            bi_json = request.POST['contents']
440 #            revision = request.POST['revision']
441 #
442 #            if request.POST.has_key('message'):
443 #                msg = u"$USER$ " + request.PUT['message']
444 #            else:
445 #                msg = u"$AUTO$ Dublin core update."
446 #
447 #            current = lib.document(docid, request.user.username)
448 #            orig = lib.document_for_revision(revision)
449 #
450 #            if current != orig:
451 #                return response.EntityConflict().django_response({
452 #                        "reason": "out-of-date",
453 #                        "provided": orig.revision,
454 #                        "latest": current.revision })
455 #
456 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
457 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
458 #
459 #            # zapisz
460 #            ndoc = current.quickwrite('xml', \
461 #                document.serialize().encode('utf-8'),\
462 #                message=msg, user=request.user.username)
463 #
464 #            try:
465 #                # return the new revision number
466 #                return {
467 #                    "document": ndoc.id,
468 #                    "subview": "dc",
469 #                    "previous_revision": current.revision,
470 #                    "revision": ndoc.revision,
471 #                    'timestamp': ndoc.revision.timestamp,
472 #                    "url": reverse("docdc_view", args=[ndoc.id])
473 #                }
474 #            except Exception, e:
475 #                if ndoc: lib._rollback()
476 #                raise e
477 #        except RevisionNotFound:
478 #            return response.EntityNotFound().django_response()
479
class MergeHandler(BaseHandler):
    """Handler for updating a user's branch or sharing it with others."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The ``revision`` form field must be the latest revision of the
        user's document, otherwise an ``EntityConflict`` ("out-of-date") is
        returned. The ``type`` form field selects the operation:

        * ``update`` - ``base_doc.update(<requesting user>)``; the shared
          document stays as it is.
        * ``share`` - ``base_doc.share(<message>)``. If the requester lacks
          the ``api.share_document`` permission, a pull request is
          stored in the database instead and a ``RequestAccepted``
          response is returned.

        On success the response carries the new and previous revisions and
        timestamps of both the user's and the shared document.
        """
        # ``from wlrepo import *`` does not bind the name ``wlrepo`` itself,
        # so the ``except wlrepo....`` clauses below would raise NameError
        # without this import.
        import wlrepo

        try:
            revision = form.cleaned_data['revision']
            merge_type = form.cleaned_data['type']

            # fetch the main branch document
            doc = lib.document(docid)

            # fetch the base document
            user_doc = lib.document_for_revision(revision)
            base_doc = user_doc.latest()

            if base_doc != user_doc:
                return response.EntityConflict().django_response({
                    "reason": "out-of-date",
                    "provided": str(user_doc.revision),
                    "latest": str(base_doc.revision)
                })

            if merge_type == 'update':
                # update is always performed from the file branch
                # to the user branch
                user_doc_new = base_doc.update(request.user.username)

                if user_doc_new == user_doc:
                    return response.SuccessAllOk().django_response({
                        "result": "no-op"
                    })

                # shared document is the same
                doc_new = doc

            elif merge_type == 'share':
                if not base_doc.up_to_date():
                    return response.BadRequest().django_response({
                        "reason": "not-fast-forward",
                        "message": "You must first update your branch to the latest version."
                    })

                answer, info = base_doc.would_share()

                if not answer:
                    return response.SuccessAllOk().django_response({
                        "result": "no-op", "message": info
                    })

                # check for unresolved conflicts
                if base_doc.has_conflict_marks():
                    return response.BadRequest().django_response({
                        "reason": "unresolved-conflicts",
                        "message": "There are unresolved conflicts in your file. Fix them, and try again."
                    })

                if not request.user.has_perm('api.share_document'):
                    # User is not permitted to make a merge, right away
                    # So we instead create a pull request in the database
                    return self._file_pull_request(request, docid, base_doc,
                        form.cleaned_data['message'])

                changed = base_doc.share(form.cleaned_data['message'])

                # update shared version if needed
                if changed:
                    doc_new = doc.latest()
                else:
                    doc_new = doc

                # the user version is the same
                user_doc_new = base_doc

            else:
                # Without this branch an unexpected type would fail below
                # on unbound local variables.
                return response.BadRequest().django_response({
                    "reason": "invalid-type",
                    "message": "Merge type must be 'update' or 'share'."
                })

            # The client can compare parent_revision to revision
            # to see if he needs to update user's view
            # Same goes for shared view

            return response.SuccessAllOk().django_response({
                "result": "success",
                "name": user_doc_new.id,
                "user": user_doc_new.owner,

                "revision": user_doc_new.revision,
                'timestamp': user_doc_new.revision.timestamp,

                "parent_revision": user_doc.revision,
                "parent_timestamp": user_doc.revision.timestamp,

                "shared_revision": doc_new.revision,
                "shared_timestamp": doc_new.revision.timestamp,

                "shared_parent_revision": doc.revision,
                "shared_parent_timestamp": doc.revision.timestamp,
            })
        except wlrepo.OutdatedException as e:
            return response.BadRequest().django_response({
                        "reason": "not-fast-forward",
                        "message": e.message
                    })
        except wlrepo.LibraryException as e:
            return response.InternalError().django_response({
                        "reason": "merge-error",
                        "message": e.message
                    })

    def _file_pull_request(self, request, docid, base_doc, message):
        """Create or refresh the requester's pending pull request for ``docid``.

        There can't be 2 pending requests from the same user for the same
        document: an existing one (status "N") gets its source revision
        replaced and ``message`` appended to its comment.

        Returns a ``RequestAccepted`` response pointing at the pull request,
        or ``EntityConflict`` when the database raises ``IntegrityError``.
        """
        try:
            prq, created = PullRequest.objects.get_or_create(
                comitter=request.user,
                document=docid,
                status="N",
                defaults={
                    'source_revision': str(base_doc.revision),
                    'comment': message or '$AUTO$ Document shared.',
                }
            )

            if not created:
                prq.source_revision = str(base_doc.revision)
                # Was ``'u\n\n'``: a misplaced unicode prefix that put a
                # literal "u" in front of the separator.
                prq.comment = prq.comment + u'\n\n' + (message or u'')
                prq.save()

            return response.RequestAccepted().django_response(
                ticket_status=prq.status,
                ticket_uri=reverse("pullrequest_view", args=[prq.id]))
        except IntegrityError:
            return response.EntityConflict().django_response({
                'reason': 'request-already-exist'
            })