Konwersja HTML do XML przed zapisaniem XML na serwer.
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2 import os.path
3
4 import logging
5 log = logging.getLogger('platforma.api.library')
6
7 from piston.handler import BaseHandler, AnonymousBaseHandler
8 from piston.utils import rc
9
10 from datetime import date
11
12 from django.core.urlresolvers import reverse
13 from django.db import IntegrityError
14
15 import librarian
16 import librarian.html
17 import difflib
18 import wlrepo 
19
20 from explorer.models import GalleryForDocument
21
22 # internal imports
23 import api.forms as forms
24 import api.response as response
25 from api.utils import validate_form, hglibrary, natural_order
26 from api.models import PartCache, PullRequest
27
28 from pygments import highlight
29 from pygments.lexers import DiffLexer
30 from pygments.formatters import HtmlFormatter
31
32 #
33 import settings
34
35
def is_prq(username):
    """Tell whether *username* is a pull-request pseudo-user name.

    Such names are recognised purely by their ``$prq-`` prefix.
    """
    prq_prefix = '$prq-'
    return username.startswith(prq_prefix)
38
def prq_for_user(username):
    """Return the PullRequest referenced by a ``$prq-<id>`` user name.

    The id is parsed from everything after the first five characters of
    *username* (the length of the ``$prq-`` prefix).

    Returns None when the name is malformed (the suffix is not an integer,
    or *username* is not a string) or when no pull request has that id.
    """
    try:
        return PullRequest.objects.get(id=int(username[5:]))
    except (TypeError, ValueError, PullRequest.DoesNotExist):
        # Only "bad name" and "unknown id" mean "no pull request".  Any
        # other failure (for example a database error) is a real problem
        # and is deliberately left to propagate instead of being hidden.
        return None
44
def check_user(request, user):
    """Yield an access-denied response if request.user may not act as *user*.

    This is a generator: it yields at most one response and yields nothing
    when access is allowed.  Callers iterate over it and return the first
    item they get.

    * a pull-request pseudo-user requires the 'api.view_prq' permission,
    * any user name other than the requester's own requires
      'api.view_other_document',
    * the requester's own name needs no extra permission.
    """
    log.info("user: %r, perm: %r" % (request.user, request.user.get_all_permissions()) )

    if is_prq(user):
        # pull request
        needed_perm = 'api.view_prq'
        denial = "You don't have enough priviliges to view pull requests."
    elif request.user.username != user:
        # other users
        needed_perm = 'api.view_other_document'
        denial = "You don't have enough priviliges to view other people's document."
    else:
        # own document: nothing to check
        return

    if not request.user.has_perm(needed_perm):
        yield response.AccessDenied().django_response({
            'reason': 'access-denied',
            'message': denial
        })
62
63 #
64 # Document List Handlers
65 #
66 # TODO: security check
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only, flat listing of the documents in the library."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return every document of *lib* with its name and view URL."""
        listing = []
        for docid in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[docid]),
                'name': docid,
            })
        return {'documents': listing}
77         
78 #
79 # This handler controls the document collection
80 #
class LibraryHandler(BaseHandler):
    """Handler for the document collection: list it (GET), add to it (POST)."""
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the documents as a tree, with parts nested in their parents.

        Every document of *lib* gets an entry with 'url', 'name' and 'parts'.
        The (part_id, document_id) pairs stored in PartCache are then used to
        move each part out of the top level and into its parent's 'parts'
        list.  Pairs that mention a document missing from the library are
        logged and skipped.  Both the top level and every 'parts' list are
        sorted by name using natural_order.
        """
        documents = {}

        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': []
            }

        parts = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # Shallow copy: it shares the entry dicts with `documents`, so
        # entries can be dropped from the top level while still being
        # reachable (and sortable) through `documents`.
        document_tree = dict(documents)

        for part, docid in parts:
            # this way, we won't display broken links
            if part not in documents:
                log.info("NOT FOUND: %s", part)
                continue

            # The parent may be gone from the library as well; skip the
            # pair instead of failing the whole listing with a KeyError.
            if docid not in documents:
                log.info("NOT FOUND: %s", docid)
                continue

            documents[docid]['parts'].append(documents[part])

            # Not top-level anymore.  The default keeps this from raising
            # when the same part is listed under more than one parent and
            # has therefore already been removed.
            document_tree.pop(part, None)

        for doc in documents.values():
            doc['parts'].sort(key=natural_order(lambda d: d['name']))

        return {'documents': sorted(document_tree.values(),
            key=natural_order(lambda d: d['name']) ) }


    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document from pasted or uploaded text.

        The content is taken from the form's 'ocr_data' field or, when that
        is empty, from the uploaded 'ocr_file' (decoded as UTF-8).  When
        'generate_dc' is set the text is first passed through
        librarian.wrap_text together with today's date.  The document is
        created under the name given in 'bookname' while holding the library
        lock.

        Responses:
          * EntityCreated  -- with the new document's url, name and revision,
          * BadRequest     -- neither 'ocr_data' nor 'ocr_file' was supplied,
                              or the uploaded file is not valid UTF-8,
          * EntityConflict -- a document with this name already exists,
          * InternalError  -- any other library failure (carries a traceback).
        """
        import traceback

        data = form.cleaned_data['ocr_data']

        if not data:
            # .get() instead of [] so that a missing upload produces the
            # BadRequest below rather than an unhandled KeyError.
            upload = request.FILES.get('ocr_file')

            if upload is None:
                return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')

            try:
                data = upload.read().decode('utf-8')
            except UnicodeDecodeError:
                return response.BadRequest().django_response('Uploaded file is not valid UTF-8.')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                log.info("DOCID %s", docid)
                doc = lib.document_create(docid)
                # document created, but no content yet
                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception:
                    # rollback branch creation
                    lib._rollback()
                    raise wlrepo.LibraryException(traceback.format_exc())

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body = {
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision },
                    url = url )
            finally:
                lock.release()
        except wlrepo.DocumentAlreadyExists:
            # Document is already there.
            # NOTE(review): handled before LibraryException so this branch
            # stays reachable even if DocumentAlreadyExists derives from
            # LibraryException -- the hierarchy is not visible from here.
            return response.EntityConflict().django_response({
                "reason": "already-exists",
                "message": "Document %s already exists." % docid
            })
        except wlrepo.LibraryException:
            return response.InternalError().django_response({
                "reason": traceback.format_exc()
            })
177
178 #
179 # Document Handlers
180 #
181
class DiffHandler(BaseHandler):
    """Render the difference between two versions of a document."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return an HTML-highlighted unified diff for *docid*.

        The "source" side is the document returned by lib.document(docid);
        the "target" side is the document at the revision named by the
        'revision' query parameter.  An empty string is returned when no
        revision is given.
        """
        revision = request.GET.get('revision')
        if not revision:
            return ''

        source_document = lib.document(docid)
        target_document = lib.document_for_revision(revision)
        # Goes through the module logger instead of a bare print, so the
        # handler no longer writes to the server process' stdout.
        log.debug("diff: %s %s", source_document, target_document)

        diff = difflib.unified_diff(
            source_document.data('xml').splitlines(True),
            target_document.data('xml').splitlines(True),
            'source',
            'target')

        return highlight(''.join(diff), DiffLexer(), HtmlFormatter(cssclass="pastie"))
203
204
205 #
206 # Document Meta Data
207 #
208 class DocumentHandler(BaseHandler):
209     allowed_methods = ('GET', 'PUT')
210
211     @validate_form(forms.DocumentRetrieveForm, 'GET')
212     @hglibrary
213     def read(self, request, form, docid, lib):
214         """Read document's meta data"""       
215         log.info(u"User '%s' wants to edit %s(%s) as %s" % \
216             (request.user.username, docid, form.cleaned_data['revision'], form.cleaned_data['user']) )
217
218         user = form.cleaned_data['user'] or request.user.username
219         rev = form.cleaned_data['revision'] or 'latest'
220
221         for error in check_user(request, user):
222             return error
223             
224         try:
225             doc = lib.document(docid, user, rev=rev)
226         except wlrepo.RevisionMismatch, e:
227             # the document exists, but the revision is bad
228             return response.EntityNotFound().django_response({
229                 'reason': 'revision-mismatch',
230                 'message': e.message,
231                 'docid': docid,
232                 'user': user,
233             })
234         except wlrepo.RevisionNotFound, e:
235             # the user doesn't have this document checked out
236             # or some other weird error occured
237             # try to do the checkout
238             try:
239                 if user == request.user.username:
240                     mdoc = lib.document(docid)
241                     doc = mdoc.take(user)
242                 elif is_prq(user):
243                     prq = prq_for_user(user)
244                     # commiter's document
245                     prq_doc = lib.document_for_revision(prq.source_revision)
246                     doc = prq_doc.take(user)
247                 else:
248                     return response.EntityNotFound().django_response({
249                         'reason': 'document-not-found',
250                         'message': e.message,
251                         'docid': docid,
252                         'user': user,
253                     })
254             except wlrepo.RevisionNotFound, e:
255                 return response.EntityNotFound().django_response({
256                     'reason': 'document-not-found',
257                     'message': e.message,
258                     'docid': docid,
259                     'user': user
260                 })
261
262         return {
263             'name': doc.id,
264             'user': user,
265             'html_url': reverse('dochtml_view', args=[doc.id]),
266             'text_url': reverse('doctext_view', args=[doc.id]),
267             # 'dc_url': reverse('docdc_view', args=[doc.id]),
268             'gallery_url': reverse('docgallery_view', args=[doc.id]),
269             'merge_url': reverse('docmerge_view', args=[doc.id]),
270             'revision': doc.revision,
271             'timestamp': doc.revision.timestamp,
272             # 'public_revision': doc.revision,
273             # 'public_timestamp': doc.revision.timestamp,
274         }   
275
276     
277 #    @hglibrary
278 #    def update(self, request, docid, lib):
279 #        """Update information about the document, like display not"""
280 #        return
281 #
282 #
283 #
284 class DocumentHTMLHandler(BaseHandler):
285     allowed_methods = ('GET')
286
287     @validate_form(forms.DocumentRetrieveForm, 'GET')
288     @hglibrary
289     def read(self, request, form, docid, lib, stylesheet='full'):
290         """Read document as html text"""
291         try:
292             revision = form.cleaned_data['revision']
293             user = form.cleaned_data['user'] or request.user.username
294             document = lib.document_for_revision(revision)
295
296             if document.id != docid:
297                 return response.BadRequest().django_response({
298                     'reason': 'name-mismatch',
299                     'message': 'Provided revision is not valid for this document'
300                 })
301
302             if document.owner != user:
303                 return response.BadRequest().django_response({
304                     'reason': 'user-mismatch',
305                     'message': "Provided revision doesn't belong to user %s" % user
306                 })
307
308             for error in check_user(request, user):
309                 return error
310
311             return librarian.html.transform(document.data('xml'), is_file=False, \
312                 parse_dublincore=False, stylesheet=stylesheet,\
313                 options={
314                     "with-paths": 'boolean(1)',                    
315                 })
316                 
317         except (wlrepo.EntryNotFound, wlrepo.RevisionNotFound), e:
318             return response.EntityNotFound().django_response({
319                 'reason': 'not-found', 'message': e.message})
320         except librarian.ValidationError, e:
321             return response.InternalError().django_response({
322                 'reason': 'xml-non-valid', 'message': e.message or u''})
323         except librarian.ParseError, e:
324             return response.InternalError().django_response({
325                 'reason': 'xml-parse-error', 'message': e.message or u'' })
326
327 #
328 # Image Gallery
329 #
330
class DocumentGalleryHandler(BaseHandler):
    """Handler for the scan galleries attached to a document."""
    allowed_methods = ('GET', 'POST')


    def read(self, request, docid):
        """Read meta-data about scans for gallery of this document.

        Returns a list with one ``{'name': ..., 'pages': [...]}`` dict per
        GalleryForDocument row of *docid* whose directory exists under
        settings.MEDIA_ROOT.  'pages' holds the sorted, URL-quoted address
        of every directory entry.  Entries that are not .png/.jpeg/.jpg
        files, or whose name cannot be decoded as UTF-8, are represented by
        the 'missing.png' placeholder.
        """
        from urllib import quote

        galleries = []

        for assoc in GalleryForDocument.objects.filter(document=docid):
            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)

            if not os.path.isdir(dirpath):
                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                continue

            gallery = {'name': assoc.name, 'pages': []}

            for filename in os.listdir(dirpath):
                # Reset for every entry so that a rejected file can never
                # reuse the URL computed for the previous one (or hit an
                # unbound name when it is the first entry).
                url = None

                if not isinstance(filename, unicode):
                    try:
                        filename = filename.decode('utf-8')
                    except UnicodeDecodeError:
                        log.warn(u"File %r in gallery %r is not unicode. Ommiting."\
                            % (filename, dirpath) )
                        filename = None

                if filename is not None:
                    name, ext = os.path.splitext(os.path.basename(filename))

                    if ext.lower() in (u'.png', u'.jpeg', u'.jpg'):
                        url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
                    else:
                        # Leave url as None: previously it was reset here
                        # and then unconditionally overwritten, so the
                        # placeholder below was never used for such files.
                        log.warn(u"Ignoring: %s %s", name, ext)

                if url is None:
                    url = settings.MEDIA_URL + u'/missing.png'

                gallery['pages'].append( quote(url.encode('utf-8')) )

            gallery['pages'].sort()
            galleries.append(gallery)

        return galleries

    def create(self, request, docid):
        """Point the gallery of *docid* at the directory given in POST 'path'.

        Only superusers may do this (FORBIDDEN otherwise).  A
        GalleryForDocument row is created for the document, or its subpath
        is updated if one already exists; the answer is then CREATED.
        A missing or empty 'path' yields BAD_REQUEST.
        """
        if not request.user.is_superuser:
            return rc.FORBIDDEN

        new_path = request.POST.get('path')

        if not new_path:
            return rc.BAD_REQUEST

        gallery, created = GalleryForDocument.objects.get_or_create(
            document = docid,
            defaults = {
                'subpath': new_path,
            }
        )

        # an existing row keeps its identity and only gets the new path
        if not created:
            gallery.subpath = new_path
            gallery.save()

        return rc.CREATED
398
399 #
400 # Dublin Core handlers
401 #
402 # @requires librarian
403 #
404 #class DocumentDublinCoreHandler(BaseHandler):
405 #    allowed_methods = ('GET', 'POST')
406 #
407 #    @hglibrary
408 #    def read(self, request, docid, lib):
409 #        """Read document as raw text"""
410 #        try:
411 #            revision = request.GET.get('revision', 'latest')
412 #
413 #            if revision == 'latest':
414 #                doc = lib.document(docid)
415 #            else:
416 #                doc = lib.document_for_revision(revision)
417 #
418 #
419 #            if document.id != docid:
420 #                return response.BadRequest().django_response({'reason': 'name-mismatch',
421 #                    'message': 'Provided revision is not valid for this document'})
422 #
423 #            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
424 #            return bookinfo.serialize()
425 #        except (EntryNotFound, RevisionNotFound), e:
426 #            return response.EntityNotFound().django_response({
427 #                'exception': type(e), 'message': e.message})
428 #
429 #    @hglibrary
430 #    def create(self, request, docid, lib):
431 #        try:
432 #            bi_json = request.POST['contents']
433 #            revision = request.POST['revision']
434 #
435 #            if request.POST.has_key('message'):
436 #                msg = u"$USER$ " + request.PUT['message']
437 #            else:
438 #                msg = u"$AUTO$ Dublin core update."
439 #
440 #            current = lib.document(docid, request.user.username)
441 #            orig = lib.document_for_revision(revision)
442 #
443 #            if current != orig:
444 #                return response.EntityConflict().django_response({
445 #                        "reason": "out-of-date",
446 #                        "provided": orig.revision,
447 #                        "latest": current.revision })
448 #
449 #            xmldoc = parser.WLDocument.from_string(current.data('xml'))
450 #            document.book_info = dcparser.BookInfo.from_json(bi_json)
451 #
452 #            # zapisz
453 #            ndoc = current.quickwrite('xml', \
454 #                document.serialize().encode('utf-8'),\
455 #                message=msg, user=request.user.username)
456 #
457 #            try:
458 #                # return the new revision number
459 #                return {
460 #                    "document": ndoc.id,
461 #                    "subview": "dc",
462 #                    "previous_revision": current.revision,
463 #                    "revision": ndoc.revision,
464 #                    'timestamp': ndoc.revision.timestamp,
465 #                    "url": reverse("docdc_view", args=[ndoc.id])
466 #                }
467 #            except Exception, e:
468 #                if ndoc: lib._rollback()
469 #                raise e
470 #        except RevisionNotFound:
471 #            return response.EntityNotFound().django_response()
472
473 class MergeHandler(BaseHandler):
474     allowed_methods = ('POST',)
475
476     @validate_form(forms.MergeRequestForm, 'POST')
477     @hglibrary
478     def create(self, request, form, docid, lib):
479         """Create a new document revision from the information provided by user"""
480         try:
481             revision = form.cleaned_data['revision']
482
483             # fetch the main branch document
484             doc = lib.document(docid)
485
486             # fetch the base document
487             user_doc = lib.document_for_revision(revision)
488             base_doc = user_doc.latest()
489
490             if base_doc != user_doc:
491                 return response.EntityConflict().django_response({
492                     "reason": "out-of-date",
493                     "provided": str(user_doc.revision),
494                     "latest": str(base_doc.revision)
495                 })
496
497             if form.cleaned_data['type'] == 'update':
498                 # update is always performed from the file branch
499                 # to the user branch
500                 user_doc_new = base_doc.update(request.user.username)
501
502                 if user_doc_new == user_doc:
503                     return response.SuccessAllOk().django_response({
504                         "result": "no-op"
505                     })
506
507                 # shared document is the same
508                 doc_new = doc
509
510             if form.cleaned_data['type'] == 'share':
511                 if not base_doc.up_to_date():
512                     return response.BadRequest().django_response({
513                         "reason": "not-fast-forward",
514                         "message": "You must first update your branch to the latest version."
515                     })
516
517                 anwser, info = base_doc.would_share()
518
519                 if not anwser:
520                     return response.SuccessAllOk().django_response({
521                         "result": "no-op", "message": info
522                     })
523
524                 # check for unresolved conflicts
525                 if base_doc.has_conflict_marks():
526                     return response.BadRequest().django_response({
527                         "reason": "unresolved-conflicts",
528                         "message": "There are unresolved conflicts in your file. Fix them, and try again."
529                     })
530
531                 if not request.user.has_perm('api.share_document'):
532                     # User is not permitted to make a merge, right away
533                     # So we instead create a pull request in the database
534                     try:
535                         prq, created = PullRequest.objects.get_or_create(
536                             comitter = request.user,
537                             document = docid,
538                             status = "N",
539                             defaults = {
540                                 'source_revision': str(base_doc.revision),
541                                 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
542                             }
543                         )
544
545                         # there can't be 2 pending request from same user
546                         # for the same document
547                         if not created:
548                             prq.source_revision = str(base_doc.revision)
549                             prq.comment = prq.comment + 'u\n\n' + (form.cleaned_data['message'] or u'')
550                             prq.save()
551
552                         return response.RequestAccepted().django_response(\
553                             ticket_status=prq.status, \
554                             ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
555                     except IntegrityError:
556                         return response.EntityConflict().django_response({
557                             'reason': 'request-already-exist'
558                         })
559
560                 changed = base_doc.share(form.cleaned_data['message'])
561
562                 # update shared version if needed
563                 if changed:
564                     doc_new = doc.latest()
565                 else:
566                     doc_new = doc
567
568                 # the user wersion is the same
569                 user_doc_new = base_doc
570
571             # The client can compare parent_revision to revision
572             # to see if he needs to update user's view
573             # Same goes for shared view
574
575             return response.SuccessAllOk().django_response({
576                 "result": "success",
577                 "name": user_doc_new.id,
578                 "user": user_doc_new.owner,
579
580                 "revision": user_doc_new.revision,
581                 'timestamp': user_doc_new.revision.timestamp,
582
583                 "parent_revision": user_doc.revision,
584                 "parent_timestamp": user_doc.revision.timestamp,
585
586                 "shared_revision": doc_new.revision,
587                 "shared_timestamp": doc_new.revision.timestamp,
588
589                 "shared_parent_revision": doc.revision,
590                 "shared_parent_timestamp": doc.revision.timestamp,
591             })
592         except wlrepo.OutdatedException, e:
593             return response.BadRequest().django_response({
594                         "reason": "not-fast-forward",
595                         "message": e.message
596                     })
597         except wlrepo.LibraryException, e:
598             return response.InternalError().django_response({
599                         "reason": "merge-error",
600                         "message": e.message
601                     })