Merge branch 'master' of stigma.nowoczesnapolska.org.pl:platforma
[redakcja.git] / apps / api / handlers / library_handlers.py
index 4d23478..488c2d4 100644 (file)
@@ -13,10 +13,11 @@ from datetime import date
 
 from django.core.urlresolvers import reverse
 from django.utils import simplejson as json
+from django.db import IntegrityError
 
 import librarian
 import librarian.html
-from librarian import dcparser
+from librarian import dcparser, parser
 
 from wlrepo import *
 from explorer.models import PullRequest, GalleryForDocument
@@ -37,6 +38,7 @@ log = logging.getLogger('platforma.api')
 #
 # Document List Handlers
 #
+# TODO: security check
 class BasicLibraryHandler(AnonymousBaseHandler):
     allowed_methods = ('GET',)
 
@@ -50,18 +52,21 @@ class BasicLibraryHandler(AnonymousBaseHandler):
         return {'documents' : document_list}
         
 
+#
+# This handler controls the document collection
+#
 class LibraryHandler(BaseHandler):
     allowed_methods = ('GET', 'POST')
     anonymous = BasicLibraryHandler
 
+
     @hglibrary
     def read(self, request, lib):
         """Return the list of documents."""
 
         documents = {}
         
-        for docid in lib.documents():
-            docid = docid.decode('utf-8')
+        for docid in lib.documents():            
             documents[docid] = {
                 'url': reverse('document_view', args=[docid]),
                 'name': docid,
@@ -85,10 +90,7 @@ class LibraryHandler(BaseHandler):
             # not top-level anymore
             document_tree.pop(part)
             parent['parts'].append(child)
-
-        # sort the right way
         
-
         for doc in documents.itervalues():
             doc['parts'].sort(key=natural_order(lambda d: d['name']))
             
@@ -105,6 +107,9 @@ class LibraryHandler(BaseHandler):
         else:            
             data = request.FILES['ocr_file'].read().decode('utf-8')
 
+        if data is None:
+            return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
+
         if form.cleaned_data['generate_dc']:
             data = librarian.wrap_text(data, unicode(date.today()))
 
@@ -121,9 +126,10 @@ class LibraryHandler(BaseHandler):
                     doc = doc.quickwrite('xml', data.encode('utf-8'),
                         '$AUTO$ XML data uploaded.', user=request.user.username)
                 except Exception,e:
+                    import traceback
                     # rollback branch creation
                     lib._rollback()
-                    raise LibraryException("Exception occured:" + repr(e))
+                    raise LibraryException(traceback.format_exc())
 
                 url = reverse('document_view', args=[doc.id])
 
@@ -136,12 +142,16 @@ class LibraryHandler(BaseHandler):
             finally:
                 lock.release()
         except LibraryException, e:
-            return response.InternalError().django_response(\
-                {'exception': repr(e) })                
+            import traceback
+            return response.InternalError().django_response({
+                "reason": traceback.format_exc()
+            })
         except DocumentAlreadyExists:
             # Document is already there
-            return response.EntityConflict().django_response(\
-                {"reason": "Document %s already exists." % docid})
+            return response.EntityConflict().django_response({
+                "reason": "already-exists",
+                "message": "Document %s already exists." % docid
+            })
 
 #
 # Document Handlers
@@ -176,13 +186,14 @@ class DocumentHandler(BaseHandler):
     @hglibrary
     def read(self, request, docid, lib):
         """Read document's meta data"""       
-        log.info("Read %s", docid)
+        log.info(u"Read %s (%s)" % (docid, type(docid)) )
         try:
             doc = lib.document(docid)
             udoc = doc.take(request.user.username)
         except RevisionNotFound, e:
             return response.EntityNotFound().django_response({
-                'exception': type(e), 'message': e.message})
+                'exception': type(e), 'message': e.message,
+                'docid': docid })
 
         # is_shared = udoc.ancestorof(doc)
         # is_uptodate = is_shared or shared.ancestorof(document)
@@ -213,7 +224,7 @@ class DocumentHTMLHandler(BaseHandler):
     allowed_methods = ('GET')
 
     @hglibrary
-    def read(self, request, docid, lib):
+    def read(self, request, docid, lib, stylesheet='partial'):
         """Read document as html text"""
         try:
             revision = request.GET.get('revision', 'latest')
@@ -227,44 +238,57 @@ class DocumentHTMLHandler(BaseHandler):
                 return response.BadRequest().django_response({'reason': 'name-mismatch',
                     'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
 
-            return librarian.html.transform(document.data('xml'), is_file=False)
+            return librarian.html.transform(document.data('xml'), is_file=False, \
+                parse_dublincore=False, stylesheet=stylesheet,\
+                options={
+                    "with-paths": 'boolean(1)',                    
+                })
+                
         except (EntryNotFound, RevisionNotFound), e:
             return response.EntityNotFound().django_response({
-                'exception': type(e), 'message': e.message})
-
+                'reason': 'not-found', 'message': e.message})
+        except librarian.ParseError, e:
+            return response.InternalError().django_response({
+                'reason': 'xml-parse-error', 'message': e.message })
 
 #
 # Image Gallery
 #
-from django.core.files.storage import FileSystemStorage
 
 class DocumentGalleryHandler(BaseHandler):
     allowed_methods = ('GET')
     
+    
     def read(self, request, docid):
         """Read meta-data about scans for gallery of this document."""
         galleries = []
+        from urllib import quote
 
         for assoc in GalleryForDocument.objects.filter(document=docid):
             dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
 
             if not os.path.isdir(dirpath):
-                log.info(u"[WARNING]: missing gallery %s", dirpath)
+                log.warn(u"[WARNING]: missing gallery %s", dirpath)
                 continue
 
             gallery = {'name': assoc.name, 'pages': []}
             
-            for file in sorted(os.listdir(dirpath), key=natural_order()):
-                log.info(file)
+            for file in os.listdir(dirpath):
+                if not isinstance(file, unicode):
+                    log.warn(u"File %r is gallery %r is not unicode. Ommiting."\
+                        % (file, dirpath) )
+                    continue
+                               
                 name, ext = os.path.splitext(os.path.basename(file))
 
-                if ext.lower() not in ['.png', '.jpeg', '.jpg']:
-                    log.info("Ignoring: %s %s", name, ext)
+                if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
+                    log.info(u"Ignoring: %s %s", name, ext)
                     continue
 
-                url = settings.MEDIA_URL + assoc.subpath + u'/' + file.decode('utf-8');
-                gallery['pages'].append(url)
-                
+                url = settings.MEDIA_URL + assoc.subpath + u'/' + file;
+                gallery['pages'].append( quote(url.encode('utf-8')) )
+
+            gallery['pages'].sort()
             galleries.append(gallery)
 
         return galleries                      
@@ -277,6 +301,7 @@ XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1
 #
 #
 #
+
 class DocumentTextHandler(BaseHandler):
     allowed_methods = ('GET', 'POST')
 
@@ -284,6 +309,8 @@ class DocumentTextHandler(BaseHandler):
     def read(self, request, docid, lib):
         """Read document as raw text"""
         revision = request.GET.get('revision', 'latest')
+        part = request.GET.get('part', False)
+        
         try:
             if revision == 'latest':
                 document = lib.document(docid)
@@ -295,22 +322,36 @@ class DocumentTextHandler(BaseHandler):
                     'message': 'Provided revision is not valid for this document'})
             
             # TODO: some finer-grained access control
-            return document.data('xml')
+            if part is False:
+                # we're done :)
+                return document.data('xml')
+            else:
+                xdoc = parser.WLDocument.from_string(document.data('xml'),\
+                    parse_dublincore=False)
+                ptext = xdoc.part_as_text(part)
+
+                if ptext is None:
+                    return response.EntityNotFound().django_response({
+                      'reason': 'no-part-in-document'                     
+                    })
+
+                return ptext
+        except librarian.ParseError, e:
+            return response.EntityNotFound().django_response({
+                'reason': 'invalid-document-state',
+                'exception': type(e), 'message': e.message
+            })
         except (EntryNotFound, RevisionNotFound), e:
             return response.EntityNotFound().django_response({
-                'exception': type(e), 'message': e.message})
+                'reason': 'not-found',
+                'exception': type(e), 'message': e.message
+            })   
 
     @hglibrary
     def create(self, request, docid, lib):
         try:
-            data = request.POST['contents']
             revision = request.POST['revision']
 
-            if request.POST.has_key('message'):
-                msg = u"$USER$ " + request.POST['message']
-            else:
-                msg = u"$AUTO$ XML content update."
-
             current = lib.document(docid, request.user.username)
             orig = lib.document_for_rev(revision)
 
@@ -320,6 +361,33 @@ class DocumentTextHandler(BaseHandler):
                         "provided_revision": orig.revision,
                         "latest_revision": current.revision })
 
+            if request.POST.has_key('message'):
+                msg = u"$USER$ " + request.POST['message']
+            else:
+                msg = u"$AUTO$ XML content update."
+
+            if request.POST.has_key('contents'):
+                data = request.POST['contents']
+            else:
+                if not request.POST.has_key('chunks'):
+                    # bad request
+                    return response.BadRequest().django_response({'reason': 'invalid-arguments',
+                        'message': 'No contents nor chunks specified.'})
+
+                    # TODO: validate
+                parts = json.loads(request.POST['chunks'])                    
+                xdoc = parser.WLDocument.from_string(current.data('xml'))
+                   
+                errors = xdoc.merge_chunks(parts)
+
+                if len(errors):
+                    return response.EntityConflict().django_response({
+                            "reason": "invalid-chunks",
+                            "message": "Unable to merge following parts into the document: %s " % ",".join(errors)
+                    })
+
+                data = xdoc.serialize()
+
             # try to find any Xinclude tags
             includes = [m.groupdict()['link'] for m in (re.finditer(\
                 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
@@ -488,29 +556,35 @@ class MergeHandler(BaseHandler):
                     "provided": target_rev,
                     "latest": udoc.revision })
 
-        if not request.user.has_perm('explorer.book.can_share'):
-            # User is not permitted to make a merge, right away
-            # So we instead create a pull request in the database
-            prq = PullRequest(
-                comitter=request.user,
-                document=docid,
-                source_revision = str(udoc.revision),
-                status="N",
-                comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
-            )
-
-            prq.save()
-            return response.RequestAccepted().django_response(\
-                ticket_status=prq.status, \
-                ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
-
         if form.cleaned_data['type'] == 'update':
             # update is always performed from the file branch
             # to the user branch
             success, changed = udoc.update(request.user.username)
 
-        if form.cleaned_data['type'] == 'share':
-            success, changed = udoc.share(form.cleaned_data['message'])
+        if form.cleaned_data['type'] == 'share':        
+            if not request.user.has_perm('explorer.document.can_share'):
+                # User is not permitted to make a merge, right away
+                # So we instead create a pull request in the database
+                try:
+                    prq, created = PullRequest.objects.get_or_create(
+                        source_revision = str(udoc.revision),
+                        defaults = {
+                            'comitter': request.user,
+                            'document': docid,
+                            'status': "N",
+                            'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.',
+                        }
+                    )
+
+                    return response.RequestAccepted().django_response(\
+                        ticket_status=prq.status, \
+                        ticket_uri=reverse("pullrequest_view", args=[prq.id]) )
+                except IntegrityError:
+                    return response.EntityConflict().django_response({
+                        'reason': 'request-already-exist'
+                    })
+            else:
+                success, changed = udoc.share(form.cleaned_data['message'])
 
         if not success:
             return response.EntityConflict().django_response({
@@ -520,12 +594,12 @@ class MergeHandler(BaseHandler):
         if not changed:
             return response.SuccessNoContent().django_response()
 
-        new_udoc = udoc.latest()
+        nudoc = udoc.latest()
 
         return response.SuccessAllOk().django_response({
-            "name": udoc.id,
+            "name": nudoc.id,
             "parent_user_resivion": udoc.revision,
             "parent_revision": doc.revision,
-            "revision": ndoc.revision,
-            'timestamp': ndoc.revision.timestamp,
+            "revision": nudoc.revision,
+            'timestamp': nudoc.revision.timestamp,
         })