Merge branch 'master' of stigma:platforma
[redakcja.git] / apps / api / handlers / library_handlers.py
index c5666dd..f113834 100644 (file)
@@ -1,4 +1,6 @@
 # -*- encoding: utf-8 -*-
 # -*- encoding: utf-8 -*-
+import os.path
+import logging
 
 __author__= "Łukasz Rekucki"
 __date__ = "$2009-09-25 15:49:50$"
 
 __author__= "Łukasz Rekucki"
 __date__ = "$2009-09-25 15:49:50$"
@@ -16,15 +18,22 @@ import librarian
 import librarian.html
 from librarian import dcparser
 
 import librarian.html
 from librarian import dcparser
 
-from wlrepo import RevisionNotFound, LibraryException, DocumentAlreadyExists
-from explorer.models import PullRequest
+from wlrepo import *
+from explorer.models import PullRequest, GalleryForDocument
 
 # internal imports
 import api.forms as forms
 import api.response as response
 
 # internal imports
 import api.forms as forms
 import api.response as response
-from api.utils import validate_form, hglibrary
+from api.utils import validate_form, hglibrary, natural_order
 from api.models import PartCache
 
 from api.models import PartCache
 
+#
+import settings
+
+
+log = logging.getLogger('platforma.api')
+
+
 #
 # Document List Handlers
 #
 #
 # Document List Handlers
 #
@@ -51,34 +60,36 @@ class LibraryHandler(BaseHandler):
 
         documents = {}
         
 
         documents = {}
         
-        for docid in lib.documents():
+        for docid in lib.documents():            
             documents[docid] = {
                 'url': reverse('document_view', args=[docid]),
                 'name': docid,
                 'parts': []
             }
 
             documents[docid] = {
                 'url': reverse('document_view', args=[docid]),
                 'name': docid,
                 'parts': []
             }
 
-        related = PartCache.objects.defer('part_id')\
+        parts = PartCache.objects.defer('part_id')\
             .values_list('part_id', 'document_id').distinct()
             .values_list('part_id', 'document_id').distinct()
+       
+        document_tree = dict(documents)
 
 
-        for part, docid in related:
+        for part, docid in parts:
             # this way, we won't display broken links
             if not documents.has_key(part):
             # this way, we won't display broken links
             if not documents.has_key(part):
+                log.info("NOT FOUND: %s", part)
                 continue
 
                 continue
 
-            child = documents[part]
             parent = documents[docid]
             parent = documents[docid]
+            child = documents[part]
+
+            # not top-level anymore
+            document_tree.pop(part)
+            parent['parts'].append(child)
+        
+        for doc in documents.itervalues():
+            doc['parts'].sort(key=natural_order(lambda d: d['name']))
             
             
-            if isinstance(parent, dict): # the parent is top-level
-                documents.pop(part)                
-                parent['parts'].append(child)
-                documents[part] = child['parts']
-            else: # not top-level
-                parent.append(child)
-            
-        return {
-            'documents': [d for d in documents.itervalues() if isinstance(d, dict)]
-        }
+        return {'documents': sorted(document_tree.itervalues(),
+            key=natural_order(lambda d: d['name']) ) }
 
     @validate_form(forms.DocumentUploadForm, 'POST')
     @hglibrary
 
     @validate_form(forms.DocumentUploadForm, 'POST')
     @hglibrary
@@ -90,6 +101,9 @@ class LibraryHandler(BaseHandler):
         else:            
             data = request.FILES['ocr_file'].read().decode('utf-8')
 
         else:            
             data = request.FILES['ocr_file'].read().decode('utf-8')
 
+        if data is None:
+            return response.BadRequest().django_response('You must pass ocr_data or ocr_file.')
+
         if form.cleaned_data['generate_dc']:
             data = librarian.wrap_text(data, unicode(date.today()))
 
         if form.cleaned_data['generate_dc']:
             data = librarian.wrap_text(data, unicode(date.today()))
 
@@ -98,7 +112,7 @@ class LibraryHandler(BaseHandler):
         try:
             lock = lib.lock()            
             try:
         try:
             lock = lib.lock()            
             try:
-                print "DOCID", docid                
+                log.info("DOCID %s", docid)
                 doc = lib.document_create(docid)
                 # document created, but no content yet
 
                 doc = lib.document_create(docid)
                 # document created, but no content yet
 
@@ -106,9 +120,10 @@ class LibraryHandler(BaseHandler):
                     doc = doc.quickwrite('xml', data.encode('utf-8'),
                         '$AUTO$ XML data uploaded.', user=request.user.username)
                 except Exception,e:
                     doc = doc.quickwrite('xml', data.encode('utf-8'),
                         '$AUTO$ XML data uploaded.', user=request.user.username)
                 except Exception,e:
+                    import traceback
                     # rollback branch creation
                     lib._rollback()
                     # rollback branch creation
                     lib._rollback()
-                    raise LibraryException("Exception occured:" + repr(e))
+                    raise LibraryException(traceback.format_exc())
 
                 url = reverse('document_view', args=[doc.id])
 
 
                 url = reverse('document_view', args=[doc.id])
 
@@ -121,8 +136,9 @@ class LibraryHandler(BaseHandler):
             finally:
                 lock.release()
         except LibraryException, e:
             finally:
                 lock.release()
         except LibraryException, e:
+            import traceback
             return response.InternalError().django_response(\
             return response.InternalError().django_response(\
-                {'exception': repr(e) })                
+                {'exception': traceback.format_exc()} )
         except DocumentAlreadyExists:
             # Document is already there
             return response.EntityConflict().django_response(\
         except DocumentAlreadyExists:
             # Document is already there
             return response.EntityConflict().django_response(\
@@ -143,9 +159,9 @@ class BasicDocumentHandler(AnonymousBaseHandler):
 
         result = {
             'name': doc.id,
 
         result = {
             'name': doc.id,
-            'html_url': reverse('dochtml_view', args=[doc.id,doc.revision]),
-            'text_url': reverse('doctext_view', args=[doc.id,doc.revision]),
-            'dc_url': reverse('docdc_view', args=[doc.id,doc.revision]),
+            'html_url': reverse('dochtml_view', args=[doc.id]),
+            'text_url': reverse('doctext_view', args=[doc.id]),
+            'dc_url': reverse('docdc_view', args=[doc.id]),
             'public_revision': doc.revision,
         }
 
             'public_revision': doc.revision,
         }
 
@@ -161,22 +177,29 @@ class DocumentHandler(BaseHandler):
     @hglibrary
     def read(self, request, docid, lib):
         """Read document's meta data"""       
     @hglibrary
     def read(self, request, docid, lib):
         """Read document's meta data"""       
+        log.info(u"Read %s (%s)" % (docid, type(docid)) )
         try:
             doc = lib.document(docid)
             udoc = doc.take(request.user.username)
         try:
             doc = lib.document(docid)
             udoc = doc.take(request.user.username)
-        except RevisionNotFound:
-            return request.EnityNotFound().django_response()
+        except RevisionNotFound, e:
+            return response.EntityNotFound().django_response({
+                'exception': type(e), 'message': e.message,
+                'docid': docid })
 
         # is_shared = udoc.ancestorof(doc)
         # is_uptodate = is_shared or shared.ancestorof(document)
 
         result = {
             'name': udoc.id,
 
         # is_shared = udoc.ancestorof(doc)
         # is_uptodate = is_shared or shared.ancestorof(document)
 
         result = {
             'name': udoc.id,
-            'html_url': reverse('dochtml_view', args=[udoc.id,udoc.revision]),
-            'text_url': reverse('doctext_view', args=[udoc.id,udoc.revision]),
-            'dc_url': reverse('docdc_view', args=[udoc.id,udoc.revision]),
+            'html_url': reverse('dochtml_view', args=[udoc.id]),
+            'text_url': reverse('doctext_view', args=[udoc.id]),
+            'dc_url': reverse('docdc_view', args=[udoc.id]),
+            'gallery_url': reverse('docgallery_view', args=[udoc.id]),
+            'merge_url': reverse('docmerge_view', args=[udoc.id]),
             'user_revision': udoc.revision,
             'user_revision': udoc.revision,
-            'public_revision': doc.revision,            
+            'user_timestamp': udoc.revision.timestamp,
+            'public_revision': doc.revision,
+            'public_timestamp': doc.revision.timestamp,
         }       
 
         return result
         }       
 
         return result
@@ -189,23 +212,70 @@ class DocumentHandler(BaseHandler):
 #
 #
 class DocumentHTMLHandler(BaseHandler):
 #
 #
 class DocumentHTMLHandler(BaseHandler):
-    allowed_methods = ('GET', 'PUT')
+    allowed_methods = ('GET')
 
     @hglibrary
 
     @hglibrary
-    def read(self, request, docid, revision, lib):
+    def read(self, request, docid, lib):
         """Read document as html text"""
         try:
         """Read document as html text"""
         try:
+            revision = request.GET.get('revision', 'latest')
+
             if revision == 'latest':
                 document = lib.document(docid)
             else:
                 document = lib.document_for_rev(revision)
 
             if revision == 'latest':
                 document = lib.document(docid)
             else:
                 document = lib.document_for_rev(revision)
 
-            return librarian.html.transform(document.data('xml'), is_file=False)
-        except RevisionNotFound:
-            return response.EntityNotFound().django_response()
+            if document.id != docid:
+                return response.BadRequest().django_response({'reason': 'name-mismatch',
+                    'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) })
+
+            return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False)
+        except (EntryNotFound, RevisionNotFound), e:
+            return response.EntityNotFound().django_response({
+                'exception': type(e), 'message': e.message})
+
+
+#
+# Image Gallery
+#
+
+class DocumentGalleryHandler(BaseHandler):
+    # NOTE: one-element tuple -- ('GET') without the comma is the string 'GET'.
+    allowed_methods = ('GET',)
+    
+    
+    def read(self, request, docid):
+        """Read meta-data about scans for gallery of this document.
+
+        Returns a list of {'name': ..., 'pages': [...]} dicts, one per
+        GalleryForDocument row whose directory exists under MEDIA_ROOT;
+        'pages' holds the sorted, URL-quoted addresses of the image files
+        (.png/.jpeg/.jpg) found in that directory.
+        """
+        galleries = []
+        from urllib import quote
+
+        for assoc in GalleryForDocument.objects.filter(document=docid):
+            dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath)
+
+            if not os.path.isdir(dirpath):
+                log.warn(u"[WARNING]: missing gallery %s", dirpath)
+                continue
 
 
+            gallery = {'name': assoc.name, 'pages': []}
+            
+            # `filename` rather than `file`, which would shadow the builtin
+            for filename in os.listdir(dirpath):
+                # non-unicode entries cannot be joined into the unicode URL below
+                if not isinstance(filename, unicode):
+                    log.warn(u"File %r in gallery %r is not unicode. Omitting.",
+                        filename, dirpath)
+                    continue
+                               
+                name, ext = os.path.splitext(os.path.basename(filename))
+
+                if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']:
+                    log.info(u"Ignoring: %s %s", name, ext)
+                    continue
 
 
+                url = settings.MEDIA_URL + assoc.subpath + u'/' + filename
+                gallery['pages'].append( quote(url.encode('utf-8')) )
 
 
+            gallery['pages'].sort()
+            galleries.append(gallery)
+
+        return galleries                      
 
 #
 # Document Text View
 
 #
 # Document Text View
@@ -214,30 +284,38 @@ class DocumentHTMLHandler(BaseHandler):
 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
 #
 #
 XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>[^\1]+?)\1\s*[^>]*?>"""
 #
 #
+#
 class DocumentTextHandler(BaseHandler):
 class DocumentTextHandler(BaseHandler):
-    allowed_methods = ('GET', 'PUT')
+    allowed_methods = ('GET', 'POST')
 
     @hglibrary
 
     @hglibrary
-    def read(self, request, docid, revision, lib):
-        """Read document as raw text"""               
+    def read(self, request, docid, lib):
+        """Read document as raw text"""
+        revision = request.GET.get('revision', 'latest')
         try:
             if revision == 'latest':
                 document = lib.document(docid)
             else:
                 document = lib.document_for_rev(revision)
         try:
             if revision == 'latest':
                 document = lib.document(docid)
             else:
                 document = lib.document_for_rev(revision)
+
+            if document.id != docid:
+                return response.BadRequest().django_response({'reason': 'name-mismatch',
+                    'message': 'Provided revision is not valid for this document'})
             
             # TODO: some finer-grained access control
             return document.data('xml')
             
             # TODO: some finer-grained access control
             return document.data('xml')
-        except RevisionNotFound:
-            return response.EntityNotFound().django_response()
+        except (EntryNotFound, RevisionNotFound), e:
+            return response.EntityNotFound().django_response({
+                'exception': type(e), 'message': e.message})
 
     @hglibrary
 
     @hglibrary
-    def update(self, request, docid, revision, lib):
+    def create(self, request, docid, lib):
         try:
         try:
-            data = request.PUT['contents']            
+            data = request.POST['contents']
+            revision = request.POST['revision']
 
 
-            if request.PUT.has_key('message'):
-                msg = u"$USER$ " + request.PUT['message']
+            if request.POST.has_key('message'):
+                msg = u"$USER$ " + request.POST['message']
             else:
                 msg = u"$AUTO$ XML content update."
 
             else:
                 msg = u"$AUTO$ XML content update."
 
@@ -254,6 +332,8 @@ class DocumentTextHandler(BaseHandler):
             includes = [m.groupdict()['link'] for m in (re.finditer(\
                 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
 
             includes = [m.groupdict()['link'] for m in (re.finditer(\
                 XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ]
 
+            log.info("INCLUDES: %s", includes)
+
             # TODO: provide useful routines to make this simpler
             def xml_update_action(lib, resolve):
                 try:
             # TODO: provide useful routines to make this simpler
             def xml_update_action(lib, resolve):
                 try:
@@ -276,25 +356,30 @@ class DocumentTextHandler(BaseHandler):
 
                 # now that the parts are ok, write xml
                 f = lib._fileopen(resolve('xml'), 'w+')
 
                 # now that the parts are ok, write xml
                 f = lib._fileopen(resolve('xml'), 'w+')
-                f.write(data)
+                f.write(data.encode('utf-8'))
                 f.close()
                 f.close()
-                
+
+            ndoc = None
             ndoc = current.invoke_and_commit(\
                 xml_update_action, lambda d: (msg, current.owner) )
 
             try:
                 # return the new revision number
             ndoc = current.invoke_and_commit(\
                 xml_update_action, lambda d: (msg, current.owner) )
 
             try:
                 # return the new revision number
-                return {
+                return response.SuccessAllOk().django_response({
                     "document": ndoc.id,
                     "subview": "xml",
                     "previous_revision": current.revision,
                     "document": ndoc.id,
                     "subview": "xml",
                     "previous_revision": current.revision,
-                    "updated_revision": ndoc.revision
-                }
+                    "revision": ndoc.revision,
+                    'timestamp': ndoc.revision.timestamp,
+                    "url": reverse("doctext_view", args=[ndoc.id])
+                })
             except Exception, e:
             except Exception, e:
-                lib._rollback()
+                if ndoc: lib._rollback()
                 raise e        
         except RevisionNotFound, e:
                 raise e        
         except RevisionNotFound, e:
-            return response.EntityNotFound().django_response(e)
+            return response.EntityNotFound(mimetype="text/plain").\
+                django_response(e.message)
+
 
 #
 # Dublin Core handlers
 
 #
 # Dublin Core handlers
@@ -302,27 +387,37 @@ class DocumentTextHandler(BaseHandler):
 # @requires librarian
 #
 class DocumentDublinCoreHandler(BaseHandler):
 # @requires librarian
 #
 class DocumentDublinCoreHandler(BaseHandler):
-    allowed_methods = ('GET', 'PUT')
+    allowed_methods = ('GET', 'POST')
 
     @hglibrary
 
     @hglibrary
-    def read(self, request, docid, revision, lib):
+    def read(self, request, docid, lib):
         """Read document as raw text"""        
         try:
         """Read document as raw text"""        
         try:
+            # the revision now comes from the query string; defaults to 'latest'
+            revision = request.GET.get('revision', 'latest')
+
             if revision == 'latest':
                 doc = lib.document(docid)
             else:
                 doc = lib.document_for_rev(revision)
             if revision == 'latest':
                 doc = lib.document(docid)
             else:
                 doc = lib.document_for_rev(revision)
+
+
+            # The fetched document is bound to `doc` in this method (not
+            # `document` as in the sibling handlers); using the wrong name
+            # here would raise NameError on every request.
+            if doc.id != docid:
+                return response.BadRequest().django_response({'reason': 'name-mismatch',
+                    'message': 'Provided revision is not valid for this document'})
             
             bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
             return bookinfo.serialize()
             
             bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
             return bookinfo.serialize()
-        except RevisionNotFound:
-            return response.EntityNotFound().django_response()
+        except (EntryNotFound, RevisionNotFound), e:
+            return response.EntityNotFound().django_response({
+                'exception': type(e), 'message': e.message})
 
     @hglibrary
 
     @hglibrary
-    def update(self, request, docid, revision, lib):
+    def create(self, request, docid, lib):
         try:
         try:
-            bi_json = request.PUT['contents']            
-            if request.PUT.has_key('message'):
+            bi_json = request.POST['contents']
+            revision = request.POST['revision']
+            
+            if request.POST.has_key('message'):
-                msg = u"$USER$ " + request.PUT['message']
+                # read the message from the same POST payload that was just
+                # tested; this handler no longer receives PUT requests
+                msg = u"$USER$ " + request.POST['message']
             else:
                 msg = u"$AUTO$ Dublin core update."
             else:
                 msg = u"$AUTO$ Dublin core update."
@@ -350,16 +445,16 @@ class DocumentDublinCoreHandler(BaseHandler):
                     "document": ndoc.id,
                     "subview": "dc",
                     "previous_revision": current.revision,
                     "document": ndoc.id,
                     "subview": "dc",
                     "previous_revision": current.revision,
-                    "updated_revision": ndoc.revision
+                    "revision": ndoc.revision,
+                    'timestamp': ndoc.revision.timestamp,
+                    "url": reverse("docdc_view", args=[ndoc.id])
                 }
             except Exception, e:
                 }
             except Exception, e:
-                lib._rollback()
+                if ndoc: lib._rollback()
                 raise e
         except RevisionNotFound:
             return response.EntityNotFound().django_response()
 
                 raise e
         except RevisionNotFound:
             return response.EntityNotFound().django_response()
 
-
-
 class MergeHandler(BaseHandler):
     allowed_methods = ('POST',)
 
 class MergeHandler(BaseHandler):
     allowed_methods = ('POST',)
 
@@ -409,7 +504,7 @@ class MergeHandler(BaseHandler):
                 document=docid,
                 source_revision = str(udoc.revision),
                 status="N",
                 document=docid,
                 source_revision = str(udoc.revision),
                 status="N",
-                comment = form.cleaned_data['comment'] or '$AUTO$ Document shared.'
+                comment = form.cleaned_data['message'] or '$AUTO$ Document shared.'
             )
 
             prq.save()
             )
 
             prq.save()
@@ -423,19 +518,22 @@ class MergeHandler(BaseHandler):
             success, changed = udoc.update(request.user.username)
 
         if form.cleaned_data['type'] == 'share':
             success, changed = udoc.update(request.user.username)
 
         if form.cleaned_data['type'] == 'share':
-            success, changed = udoc.share(form.cleaned_data['comment'])
+            success, changed = udoc.share(form.cleaned_data['message'])
 
         if not success:
 
         if not success:
-            return response.EntityConflict().django_response()
+            return response.EntityConflict().django_response({
+                'reason': 'merge-failure',
+            })
 
         if not changed:
             return response.SuccessNoContent().django_response()
 
 
         if not changed:
             return response.SuccessNoContent().django_response()
 
-        new_udoc = udoc.latest()
+        nudoc = udoc.latest()
 
         return response.SuccessAllOk().django_response({
 
         return response.SuccessAllOk().django_response({
-            "name": udoc.id,
+            "name": nudoc.id,
             "parent_user_resivion": udoc.revision,
             "parent_revision": doc.revision,
             "parent_user_resivion": udoc.revision,
             "parent_revision": doc.revision,
-            "revision": udoc.revision,
+            "revision": nudoc.revision,
+            'timestamp': nudoc.revision.timestamp,
         })
         })