X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/cde981363740cf6907d3a3133e4ea9945b724e04..60f59d5903e5f4289d7a4b224c3f05e161395815:/apps/api/handlers/library_handlers.py?ds=inline diff --git a/apps/api/handlers/library_handlers.py b/apps/api/handlers/library_handlers.py index c5666dd0..b7260674 100644 --- a/apps/api/handlers/library_handlers.py +++ b/apps/api/handlers/library_handlers.py @@ -1,4 +1,6 @@ # -*- encoding: utf-8 -*- +import os.path +import logging __author__= "Łukasz Rekucki" __date__ = "$2009-09-25 15:49:50$" @@ -16,15 +18,22 @@ import librarian import librarian.html from librarian import dcparser -from wlrepo import RevisionNotFound, LibraryException, DocumentAlreadyExists -from explorer.models import PullRequest +from wlrepo import * +from explorer.models import PullRequest, GalleryForDocument # internal imports import api.forms as forms import api.response as response -from api.utils import validate_form, hglibrary +from api.utils import validate_form, hglibrary, natural_order from api.models import PartCache +# +import settings + + +log = logging.getLogger('platforma.api') + + # # Document List Handlers # @@ -51,34 +60,36 @@ class LibraryHandler(BaseHandler): documents = {} - for docid in lib.documents(): + for docid in lib.documents(): documents[docid] = { 'url': reverse('document_view', args=[docid]), 'name': docid, 'parts': [] } - related = PartCache.objects.defer('part_id')\ + parts = PartCache.objects.defer('part_id')\ .values_list('part_id', 'document_id').distinct() + + document_tree = dict(documents) - for part, docid in related: + for part, docid in parts: # this way, we won't display broken links if not documents.has_key(part): + log.info("NOT FOUND: %s", part) continue - child = documents[part] parent = documents[docid] + child = documents[part] + + # not top-level anymore + document_tree.pop(part) + parent['parts'].append(child) + + for doc in documents.itervalues(): + doc['parts'].sort(key=natural_order(lambda d: d['name'])) - if isinstance(parent, dict): # the parent is top-level - documents.pop(part) - parent['parts'].append(child) - documents[part] = child['parts'] - else: # not top-level - parent.append(child) - - return { - 'documents': [d for d in documents.itervalues() if isinstance(d, dict)] - } + return {'documents': sorted(document_tree.itervalues(), + key=natural_order(lambda d: d['name']) ) } @validate_form(forms.DocumentUploadForm, 'POST') @hglibrary @@ -90,6 +101,9 @@ class LibraryHandler(BaseHandler): else: data = request.FILES['ocr_file'].read().decode('utf-8') + if data is None: + return response.BadRequest().django_response('You must pass ocr_data or ocr_file.') + if form.cleaned_data['generate_dc']: data = librarian.wrap_text(data, unicode(date.today())) @@ -98,7 +112,7 @@ class LibraryHandler(BaseHandler): try: lock = lib.lock() try: - print "DOCID", docid + log.info("DOCID %s", docid) doc = lib.document_create(docid) # document created, but no content yet @@ -106,9 +120,10 @@ class LibraryHandler(BaseHandler): doc = doc.quickwrite('xml', data.encode('utf-8'), '$AUTO$ XML data uploaded.', user=request.user.username) except Exception,e: + import traceback # rollback branch creation lib._rollback() - raise LibraryException("Exception occured:" + repr(e)) + raise LibraryException(traceback.format_exc()) url = reverse('document_view', args=[doc.id]) @@ -121,8 +136,9 @@ class LibraryHandler(BaseHandler): finally: lock.release() except LibraryException, e: + import traceback return response.InternalError().django_response(\ - {'exception': repr(e) }) + {'exception': traceback.format_exc()} ) except DocumentAlreadyExists: # Document is already there return response.EntityConflict().django_response(\ @@ -143,9 +159,9 @@ class BasicDocumentHandler(AnonymousBaseHandler): result = { 'name': doc.id, - 'html_url': reverse('dochtml_view', args=[doc.id,doc.revision]), - 'text_url': reverse('doctext_view', args=[doc.id,doc.revision]), - 'dc_url': reverse('docdc_view', args=[doc.id,doc.revision]), + 'html_url': reverse('dochtml_view', args=[doc.id]), + 'text_url': reverse('doctext_view', args=[doc.id]), + 'dc_url': reverse('docdc_view', args=[doc.id]), 'public_revision': doc.revision, } @@ -161,22 +177,29 @@ class DocumentHandler(BaseHandler): @hglibrary def read(self, request, docid, lib): """Read document's meta data""" + log.info(u"Read %s (%s)" % (docid, type(docid)) ) try: doc = lib.document(docid) udoc = doc.take(request.user.username) - except RevisionNotFound: - return request.EnityNotFound().django_response() + except RevisionNotFound, e: + return response.EntityNotFound().django_response({ + 'exception': type(e), 'message': e.message, + 'docid': docid }) # is_shared = udoc.ancestorof(doc) # is_uptodate = is_shared or shared.ancestorof(document) result = { 'name': udoc.id, - 'html_url': reverse('dochtml_view', args=[udoc.id,udoc.revision]), - 'text_url': reverse('doctext_view', args=[udoc.id,udoc.revision]), - 'dc_url': reverse('docdc_view', args=[udoc.id,udoc.revision]), + 'html_url': reverse('dochtml_view', args=[udoc.id]), + 'text_url': reverse('doctext_view', args=[udoc.id]), + 'dc_url': reverse('docdc_view', args=[udoc.id]), + 'gallery_url': reverse('docgallery_view', args=[udoc.id]), + 'merge_url': reverse('docmerge_view', args=[udoc.id]), 'user_revision': udoc.revision, - 'public_revision': doc.revision, + 'user_timestamp': udoc.revision.timestamp, + 'public_revision': doc.revision, + 'public_timestamp': doc.revision.timestamp, } return result @@ -189,23 +212,72 @@ class DocumentHandler(BaseHandler): # # class DocumentHTMLHandler(BaseHandler): - allowed_methods = ('GET', 'PUT') + allowed_methods = ('GET') @hglibrary - def read(self, request, docid, revision, lib): + def read(self, request, docid, lib): """Read document as html text""" try: + revision = request.GET.get('revision', 'latest') + if revision == 'latest': document = lib.document(docid) else: document = lib.document_for_rev(revision) - return librarian.html.transform(document.data('xml'), is_file=False) - except RevisionNotFound: - return response.EntityNotFound().django_response() + if document.id != docid: + return response.BadRequest().django_response({'reason': 'name-mismatch', + 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) }) + + return librarian.html.transform(document.data('xml'), is_file=False, parse_dublincore=False) + except (EntryNotFound, RevisionNotFound), e: + return response.EntityNotFound().django_response({ + 'reason': 'not-found', 'message': e.message}) + except librarian.ParseError, e: + return response.InternalError().django_response({ + 'reason': 'xml-parse-error', 'message': e.message }) + +# +# Image Gallery +# + +class DocumentGalleryHandler(BaseHandler): + allowed_methods = ('GET') + + + def read(self, request, docid): + """Read meta-data about scans for gallery of this document.""" + galleries = [] + from urllib import quote + + for assoc in GalleryForDocument.objects.filter(document=docid): + dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath) + if not os.path.isdir(dirpath): + log.warn(u"[WARNING]: missing gallery %s", dirpath) + continue + + gallery = {'name': assoc.name, 'pages': []} + + for file in os.listdir(dirpath): + if not isinstance(file, unicode): + log.warn(u"File %r is gallery %r is not unicode. Ommiting."\ + % (file, dirpath) ) + continue + + name, ext = os.path.splitext(os.path.basename(file)) + if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']: + log.info(u"Ignoring: %s %s", name, ext) + continue + url = settings.MEDIA_URL + assoc.subpath + u'/' + file; + gallery['pages'].append( quote(url.encode('utf-8')) ) + + gallery['pages'].sort() + galleries.append(gallery) + + return galleries # # Document Text View @@ -214,30 +286,38 @@ class DocumentHTMLHandler(BaseHandler): XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P[^\1]+?)\1\s*[^>]*?>""" # # +# class DocumentTextHandler(BaseHandler): - allowed_methods = ('GET', 'PUT') + allowed_methods = ('GET', 'POST') @hglibrary - def read(self, request, docid, revision, lib): - """Read document as raw text""" + def read(self, request, docid, lib): + """Read document as raw text""" + revision = request.GET.get('revision', 'latest') try: if revision == 'latest': document = lib.document(docid) else: document = lib.document_for_rev(revision) + + if document.id != docid: + return response.BadRequest().django_response({'reason': 'name-mismatch', + 'message': 'Provided revision is not valid for this document'}) # TODO: some finer-grained access control return document.data('xml') - except RevisionNotFound: - return response.EntityNotFound().django_response() + except (EntryNotFound, RevisionNotFound), e: + return response.EntityNotFound().django_response({ + 'exception': type(e), 'message': e.message}) @hglibrary - def update(self, request, docid, revision, lib): + def create(self, request, docid, lib): try: - data = request.PUT['contents'] + data = request.POST['contents'] + revision = request.POST['revision'] - if request.PUT.has_key('message'): - msg = u"$USER$ " + request.PUT['message'] + if request.POST.has_key('message'): + msg = u"$USER$ " + request.POST['message'] else: msg = u"$AUTO$ XML content update." @@ -254,6 +334,8 @@ class DocumentTextHandler(BaseHandler): includes = [m.groupdict()['link'] for m in (re.finditer(\ XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ] + log.info("INCLUDES: %s", includes) + # TODO: provide useful routines to make this simpler def xml_update_action(lib, resolve): try: @@ -276,25 +358,30 @@ class DocumentTextHandler(BaseHandler): # now that the parts are ok, write xml f = lib._fileopen(resolve('xml'), 'w+') - f.write(data) + f.write(data.encode('utf-8')) f.close() - + + ndoc = None ndoc = current.invoke_and_commit(\ xml_update_action, lambda d: (msg, current.owner) ) try: # return the new revision number - return { + return response.SuccessAllOk().django_response({ "document": ndoc.id, "subview": "xml", "previous_revision": current.revision, - "updated_revision": ndoc.revision - } + "revision": ndoc.revision, + 'timestamp': ndoc.revision.timestamp, + "url": reverse("doctext_view", args=[ndoc.id]) + }) except Exception, e: - lib._rollback() + if ndoc: lib._rollback() raise e except RevisionNotFound, e: - return response.EntityNotFound().django_response(e) + return response.EntityNotFound(mimetype="text/plain").\ + django_response(e.message) + # # Dublin Core handlers @@ -302,27 +389,37 @@ class DocumentTextHandler(BaseHandler): # @requires librarian # class DocumentDublinCoreHandler(BaseHandler): - allowed_methods = ('GET', 'PUT') + allowed_methods = ('GET', 'POST') @hglibrary - def read(self, request, docid, revision, lib): + def read(self, request, docid, lib): """Read document as raw text""" try: + revision = request.GET.get('revision', 'latest') + if revision == 'latest': doc = lib.document(docid) else: doc = lib.document_for_rev(revision) + + + if document.id != docid: + return response.BadRequest().django_response({'reason': 'name-mismatch', + 'message': 'Provided revision is not valid for this document'}) bookinfo = dcparser.BookInfo.from_string(doc.data('xml')) return bookinfo.serialize() - except RevisionNotFound: - return response.EntityNotFound().django_response() + except (EntryNotFound, RevisionNotFound), e: + return response.EntityNotFound().django_response({ + 'exception': type(e), 'message': e.message}) @hglibrary - def update(self, request, docid, revision, lib): + def create(self, request, docid, lib): try: - bi_json = request.PUT['contents'] - if request.PUT.has_key('message'): + bi_json = request.POST['contents'] + revision = request.POST['revision'] + + if request.POST.has_key('message'): msg = u"$USER$ " + request.PUT['message'] else: msg = u"$AUTO$ Dublin core update." @@ -350,16 +447,16 @@ class DocumentDublinCoreHandler(BaseHandler): "document": ndoc.id, "subview": "dc", "previous_revision": current.revision, - "updated_revision": ndoc.revision + "revision": ndoc.revision, + 'timestamp': ndoc.revision.timestamp, + "url": reverse("docdc_view", args=[ndoc.id]) } except Exception, e: - lib._rollback() + if ndoc: lib._rollback() raise e except RevisionNotFound: return response.EntityNotFound().django_response() - - class MergeHandler(BaseHandler): allowed_methods = ('POST',) @@ -409,7 +506,7 @@ class MergeHandler(BaseHandler): document=docid, source_revision = str(udoc.revision), status="N", - comment = form.cleaned_data['comment'] or '$AUTO$ Document shared.' + comment = form.cleaned_data['message'] or '$AUTO$ Document shared.' ) prq.save() @@ -423,19 +520,22 @@ class MergeHandler(BaseHandler): success, changed = udoc.update(request.user.username) if form.cleaned_data['type'] == 'share': - success, changed = udoc.share(form.cleaned_data['comment']) + success, changed = udoc.share(form.cleaned_data['message']) if not success: - return response.EntityConflict().django_response() + return response.EntityConflict().django_response({ + 'reason': 'merge-failure', + }) if not changed: return response.SuccessNoContent().django_response() - new_udoc = udoc.latest() + nudoc = udoc.latest() return response.SuccessAllOk().django_response({ - "name": udoc.id, + "name": nudoc.id, "parent_user_resivion": udoc.revision, "parent_revision": doc.revision, - "revision": udoc.revision, + "revision": nudoc.revision, + 'timestamp': nudoc.revision.timestamp, })