X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/51d914bc0007135058a61623a2d057d2a7626a28..97e1a72c211205e4596accbfc86cf5eeaec487ee:/apps/api/handlers/library_handlers.py diff --git a/apps/api/handlers/library_handlers.py b/apps/api/handlers/library_handlers.py index 2170b44b..488c2d40 100644 --- a/apps/api/handlers/library_handlers.py +++ b/apps/api/handlers/library_handlers.py @@ -1,5 +1,6 @@ -import os.path # -*- encoding: utf-8 -*- +import os.path +import logging __author__= "Łukasz Rekucki" __date__ = "$2009-09-25 15:49:50$" @@ -12,10 +13,11 @@ from datetime import date from django.core.urlresolvers import reverse from django.utils import simplejson as json +from django.db import IntegrityError import librarian import librarian.html -from librarian import dcparser +from librarian import dcparser, parser from wlrepo import * from explorer.models import PullRequest, GalleryForDocument @@ -29,9 +31,14 @@ from api.models import PartCache # import settings + +log = logging.getLogger('platforma.api') + + # # Document List Handlers # +# TODO: security check class BasicLibraryHandler(AnonymousBaseHandler): allowed_methods = ('GET',) @@ -45,18 +52,21 @@ class BasicLibraryHandler(AnonymousBaseHandler): return {'documents' : document_list} +# +# This handler controlls the document collection +# class LibraryHandler(BaseHandler): allowed_methods = ('GET', 'POST') anonymous = BasicLibraryHandler + @hglibrary def read(self, request, lib): """Return the list of documents.""" documents = {} - for docid in lib.documents(): - docid = docid.decode('utf-8') + for docid in lib.documents(): documents[docid] = { 'url': reverse('document_view', args=[docid]), 'name': docid, @@ -71,7 +81,7 @@ class LibraryHandler(BaseHandler): for part, docid in parts: # this way, we won't display broken links if not documents.has_key(part): - print "NOT FOUND:", part + log.info("NOT FOUND: %s", part) continue parent = documents[docid] @@ -80,10 +90,7 @@ class LibraryHandler(BaseHandler): # not top-level anymore document_tree.pop(part) parent['parts'].append(child) - - # sort the right way - for doc in documents.itervalues(): doc['parts'].sort(key=natural_order(lambda d: d['name'])) @@ -100,6 +107,9 @@ class LibraryHandler(BaseHandler): else: data = request.FILES['ocr_file'].read().decode('utf-8') + if data is None: + return response.BadRequest().django_response('You must pass ocr_data or ocr_file.') + if form.cleaned_data['generate_dc']: data = librarian.wrap_text(data, unicode(date.today())) @@ -108,7 +118,7 @@ class LibraryHandler(BaseHandler): try: lock = lib.lock() try: - print "DOCID", docid + log.info("DOCID %s", docid) doc = lib.document_create(docid) # document created, but no content yet @@ -116,9 +126,10 @@ class LibraryHandler(BaseHandler): doc = doc.quickwrite('xml', data.encode('utf-8'), '$AUTO$ XML data uploaded.', user=request.user.username) except Exception,e: + import traceback # rollback branch creation lib._rollback() - raise LibraryException("Exception occured:" + repr(e)) + raise LibraryException(traceback.format_exc()) url = reverse('document_view', args=[doc.id]) @@ -131,12 +142,16 @@ class LibraryHandler(BaseHandler): finally: lock.release() except LibraryException, e: - return response.InternalError().django_response(\ - {'exception': repr(e) }) + import traceback + return response.InternalError().django_response({ + "reason": traceback.format_exc() + }) except DocumentAlreadyExists: # Document is already there - return response.EntityConflict().django_response(\ - {"reason": "Document %s already exists." % docid}) + return response.EntityConflict().django_response({ + "reason": "already-exists", + "message": "Document already exists." % docid + }) # # Document Handlers @@ -153,9 +168,9 @@ class BasicDocumentHandler(AnonymousBaseHandler): result = { 'name': doc.id, - 'html_url': reverse('dochtml_view', args=[doc.id,doc.revision]), - 'text_url': reverse('doctext_view', args=[doc.id,doc.revision]), - 'dc_url': reverse('docdc_view', args=[doc.id,doc.revision]), + 'html_url': reverse('dochtml_view', args=[doc.id]), + 'text_url': reverse('doctext_view', args=[doc.id]), + 'dc_url': reverse('docdc_view', args=[doc.id]), 'public_revision': doc.revision, } @@ -171,21 +186,23 @@ class DocumentHandler(BaseHandler): @hglibrary def read(self, request, docid, lib): """Read document's meta data""" + log.info(u"Read %s (%s)" % (docid, type(docid)) ) try: doc = lib.document(docid) udoc = doc.take(request.user.username) except RevisionNotFound, e: return response.EntityNotFound().django_response({ - 'exception': type(e), 'message': e.message}) + 'exception': type(e), 'message': e.message, + 'docid': docid }) # is_shared = udoc.ancestorof(doc) # is_uptodate = is_shared or shared.ancestorof(document) result = { 'name': udoc.id, - 'html_url': reverse('dochtml_view', args=[udoc.id,udoc.revision]), - 'text_url': reverse('doctext_view', args=[udoc.id,udoc.revision]), - 'dc_url': reverse('docdc_view', args=[udoc.id,udoc.revision]), + 'html_url': reverse('dochtml_view', args=[udoc.id]), + 'text_url': reverse('doctext_view', args=[udoc.id]), + 'dc_url': reverse('docdc_view', args=[udoc.id]), 'gallery_url': reverse('docgallery_view', args=[udoc.id]), 'merge_url': reverse('docmerge_view', args=[udoc.id]), 'user_revision': udoc.revision, @@ -207,9 +224,11 @@ class DocumentHTMLHandler(BaseHandler): allowed_methods = ('GET') @hglibrary - def read(self, request, docid, revision, lib): + def read(self, request, docid, lib, stylesheet='partial'): """Read document as html text""" try: + revision = request.GET.get('revision', 'latest') + if revision == 'latest': document = lib.document(docid) else: @@ -219,44 +238,57 @@ class DocumentHTMLHandler(BaseHandler): return response.BadRequest().django_response({'reason': 'name-mismatch', 'message': 'Provided revision refers, to document "%s", but provided "%s"' % (document.id, docid) }) - return librarian.html.transform(document.data('xml'), is_file=False) + return librarian.html.transform(document.data('xml'), is_file=False, \ + parse_dublincore=False, stylesheet=stylesheet,\ + options={ + "with-paths": 'boolean(1)', + }) + except (EntryNotFound, RevisionNotFound), e: return response.EntityNotFound().django_response({ - 'exception': type(e), 'message': e.message}) - + 'reason': 'not-found', 'message': e.message}) + except librarian.ParseError, e: + return response.InternalError().django_response({ + 'reason': 'xml-parse-error', 'message': e.message }) # # Image Gallery # -from django.core.files.storage import FileSystemStorage class DocumentGalleryHandler(BaseHandler): allowed_methods = ('GET') + def read(self, request, docid): """Read meta-data about scans for gallery of this document.""" galleries = [] + from urllib import quote for assoc in GalleryForDocument.objects.filter(document=docid): dirpath = os.path.join(settings.MEDIA_ROOT, assoc.subpath) if not os.path.isdir(dirpath): - print u"[WARNING]: missing gallery %s" % dirpath + log.warn(u"[WARNING]: missing gallery %s", dirpath) continue gallery = {'name': assoc.name, 'pages': []} - for file in sorted(os.listdir(dirpath), key=natural_order()): - print file + for file in os.listdir(dirpath): + if not isinstance(file, unicode): + log.warn(u"File %r is gallery %r is not unicode. Ommiting."\ + % (file, dirpath) ) + continue + name, ext = os.path.splitext(os.path.basename(file)) - if ext.lower() not in ['.png', '.jpeg', '.jpg']: - print "Ignoring:", name, ext + if ext.lower() not in [u'.png', u'.jpeg', u'.jpg']: + log.info(u"Ignoring: %s %s", name, ext) continue - url = settings.MEDIA_URL + assoc.subpath + u'/' + file.decode('utf-8'); - gallery['pages'].append(url) - + url = settings.MEDIA_URL + assoc.subpath + u'/' + file; + gallery['pages'].append( quote(url.encode('utf-8')) ) + + gallery['pages'].sort() galleries.append(gallery) return galleries @@ -269,12 +301,16 @@ XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P[^\1 # # # + class DocumentTextHandler(BaseHandler): - allowed_methods = ('GET', 'PUT') + allowed_methods = ('GET', 'POST') @hglibrary - def read(self, request, docid, revision, lib): - """Read document as raw text""" + def read(self, request, docid, lib): + """Read document as raw text""" + revision = request.GET.get('revision', 'latest') + part = request.GET.get('part', False) + try: if revision == 'latest': document = lib.document(docid) @@ -286,20 +322,35 @@ class DocumentTextHandler(BaseHandler): 'message': 'Provided revision is not valid for this document'}) # TODO: some finer-grained access control - return document.data('xml') + if part is False: + # we're done :) + return document.data('xml') + else: + xdoc = parser.WLDocument.from_string(document.data('xml'),\ + parse_dublincore=False) + ptext = xdoc.part_as_text(part) + + if ptext is None: + return response.EntityNotFound().django_response({ + 'reason': 'no-part-in-document' + }) + + return ptext + except librarian.ParseError: + return response.EntityNotFound().django_response({ + 'reason': 'invalid-document-state', + 'exception': type(e), 'message': e.message + }) except (EntryNotFound, RevisionNotFound), e: return response.EntityNotFound().django_response({ - 'exception': type(e), 'message': e.message}) + 'reason': 'not-found', + 'exception': type(e), 'message': e.message + }) @hglibrary - def update(self, request, docid, revision, lib): + def create(self, request, docid, lib): try: - data = request.PUT['contents'] - - if request.PUT.has_key('message'): - msg = u"$USER$ " + request.PUT['message'] - else: - msg = u"$AUTO$ XML content update." + revision = request.POST['revision'] current = lib.document(docid, request.user.username) orig = lib.document_for_rev(revision) @@ -310,11 +361,38 @@ class DocumentTextHandler(BaseHandler): "provided_revision": orig.revision, "latest_revision": current.revision }) + if request.POST.has_key('message'): + msg = u"$USER$ " + request.POST['message'] + else: + msg = u"$AUTO$ XML content update." + + if request.POST.has_key('contents'): + data = request.POST['contents'] + else: + if not request.POST.has_key('chunks'): + # bad request + return response.BadRequest().django_response({'reason': 'invalid-arguments', + 'message': 'No contents nor chunks specified.'}) + + # TODO: validate + parts = json.loads(request.POST['chunks']) + xdoc = parser.WLDocument.from_string(current.data('xml')) + + errors = xdoc.merge_chunks(parts) + + if len(errors): + return response.EntityConflict().django_response({ + "reason": "invalid-chunks", + "message": "Unable to merge following parts into the document: %s " % ",".join(errors) + }) + + data = xdoc.serialize() + # try to find any Xinclude tags includes = [m.groupdict()['link'] for m in (re.finditer(\ XINCLUDE_REGEXP, data, flags=re.UNICODE) or []) ] - print "INCLUDES: ", includes + log.info("INCLUDES: %s", includes) # TODO: provide useful routines to make this simpler def xml_update_action(lib, resolve): @@ -353,7 +431,7 @@ class DocumentTextHandler(BaseHandler): "previous_revision": current.revision, "revision": ndoc.revision, 'timestamp': ndoc.revision.timestamp, - "url": reverse("doctext_view", args=[ndoc.id, ndoc.revision]) + "url": reverse("doctext_view", args=[ndoc.id]) }) except Exception, e: if ndoc: lib._rollback() @@ -369,12 +447,14 @@ class DocumentTextHandler(BaseHandler): # @requires librarian # class DocumentDublinCoreHandler(BaseHandler): - allowed_methods = ('GET', 'PUT') + allowed_methods = ('GET', 'POST') @hglibrary - def read(self, request, docid, revision, lib): + def read(self, request, docid, lib): """Read document as raw text""" try: + revision = request.GET.get('revision', 'latest') + if revision == 'latest': doc = lib.document(docid) else: @@ -392,10 +472,12 @@ class DocumentDublinCoreHandler(BaseHandler): 'exception': type(e), 'message': e.message}) @hglibrary - def update(self, request, docid, revision, lib): + def create(self, request, docid, lib): try: - bi_json = request.PUT['contents'] - if request.PUT.has_key('message'): + bi_json = request.POST['contents'] + revision = request.POST['revision'] + + if request.POST.has_key('message'): msg = u"$USER$ " + request.PUT['message'] else: msg = u"$AUTO$ Dublin core update." @@ -425,7 +507,7 @@ class DocumentDublinCoreHandler(BaseHandler): "previous_revision": current.revision, "revision": ndoc.revision, 'timestamp': ndoc.revision.timestamp, - "url": reverse("docdc_view", args=[ndoc.id, ndoc.revision]) + "url": reverse("docdc_view", args=[ndoc.id]) } except Exception, e: if ndoc: lib._rollback() @@ -474,42 +556,50 @@ class MergeHandler(BaseHandler): "provided": target_rev, "latest": udoc.revision }) - if not request.user.has_perm('explorer.book.can_share'): - # User is not permitted to make a merge, right away - # So we instead create a pull request in the database - prq = PullRequest( - comitter=request.user, - document=docid, - source_revision = str(udoc.revision), - status="N", - comment = form.cleaned_data['message'] or '$AUTO$ Document shared.' - ) - - prq.save() - return response.RequestAccepted().django_response(\ - ticket_status=prq.status, \ - ticket_uri=reverse("pullrequest_view", args=[prq.id]) ) - if form.cleaned_data['type'] == 'update': # update is always performed from the file branch # to the user branch success, changed = udoc.update(request.user.username) - if form.cleaned_data['type'] == 'share': - success, changed = udoc.share(form.cleaned_data['message']) + if form.cleaned_data['type'] == 'share': + if not request.user.has_perm('explorer.document.can_share'): + # User is not permitted to make a merge, right away + # So we instead create a pull request in the database + try: + prq, created = PullRequest.objects.get_or_create( + source_revision = str(udoc.revision), + defaults = { + 'comitter': request.user, + 'document': docid, + 'status': "N", + 'comment': form.cleaned_data['message'] or '$AUTO$ Document shared.', + } + ) + + return response.RequestAccepted().django_response(\ + ticket_status=prq.status, \ + ticket_uri=reverse("pullrequest_view", args=[prq.id]) ) + except IntegrityError: + return response.EntityConflict().django_response({ + 'reason': 'request-already-exist' + }) + else: + success, changed = udoc.share(form.cleaned_data['message']) if not success: - return response.EntityConflict().django_response({}) + return response.EntityConflict().django_response({ + 'reason': 'merge-failure', + }) if not changed: return response.SuccessNoContent().django_response() - new_udoc = udoc.latest() + nudoc = udoc.latest() return response.SuccessAllOk().django_response({ - "name": udoc.id, + "name": nudoc.id, "parent_user_resivion": udoc.revision, "parent_revision": doc.revision, - "revision": ndoc.revision, - 'timestamp': ndoc.revision.timestamp, + "revision": nudoc.revision, + 'timestamp': nudoc.revision.timestamp, })