7db7b7958167d2abe32419322f078e65a349706f
[redakcja.git] / apps / api / handlers / library_handlers.py
1 # -*- encoding: utf-8 -*-
2
# Module metadata.  The author's name is non-ASCII; the file's utf-8
# encoding declaration makes that legal in Python 2 source.
__author__ = "Łukasz Rekucki"
__date__ = "$2009-09-25 15:49:50$"
__doc__ = "Module documentation."
6
7 from piston.handler import BaseHandler, AnonymousBaseHandler
8
9 import re
10 from datetime import date
11
12 from django.core.urlresolvers import reverse
13 from django.utils import simplejson as json
14
15 import librarian
16 import librarian.html
17 from librarian import dcparser
18
19 from wlrepo import RevisionNotFound, LibraryException, DocumentAlreadyExists
20 from explorer.models import PullRequest
21
22 # internal imports
23 import api.forms as forms
24 import api.response as response
25 from api.utils import validate_form, hglibrary
26 from api.models import PartCache
27
28 #
29 # Document List Handlers
30 #
class BasicLibraryHandler(AnonymousBaseHandler):
    """Read-only library listing (used as ``LibraryHandler.anonymous``)."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, lib):
        """Return a flat list of all documents in the library.

        Each entry holds the document's name and the URL of its
        ``document_view``.
        """
        listing = []
        for name in lib.documents():
            listing.append({
                'url': reverse('document_view', args=[name]),
                'name': name,
            })

        return {'documents': listing}
42         
43
class LibraryHandler(BaseHandler):
    """Library listing and document upload.

    ``BasicLibraryHandler`` is registered as the anonymous counterpart.
    """
    allowed_methods = ('GET', 'POST')
    anonymous = BasicLibraryHandler

    @hglibrary
    def read(self, request, lib):
        """Return the list of documents, with parts nested under parents.

        Every document reported by ``lib.documents()`` gets an entry with
        ``url``, ``name`` and ``parts`` keys.  The (part, document) pairs
        stored in ``PartCache`` are then used to place each part in the
        ``parts`` list of the document that includes it.  Only documents
        that are not a part of any other document are returned at the
        top level.
        """
        documents = {}
        for docid in lib.documents():
            documents[docid] = {
                'url': reverse('document_view', args=[docid]),
                'name': docid,
                'parts': [],
            }

        related = PartCache.objects.defer('part_id')\
            .values_list('part_id', 'document_id').distinct()

        # ids of documents that ended up nested under some parent
        nested = set()

        for part, docid in related:
            # Skip relations whose part or parent is not in the library;
            # this way, we won't display broken links (and won't crash on
            # a stale cache row pointing at a removed parent).
            if part not in documents or docid not in documents:
                continue

            # A document listing itself as a part would vanish from the
            # output and create a trivially cyclic structure.
            if part == docid:
                continue

            documents[docid]['parts'].append(documents[part])
            nested.add(part)

        return {
            'documents': [entry for docid, entry in documents.items()
                          if docid not in nested],
        }

    @validate_form(forms.DocumentUploadForm, 'POST')
    @hglibrary
    def create(self, request, form, lib):
        """Create a new document from uploaded OCR text.

        The text comes from the ``ocr_data`` form field or, when that is
        empty, from the uploaded ``ocr_file`` (decoded as UTF-8).  When
        ``generate_dc`` is set the text is passed through
        ``librarian.wrap_text`` with today's date.

        Returns an ``EntityCreated`` response with the new document's url,
        name and revision; ``EntityConflict`` when the document already
        exists; ``InternalError`` for any other ``LibraryException``.
        """
        if form.cleaned_data['ocr_data']:
            data = form.cleaned_data['ocr_data']
        else:
            data = request.FILES['ocr_file'].read().decode('utf-8')

        if form.cleaned_data['generate_dc']:
            data = librarian.wrap_text(data, unicode(date.today()))

        docid = form.cleaned_data['bookname']

        try:
            lock = lib.lock()
            try:
                doc = lib.document_create(docid)
                # document created, but no content yet

                try:
                    doc = doc.quickwrite('xml', data.encode('utf-8'),
                        '$AUTO$ XML data uploaded.', user=request.user.username)
                except Exception as e:
                    # rollback branch creation
                    lib._rollback()
                    raise LibraryException("Exception occured:" + repr(e))

                url = reverse('document_view', args=[doc.id])

                return response.EntityCreated().django_response(
                    body={
                        'url': url,
                        'name': doc.id,
                        'revision': doc.revision},
                    url=url)
            finally:
                lock.release()
        except DocumentAlreadyExists:
            # Handled before LibraryException so that the conflict answer
            # stays reachable even if DocumentAlreadyExists derives from it.
            return response.EntityConflict().django_response(
                {"reason": "Document %s already exists." % docid})
        except LibraryException as e:
            return response.InternalError().django_response(
                {'exception': repr(e)})
130
131 #
132 # Document Handlers
133 #
class BasicDocumentHandler(AnonymousBaseHandler):
    """Read-only document summary (used as ``DocumentHandler.anonymous``)."""
    allowed_methods = ('GET',)

    @hglibrary
    def read(self, request, docid, lib):
        """Return the name, view URLs and public revision of a document.

        Responds with ``EntityNotFound`` when the library raises
        ``RevisionNotFound`` for ``docid``.
        """
        try:
            doc = lib.document(docid)
        except RevisionNotFound:
            # The original returned ``rc.NOT_FOUND``, but ``rc`` is never
            # imported in this module; use the same not-found response as
            # the other handlers here.
            return response.EntityNotFound().django_response()

        result = {
            'name': doc.id,
            'html_url': reverse('dochtml_view', args=[doc.id, doc.revision]),
            'text_url': reverse('doctext_view', args=[doc.id, doc.revision]),
            'dc_url': reverse('docdc_view', args=[doc.id, doc.revision]),
            'public_revision': doc.revision,
        }

        return result
153
154 #
155 # Document Meta Data
156 #
class DocumentHandler(BaseHandler):
    """Document summary for a logged-in user.

    ``BasicDocumentHandler`` is registered as the anonymous counterpart.
    """
    allowed_methods = ('GET', 'PUT')
    anonymous = BasicDocumentHandler

    @hglibrary
    def read(self, request, docid, lib):
        """Read document's meta data.

        Looks up the document and the requesting user's copy of it
        (``doc.take(username)``).  The returned URLs and ``user_revision``
        refer to the user's copy; ``public_revision`` refers to the
        document itself.  Responds with ``EntityNotFound`` when the
        library raises ``RevisionNotFound``.
        """
        try:
            doc = lib.document(docid)
            udoc = doc.take(request.user.username)
        except RevisionNotFound:
            # The response classes live in ``api.response``; the original
            # called the misspelled ``request.EnityNotFound``.
            return response.EntityNotFound().django_response()

        # is_shared = udoc.ancestorof(doc)
        # is_uptodate = is_shared or shared.ancestorof(document)

        result = {
            'name': udoc.id,
            'html_url': reverse('dochtml_view', args=[udoc.id, udoc.revision]),
            'text_url': reverse('doctext_view', args=[udoc.id, udoc.revision]),
            'dc_url': reverse('docdc_view', args=[udoc.id, udoc.revision]),
            'user_revision': udoc.revision,
            'public_revision': doc.revision,
        }

        return result

    @hglibrary
    def update(self, request, docid, lib):
        """Update information about the document.

        Not implemented yet: the request is accepted and nothing is
        changed (the method returns ``None``).
        """
        return
188 #
189 #
190 #
class DocumentHTMLHandler(BaseHandler):
    """HTML rendering of a document revision."""
    allowed_methods = ('GET', 'PUT')

    @hglibrary
    def read(self, request, docid, revision, lib):
        """Return the document's XML transformed to HTML.

        ``revision`` is either the literal ``'latest'`` (look the document
        up by ``docid``) or a revision identifier passed to
        ``lib.document_for_rev``.  Responds with ``EntityNotFound`` when
        the library raises ``RevisionNotFound``.
        """
        try:
            if revision != 'latest':
                document = lib.document_for_rev(revision)
            else:
                document = lib.document(docid)

            xml = document.data('xml')
            return librarian.html.transform(xml, is_file=False)
        except RevisionNotFound:
            return response.EntityNotFound().django_response()
206
207
208
209
210 #
211 # Document Text View
212 #
213
# Matches an (optionally namespace-prefixed) <include> tag whose href points
# into the repository, e.g. <xi:include href="wlrepo://some_part" />.
# Group 1 is the quote character used around the href and ``link`` is the
# part after the ``wlrepo://`` prefix.  The link is matched lazily up to the
# same quote character; a backreference cannot be used inside a character
# class (``[^\1]`` means "anything but chr(1)"), and ``.`` keeps the link
# from spilling over a line break when the closing quote is missing.
XINCLUDE_REGEXP = r"""<(?:\w+:)?include\s+[^>]*?href=("|')wlrepo://(?P<link>.+?)\1\s*[^>]*?>"""
215 #
216 #
class DocumentTextHandler(BaseHandler):
    """Raw XML text of a document: read a revision or commit new content."""
    allowed_methods = ('GET', 'PUT')

    @hglibrary
    def read(self, request, docid, revision, lib):
        """Read document as raw text.

        ``revision`` is either the literal ``'latest'`` (look the document
        up by ``docid``) or a revision identifier passed to
        ``lib.document_for_rev``.  Responds with ``EntityNotFound`` when
        the library raises ``RevisionNotFound``.
        """
        try:
            if revision == 'latest':
                document = lib.document(docid)
            else:
                document = lib.document_for_rev(revision)

            # TODO: some finer-grained access control
            return document.data('xml')
        except RevisionNotFound:
            return response.EntityNotFound().django_response()

    @hglibrary
    def update(self, request, docid, revision, lib):
        """Commit new XML content for the user's copy of the document.

        Reads ``contents`` (and an optional ``message``) from
        ``request.PUT``.  If ``revision`` is not the current revision of
        the user's copy an ``EntityConflict`` ("out-of-date") response is
        returned.  Otherwise the list of ``wlrepo://`` includes found in
        the content is stored in the document's ``parts`` file (and in
        ``PartCache``) when it changed, the XML is written, and both are
        committed in one step.

        Returns ``SuccessAllOk`` with the previous and updated revision,
        or ``EntityNotFound`` when the library raises ``RevisionNotFound``.
        """
        try:
            data = request.PUT['contents']

            if 'message' in request.PUT:
                msg = u"$USER$ " + request.PUT['message']
            else:
                msg = u"$AUTO$ XML content update."

            current = lib.document(docid, request.user.username)
            orig = lib.document_for_rev(revision)

            if current != orig:
                return response.EntityConflict().django_response({
                        "reason": "out-of-date",
                        "provided_revision": orig.revision,
                        "latest_revision": current.revision})

            # try to find any Xinclude tags
            includes = [m.group('link') for m in re.finditer(
                XINCLUDE_REGEXP, data, flags=re.UNICODE)]

            # TODO: provide useful routines to make this simpler
            def xml_update_action(lib, resolve):
                # Best effort: a missing or unreadable parts file is
                # treated the same as "no parts stored yet".
                try:
                    f = lib._fileopen(resolve('parts'), 'r')
                    try:
                        stored_includes = json.loads(f.read())
                    finally:
                        f.close()
                except Exception:
                    stored_includes = []

                if stored_includes != includes:
                    f = lib._fileopen(resolve('parts'), 'w+')
                    try:
                        f.write(json.dumps(includes))
                    finally:
                        f.close()

                    lib._fileadd(resolve('parts'))

                    # update the parts cache
                    PartCache.update_cache(docid, current.owner,
                        stored_includes, includes)

                # now that the parts are ok, write xml
                f = lib._fileopen(resolve('xml'), 'w+')
                try:
                    f.write(data)
                finally:
                    f.close()

            ndoc = current.invoke_and_commit(
                xml_update_action, lambda d: (msg, current.owner))

            try:
                # return the new revision number
                return response.SuccessAllOk().django_response({
                    "document": ndoc.id,
                    "subview": "xml",
                    "previous_revision": current.revision,
                    "updated_revision": ndoc.revision,
                    "url": reverse("doctext_view", args=[ndoc.id, ndoc.revision])
                })
            except Exception:
                # the commit went through but we cannot report it: undo it
                if ndoc:
                    lib._rollback()
                # bare raise keeps the original traceback
                raise
        except RevisionNotFound as e:
            return response.EntityNotFound().django_response(e)
300
301 #
302 # Dublin Core handlers
303 #
304 # @requires librarian
305 #
class DocumentDublinCoreHandler(BaseHandler):
    """Dublin Core metadata of a document: read or replace it."""
    allowed_methods = ('GET', 'PUT')

    @hglibrary
    def read(self, request, docid, revision, lib):
        """Return the serialized Dublin Core info parsed from the XML.

        ``revision`` is either the literal ``'latest'`` (look the document
        up by ``docid``) or a revision identifier passed to
        ``lib.document_for_rev``.  Responds with ``EntityNotFound`` when
        the library raises ``RevisionNotFound``.
        """
        try:
            if revision == 'latest':
                doc = lib.document(docid)
            else:
                doc = lib.document_for_rev(revision)

            bookinfo = dcparser.BookInfo.from_string(doc.data('xml'))
            return bookinfo.serialize()
        except RevisionNotFound:
            return response.EntityNotFound().django_response()

    @hglibrary
    def update(self, request, docid, revision, lib):
        """Replace the document's Dublin Core info and commit the result.

        Reads the new info as JSON from ``request.PUT['contents']`` (and an
        optional ``message``).  If ``revision`` is not the current revision
        of the user's copy an ``EntityConflict`` ("out-of-date") response
        is returned.  Returns a dict with the previous and updated
        revision, or ``EntityNotFound`` when the library raises
        ``RevisionNotFound``.
        """
        # The module never imports ``parser``; pull it in here so that this
        # method does not fail with NameError.
        # NOTE(review): assumes ``librarian.parser`` provides WLDocument,
        # as the original call implies -- confirm.
        from librarian import parser

        try:
            bi_json = request.PUT['contents']
            if 'message' in request.PUT:
                msg = u"$USER$ " + request.PUT['message']
            else:
                msg = u"$AUTO$ Dublin core update."

            current = lib.document(docid, request.user.username)
            orig = lib.document_for_rev(revision)

            if current != orig:
                return response.EntityConflict().django_response({
                        "reason": "out-of-date",
                        "provided": orig.revision,
                        "latest": current.revision})

            # Parse the current XML and swap in the new book info.  (The
            # original assigned to and serialized an undefined ``document``.)
            xmldoc = parser.WLDocument.from_string(current.data('xml'))
            xmldoc.book_info = dcparser.BookInfo.from_json(bi_json)

            # save
            ndoc = current.quickwrite('xml',
                xmldoc.serialize().encode('utf-8'),
                message=msg, user=request.user.username)

            try:
                # return the new revision number
                return {
                    "document": ndoc.id,
                    "subview": "dc",
                    "previous_revision": current.revision,
                    "updated_revision": ndoc.revision
                }
            except Exception:
                lib._rollback()
                # bare raise keeps the original traceback
                raise
        except RevisionNotFound:
            return response.EntityNotFound().django_response()
362
363
364
class MergeHandler(BaseHandler):
    """Merge requests between a user's copy of a document and the document."""
    allowed_methods = ('POST',)

    @validate_form(forms.MergeRequestForm, 'POST')
    @hglibrary
    def create(self, request, form, docid, lib):
        """Create a new document revision from the information provided by user.

        The form supplies ``target_revision`` (a revision of the user's
        copy, or ``'latest'``), ``type`` (``'update'`` or ``'share'``) and
        ``comment``.

        Responses:
          * ``EntityConflict`` ("out-of-date") when ``target_revision`` is
            not the current revision of the user's copy;
          * ``RequestAccepted`` with a pull-request ticket when the user
            lacks the ``explorer.book.can_share`` permission;
          * ``EntityConflict`` when the merge did not succeed;
          * ``SuccessNoContent`` when the merge changed nothing;
          * ``SuccessAllOk`` with the parent and resulting revisions
            otherwise.
        """
        target_rev = form.cleaned_data['target_revision']

        doc = lib.document(docid)
        udoc = doc.take(request.user.username)

        if target_rev == 'latest':
            # Normalise to text: the check below compares against
            # str(udoc.revision), so a non-string revision would otherwise
            # always be reported as out-of-date.
            target_rev = str(udoc.revision)

        if str(udoc.revision) != target_rev:
            # The user doesn't know he has an old version
            # of his own branch.

            # Updating is theoretically ok, but we would
            # have to force a refresh. Sharing may be not safe,
            # 'cause it doesn't always result in update.

            # In other words, we can't lie about the resource's state
            # So we should just yield an 'out-of-date' conflict
            # and let the client ask again with updated info.

            # NOTE: this could result in a race condition, when there
            # are 2 instances of the same user editing the same document.
            # Instance "A" trying to update, and instance "B" always changing
            # the document right before "A". The answer to this problem is
            # for the "A" to request a merge from 'latest' and then
            # check the parent revisions in response, if he actually
            # merged from where he thinks he should. If not, the client
            # SHOULD update his internal state.
            return response.EntityConflict().django_response({
                    "reason": "out-of-date",
                    "provided": target_rev,
                    "latest": udoc.revision})

        if not request.user.has_perm('explorer.book.can_share'):
            # User is not permitted to make a merge, right away
            # So we instead create a pull request in the database
            prq = PullRequest(
                comitter=request.user,
                document=docid,
                source_revision=str(udoc.revision),
                status="N",
                comment=form.cleaned_data['comment'] or '$AUTO$ Document shared.'
            )

            prq.save()
            return response.RequestAccepted().django_response(
                ticket_status=prq.status,
                ticket_uri=reverse("pullrequest_view", args=[prq.id]))

        merge_type = form.cleaned_data['type']

        if merge_type == 'update':
            # update is always performed from the file branch
            # to the user branch
            success, changed = udoc.update(request.user.username)
        elif merge_type == 'share':
            success, changed = udoc.share(form.cleaned_data['comment'])
        else:
            # Without this branch ``success``/``changed`` would be unbound
            # and the request would die with a NameError.
            return response.InternalError().django_response(
                {'exception': "Unknown merge type: %r" % (merge_type,)})

        if not success:
            return response.EntityConflict().django_response()

        if not changed:
            return response.SuccessNoContent().django_response()

        new_udoc = udoc.latest()

        # NOTE: the misspelled "parent_user_resivion" key is kept as-is,
        # clients may already depend on it.
        return response.SuccessAllOk().django_response({
            "name": udoc.id,
            "parent_user_resivion": udoc.revision,
            "parent_revision": doc.revision,
            # revision of the user's copy after the merge
            "revision": new_udoc.revision,
        })