fix for bad documents.
[redakcja.git] / src / documents / views.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import defaultdict
5 from datetime import datetime, date, timedelta
6 import logging
7 import os
8 from urllib.parse import quote_plus, unquote, urlsplit, urlunsplit
9
10 from django.conf import settings
11 from django.contrib import auth
12 from django.contrib.auth.models import User
13 from django.contrib.auth.decorators import login_required, permission_required
14 from django.urls import reverse
15 from django.db.models import Count, Q
16 from django.db import transaction
17 from django import http
18 from django.http import Http404, HttpResponse, HttpResponseForbidden
19 from django.http.response import HttpResponseRedirect
20 from django.shortcuts import get_object_or_404, render
21 from django.utils.encoding import iri_to_uri
22 from django.utils.translation import gettext_lazy as _
23 from django.views.decorators.http import require_POST
24 from django_cas_ng.decorators import user_passes_test
25
26 from apiclient import api_call, NotAuthorizedError
27 from . import forms
28 from . import helpers
29 from .helpers import active_tab
30 from .models import (Book, Chunk, Image, BookPublishRecord, 
31         ChunkPublishRecord, ImagePublishRecord, Project)
32 from fileupload.views import UploadView
33
34 #
35 # Quick hack around caching problems, TODO: use ETags
36 #
37 from django.views.decorators.cache import never_cache
38
39 logger = logging.getLogger("fnp.documents")
40
41
@active_tab('all')
@never_cache
def document_list(request):
    """Render the ``documents/document_list.html`` page (the 'all' tab)."""
    template_name = 'documents/document_list.html'
    return render(request, template_name)
46
47
@active_tab('images')
@never_cache
def image_list(request, user=None):
    """Render the ``documents/image_list.html`` page (the 'images' tab).

    The ``user`` argument is accepted but not used by this view.
    """
    template_name = 'documents/image_list.html'
    return render(request, template_name)
52
53
@never_cache
def user(request, username):
    """Render the page of the user called ``username``.

    Responds with 404 when no such user exists.
    """
    context = {"viewed_user": get_object_or_404(User, username=username)}
    return render(request, 'documents/user_page.html', context)
58
59
@login_required
@active_tab('my')
@never_cache
def my(request):
    """Render the logged-in user's personal page.

    The page lists the entries stored in the session under
    ``wiki_last_books`` (most recent first) and the username reported by the
    remote API, or ``None`` when the API call is not authorized.
    """
    recent = request.session.get("wiki_last_books", {})
    # Order by the raw timestamps, exactly as stored in the session.
    ordered = sorted(
        recent.items(), key=lambda item: item[1]['time'], reverse=True)
    # Build copies with a datetime in place of the timestamp.  Rewriting the
    # session's own dicts would leave datetime objects inside the session
    # data, which cannot be serialized if the session is saved afterwards.
    last_books = [
        (key, dict(info, time=datetime.fromtimestamp(info['time'])))
        for key, info in ordered
    ]

    try:
        resp = api_call(request.user, 'username/')
    except NotAuthorizedError:
        wllogin = None
    else:
        wllogin = resp['username']

    return render(request, 'documents/my_page.html', {
        'last_books': last_books,
        "logout_to": '/',
        "wllogin": wllogin,
        })
80
81
@active_tab('users')
def users(request):
    """Render the user list, ordered by chunk count and then by name.

    Each user is annotated with ``count``, the number of related chunks;
    users with the highest count come first.
    """
    ranked_users = (
        User.objects.all()
        .annotate(count=Count('chunk'))
        .order_by('-count', 'last_name', 'first_name')
    )
    return render(request, 'documents/user_list.html', {'users': ranked_users})
88
89
@active_tab('activity')
def activity(request, isodate=None):
    """Render the activity page for a single day.

    ``isodate`` is parsed with ``helpers.parse_isodate``; when that raises
    ``ValueError`` the current day is used.  A day in the future yields 404.
    The template receives ``prev_day`` always and ``next_day`` only when the
    shown day is not today.
    """
    today = date.today()
    try:
        day = helpers.parse_isodate(isodate)
    except ValueError:
        day = today

    if day > today:
        raise Http404

    one_day = timedelta(1)
    # Same names the template has always received from this view.
    context = {
        'request': request,
        'isodate': isodate,
        'today': today,
        'day': day,
    }
    if day != today:
        context['next_day'] = day + one_day
    context['prev_day'] = day - one_day

    return render(request, 'documents/activity.html', context)
105
106
@never_cache
def logout_then_redirect(request):
    """Log the user out and redirect to the ``next`` query parameter.

    The target is used only when it is a safe URL for the current host;
    otherwise the redirect goes to ``/``.  Without this check a value such
    as ``//other.host/`` passes through ``quote_plus`` unchanged and sends
    the browser to a foreign site (open redirect).
    """
    # Imported locally so the module-level import block stays untouched.
    from django.utils.http import url_has_allowed_host_and_scheme

    auth.logout(request)
    target = request.GET.get('next', '/')
    if not url_has_allowed_host_and_scheme(
            target,
            allowed_hosts={request.get_host()},
            require_https=request.is_secure()):
        target = '/'
    return http.HttpResponseRedirect(quote_plus(target, safe='/?='))
111
112
@permission_required('documents.add_book')
@active_tab('create')
def create_missing(request, slug=None):
    """Show and process the form that creates a new book.

    On GET the form is pre-filled from ``slug`` (spaces replaced by dashes).
    On a valid POST the book is created with ``Book.create`` and the client
    is redirected to the ``documents_book`` URL of the new book.
    """
    slug = ('' if slug is None else slug).replace(' ', '-')

    if request.method != "POST":
        form = forms.DocumentCreateForm(initial={
            "slug": slug,
            "title": slug.replace('-', ' ').title(),
            "gallery": slug,
        })
    else:
        form = forms.DocumentCreateForm(request.POST, request.FILES)
        if form.is_valid():
            creator = request.user if request.user.is_authenticated else None
            data = form.cleaned_data
            book = Book.create(
                text=data['text'],
                creator=creator,
                slug=data['slug'],
                title=data['title'],
                gallery=data['gallery'],
            )
            target = reverse("documents_book", args=[book.slug])
            return http.HttpResponseRedirect(target)

    context = {
        "slug": slug,
        "form": form,
        "logout_to": '/',
    }
    return render(request, "documents/document_create_missing.html", context)
150
151
@permission_required('documents.add_book')
@active_tab('upload')
def upload(request):
    """Create books from the XML files contained in an uploaded ZIP archive.

    Every archive member is classified as skipped (not an ``.xml`` file or
    an empty slug), erroneous (slug clash inside the archive or with an
    existing book, or content that is not UTF-8) or OK.  Books are created
    only when no member is erroneous.  The result lists are passed to the
    ``documents/document_upload.html`` template.
    """
    if request.method == "POST":
        form = forms.DocumentsUploadForm(request.POST, request.FILES)
        if form.is_valid():
            from slugify import slugify

            creator = request.user if request.user.is_authenticated else None

            archive = form.cleaned_data['zip']
            skipped_list = []
            ok_list = []
            error_list = []
            slugs = {}
            # Only the slugs are needed; a set makes each lookup O(1).
            existing = set(Book.objects.values_list('slug', flat=True))
            for filename in archive.namelist():
                # Directory entries end with a slash.
                if filename.endswith('/'):
                    continue
                title = os.path.basename(filename)[:-4]
                slug = slugify(title)
                if not (slug and filename.endswith('.xml')):
                    skipped_list.append(filename)
                elif slug in slugs:
                    # Interpolate AFTER the translation lookup, otherwise the
                    # already-formatted text never matches a catalogue entry.
                    error_list.append((
                        filename, slug,
                        _('Slug already used for %s') % slugs[slug]))
                elif slug in existing:
                    error_list.append((
                        filename, slug,
                        _('Slug already used in repository.')))
                else:
                    try:
                        archive.read(filename).decode('utf-8')  # test read
                        ok_list.append((filename, slug, title))
                    except UnicodeDecodeError:
                        # NOTE(review): this entry carries the title where the
                        # other error entries carry the slug; kept unchanged
                        # because the template is not visible from here.
                        error_list.append((
                            filename, title,
                            _('File should be UTF-8 encoded.')))
                    slugs[slug] = filename

            if not error_list:
                for filename, slug, title in ok_list:
                    Book.create(
                        text=archive.read(filename).decode('utf-8'),
                        creator=creator,
                        slug=slug,
                        title=title,
                    )

            return render(request, "documents/document_upload.html", {
                "form": form,
                "ok_list": ok_list,
                "skipped_list": skipped_list,
                "error_list": error_list,

                "logout_to": '/',
            })
    else:
        form = forms.DocumentsUploadForm()

    return render(request, "documents/document_upload.html", {
        "form": form,

        "logout_to": '/',
    })
215
216
def serve_xml(request, book, slug):
    """Return the book's XML as a downloadable attachment.

    The content comes from ``book.materialize(publishable=True)`` and the
    download is named ``<slug>.xml``.  Responds with 403 when the book is
    not accessible for this request.
    """
    if book.accessible(request):
        content = book.materialize(publishable=True)
        response = http.HttpResponse(content, content_type='application/xml')
        response['Content-Disposition'] = 'attachment; filename=%s.xml' % slug
        return response
    return HttpResponseForbidden("Not authorized.")
224
225
@never_cache
def book_xml(request, slug):
    """Serve the XML of the book with the given slug (404 when missing)."""
    return serve_xml(request, get_object_or_404(Book, slug=slug), slug)
230
231
@never_cache
def book_xml_dc(request, slug):
    """Serve the XML of the book whose ``catalogue_book_id`` equals ``slug``.

    Responds with 404 when no such book exists.
    """
    found = get_object_or_404(Book, catalogue_book_id=slug)
    return serve_xml(request, found, slug)
236
237
@never_cache
def book_txt(request, slug):
    """Return the book converted to plain text as a ``<slug>.txt`` attachment.

    Responds with 404 for an unknown slug and 403 when the book is not
    accessible for this request.
    """
    book = get_object_or_404(Book, slug=slug)
    if book.accessible(request):
        payload = book.wldocument().as_text().get_bytes()
        response = http.HttpResponse(payload, content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename=%s.txt' % slug
        return response
    return HttpResponseForbidden("Not authorized.")
249
250
@never_cache
def book_html(request, slug):
    """Render the book as HTML inside ``documents/book_text.html``.

    The document is loaded without parsing Dublin Core and converted with
    the gallery URL passed as an option.  When the conversion returns
    ``None`` an empty string is rendered.  Responds with 404 for an unknown
    slug and 403 when the book is not accessible for this request.
    """
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    doc = book.wldocument(parse_dublincore=False)
    converted = doc.as_html(options={'gallery': "'%s'" % book.gallery_url()})

    if converted is None:
        html = ''
    else:
        html = converted.get_bytes().decode('utf-8')
    # response = http.HttpResponse(html, content_type='text/html')
    # return response
    # book_themes = {}
    # for fragment in book.fragments.all().iterator():
    #     for theme in fragment.tags.filter(category='theme').iterator():
    #         book_themes.setdefault(theme, []).append(fragment)

    # book_themes = book_themes.items()
    # book_themes.sort(key=lambda s: s[0].sort_key)

    # Same names the template has always received from this view.
    context = {
        'request': request,
        'slug': slug,
        'book': book,
        'doc': doc,
        'html': html,
    }
    return render(request, 'documents/book_text.html', context)
271
272
@login_required
@never_cache
def book_pdf(request, slug, mobile=False):
    """Build a PDF of the book and serve it as ``<slug>.pdf``.

    With ``mobile`` set, a fixed list of customizations is passed to the
    PDF conversion; otherwise none are.  Responds with 404 for an unknown
    slug and 403 when the book is not accessible for this request.
    """
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    # TODO: move to celery
    doc = book.wldocument()
    # TODO: error handling
    if mobile:
        customizations = ['26pt', 'nothemes', 'nomargins', 'notoc']
    else:
        customizations = None
    pdf_file = doc.as_pdf(
        cover=True,
        base_url=request.build_absolute_uri(book.gallery_path()),
        customizations=customizations,
    )
    from .ebook_utils import serve_file
    download_name = book.slug + '.pdf'
    return serve_file(pdf_file.get_filename(), download_name, 'application/pdf')
288
289
@login_required
@never_cache
def book_epub(request, slug):
    """Build an EPUB of the book and return it as a ``<slug>.epub`` attachment.

    Responds with 404 for an unknown slug and 403 when the book is not
    accessible for this request.
    """
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    # TODO: move to celery
    doc = book.wldocument(librarian2=True)
    # TODO: error handling

    from librarian.builders import EpubBuilder
    builder = EpubBuilder(base_url='file://' + book.gallery_path() + '/')
    payload = builder.build(doc).get_bytes()

    response = HttpResponse(content_type='application/epub+zip')
    disposition = ('attachment; filename=%s' % book.slug) + '.epub'
    response['Content-Disposition'] = disposition
    response.write(payload)
    return response
309
310
@login_required
@never_cache
def book_mobi(request, slug):
    """Build a MOBI of the book and return it as a ``<slug>.mobi`` attachment.

    Responds with 404 for an unknown slug and 403 when the book is not
    accessible for this request.
    """
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    # TODO: move to celery
    doc = book.wldocument(librarian2=True)
    # TODO: error handling
    from librarian.builders import MobiBuilder
    builder = MobiBuilder(base_url='file://' + book.gallery_path() + '/')
    payload = builder.build(doc).get_bytes()

    response = HttpResponse(content_type='application/x-mobipocket-ebook')
    disposition = ('attachment; filename=%s' % book.slug) + '.mobi'
    response['Content-Disposition'] = disposition
    response.write(payload)
    return response
329
330
@never_cache
def revision(request, slug, chunk=None):
    """Return the chunk's current revision as the response body.

    Responds with 404 when the chunk lookup finds nothing or more than one
    object, and with 403 when the chunk's book is not accessible.
    """
    try:
        doc = Chunk.get(slug, chunk)
    except (Chunk.MultipleObjectsReturned, Chunk.DoesNotExist):
        raise Http404
    if doc.book.accessible(request):
        return http.HttpResponse(str(doc.revision()))
    return HttpResponseForbidden("Not authorized.")
340
341
def book(request, slug):
    """Show the book detail page and process edits of the book's properties.

    Users with the ``documents.change_book`` permission get an editable
    form; a valid POST saves it and redirects back to the book.  Everyone
    else gets a read-only form.  Document parsing and statistics are
    best-effort: when either fails the page is still rendered, with ``doc``
    and/or ``stats`` set to ``None``.
    """
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    if request.user.has_perm('documents.change_book'):
        if request.method == "POST":
            form = forms.BookForm(request.POST, instance=book)
            if form.is_valid():
                form.save()
                return http.HttpResponseRedirect(book.get_absolute_url())
        else:
            form = forms.BookForm(instance=book)
        editable = True
    else:
        form = forms.ReadonlyBookForm(instance=book)
        editable = False
    # Identical for both kinds of users, so it is built once.
    publish_options_form = forms.PublishOptionsForm()

    publish_error = book.publishable_error()
    publishable = publish_error is None

    doc = None
    stats = None
    # A broken document must not break the page, but only ordinary errors
    # are tolerated (a bare ``except`` would also trap SystemExit and
    # KeyboardInterrupt), and the failure is logged instead of vanishing.
    try:
        doc = book.wldocument(librarian2=True)
    except Exception:
        logger.warning(
            "Could not load document for book %s", slug, exc_info=True)
    else:
        try:
            stats = doc.get_statistics()
        except Exception:
            logger.warning(
                "Could not compute statistics for book %s", slug,
                exc_info=True)

    return render(request, "documents/book_detail.html", {
        "book": book,
        "doc": doc,
        "stats": stats,
        "publishable": publishable,
        "publishable_error": publish_error,
        "form": form,
        "publish_options_form": publish_options_form,
        "editable": editable,
    })
386
387
def image(request, slug):
    """Show the image detail page and process edits of the image's properties.

    Users with the ``documents.change_image`` permission get an editable
    form; a valid POST saves it and redirects back to the image.  Everyone
    else gets a read-only form.  Responds with 404 for an unknown slug and
    403 when the image is not accessible for this request.
    """
    image = get_object_or_404(Image, slug=slug)
    if not image.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    if not request.user.has_perm('documents.change_image'):
        editable = False
        form = forms.ReadonlyImageForm(instance=image)
    else:
        editable = True
        if request.method != "POST":
            form = forms.ImageForm(instance=image)
        else:
            form = forms.ImageForm(request.POST, instance=image)
            if form.is_valid():
                form.save()
                return http.HttpResponseRedirect(image.get_absolute_url())

    publish_error = image.publishable_error()

    context = {
        "object": image,
        "publishable": publish_error is None,
        "publishable_error": publish_error,
        "form": form,
        "editable": editable,
    }
    return render(request, "documents/image_detail.html", context)
416
417
@permission_required('documents.add_chunk')
def chunk_add(request, slug, chunk):
    """Show and process the form that splits a new chunk off an existing one.

    On GET the form is pre-filled with a slug and title derived from the
    chunk's number plus one.  On a valid POST the chunk is split and the
    client is redirected to the book page.  Responds with 404 when the chunk
    lookup fails and 403 when the book is not accessible.
    """
    try:
        doc = Chunk.get(slug, chunk)
    except (Chunk.MultipleObjectsReturned, Chunk.DoesNotExist):
        raise Http404
    if not doc.book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    if request.method != "POST":
        following = doc.number + 1
        form = forms.ChunkAddForm(initial={
            "slug": str(following),
            "title": "cz. %d" % (following, ),
        })
    else:
        form = forms.ChunkAddForm(request.POST, instance=doc)
        if form.is_valid():
            creator = request.user if request.user.is_authenticated else None
            data = form.cleaned_data
            doc.split(
                creator=creator,
                slug=data['slug'],
                title=data['title'],
                gallery_start=data['gallery_start'],
                user=data['user'],
                stage=data['stage'],
            )
            return http.HttpResponseRedirect(doc.book.get_absolute_url())

    context = {
        "chunk": doc,
        "form": form,
    }
    return render(request, "documents/chunk_add.html", context)
453
454
@login_required
def chunk_edit(request, slug, chunk):
    """Show and process the form that edits a chunk's properties.

    After a valid POST the client is redirected to the ``next`` query
    parameter when it is a safe URL for the current host, and to the book
    page otherwise.  On GET (or an invalid POST) the form is rendered with
    ``go_next`` derived from the Referer header, stripped of scheme and host.
    Responds with 404 when the chunk lookup fails and 403 when the book is
    not accessible.
    """
    # Imported locally so the module-level import block stays untouched.
    from django.utils.http import url_has_allowed_host_and_scheme

    try:
        doc = Chunk.get(slug, chunk)
    except (Chunk.MultipleObjectsReturned, Chunk.DoesNotExist):
        raise Http404
    if not doc.book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    if request.method == "POST":
        form = forms.ChunkForm(request.POST, instance=doc)
        if form.is_valid():
            form.save()
            go_next = request.GET.get('next', None)
            if go_next:
                go_next = quote_plus(unquote(iri_to_uri(go_next)), safe='/?=&')
            # Quoting alone does not neutralise a scheme-relative target such
            # as ``//other.host/``; refuse anything that leaves this host.
            if not go_next or not url_has_allowed_host_and_scheme(
                    go_next,
                    allowed_hosts={request.get_host()},
                    require_https=request.is_secure()):
                go_next = doc.book.get_absolute_url()
            return http.HttpResponseRedirect(go_next)
    else:
        form = forms.ChunkForm(instance=doc)

    referer = request.META.get('HTTP_REFERER')
    if referer:
        parts = urlsplit(referer)
        # Blank out scheme and host, keeping path, query and fragment.
        parts = ['', ''] + list(parts[2:])
        go_next = quote_plus(urlunsplit(parts))
    else:
        go_next = ''

    return render(request, "documents/chunk_edit.html", {
        "chunk": doc,
        "form": form,
        "go_next": go_next,
    })
490
491
@transaction.atomic
@login_required
@require_POST
def chunk_mass_edit(request):
    """Apply a stage, user and/or project to many chunks at once.

    POST parameters:
      ids     -- comma-separated chunk ids; blank items are ignored.
      stage   -- slug of the stage tag; an unknown slug sets the stage to None.
      user    -- username; an unknown username sets the user to None.
      project -- project id, applied to each chunk's book; an unknown or
                 non-numeric id sets the project to None.

    A parameter that is missing or empty leaves that attribute unchanged.
    Returns an empty ``text/plain`` response, or 400 when ``ids`` contains a
    value that is not an integer.
    """
    try:
        ids = [
            int(i)
            for i in request.POST.get('ids', '').split(',')
            if i.strip()
        ]
    except ValueError:
        return http.HttpResponseBadRequest(
            "Invalid ids.", content_type="text/plain")
    chunks = list(Chunk.objects.filter(id__in=ids))

    stage_slug = request.POST.get('stage')
    if stage_slug:
        try:
            stage = Chunk.tag_model.objects.get(slug=stage_slug)
        # The lookup runs on the tag model, so its own DoesNotExist is the
        # exception raised here; Chunk.DoesNotExist would never match.
        except Chunk.tag_model.DoesNotExist:
            stage = None

        for c in chunks:
            c.stage = stage

    username = request.POST.get('user')
    logger.info("username: %s", username)
    logger.info(request.POST)
    if username:
        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            user = None

        for c in chunks:
            c.user = user

    project_id = request.POST.get('project')
    if project_id:
        try:
            project = Project.objects.get(pk=int(project_id))
        except (Project.DoesNotExist, ValueError):
            project = None
        for c in chunks:
            book = c.book
            book.project = project
            book.save()

    for c in chunks:
        c.save()

    return HttpResponse("", content_type="text/plain")
533
534
@transaction.atomic
@login_required
@require_POST
def image_mass_edit(request):
    """Apply a stage, user and/or project to many images at once.

    POST parameters:
      ids     -- comma-separated image ids; blank items are ignored.
      stage   -- slug of the stage tag; an unknown slug sets the stage to None.
      user    -- username; an unknown username sets the user to None.
      project -- project id; an unknown or non-numeric id sets the project
                 to None.

    A parameter that is missing or empty leaves that attribute unchanged.
    Returns an empty ``text/plain`` response, or 400 when ``ids`` contains a
    value that is not an integer.
    """
    try:
        ids = [
            int(i)
            for i in request.POST.get('ids', '').split(',')
            if i.strip()
        ]
    except ValueError:
        return http.HttpResponseBadRequest(
            "Invalid ids.", content_type="text/plain")
    # Must be a real list: a lazy ``map`` is exhausted by the first loop
    # below, so later loops -- including the final save() -- would see no
    # images and nothing would ever be stored.
    images = list(Image.objects.filter(id__in=ids))

    stage_slug = request.POST.get('stage')
    if stage_slug:
        try:
            stage = Image.tag_model.objects.get(slug=stage_slug)
        # The lookup runs on the tag model, so its own DoesNotExist is the
        # exception raised here; Image.DoesNotExist would never match.
        except Image.tag_model.DoesNotExist:
            stage = None

        for c in images:
            c.stage = stage

    username = request.POST.get('user')
    logger.info("username: %s", username)
    logger.info(request.POST)
    if username:
        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            user = None

        for c in images:
            c.user = user

    project_id = request.POST.get('project')
    if project_id:
        try:
            project = Project.objects.get(pk=int(project_id))
        except (Project.DoesNotExist, ValueError):
            project = None
        for c in images:
            c.project = project

    for c in images:
        c.save()

    return HttpResponse("", content_type="text/plain")
574
575
@permission_required('documents.change_book')
def book_append(request, slug):
    """Show and process the form that appends this book to another one.

    On a valid POST the selected target book's ``append`` is called with
    this book, and the client is redirected to the target.  Responds with
    404 for an unknown slug and 403 when the book is not accessible.
    """
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    if request.method != "POST":
        form = forms.BookAppendForm(book)
    else:
        form = forms.BookAppendForm(book, request.POST)
        if form.is_valid():
            target = form.cleaned_data['append_to']
            target.append(book)
            return http.HttpResponseRedirect(target.get_absolute_url())

    context = {
        "book": book,
        "form": form,
        "logout_to": '/',
    }
    return render(request, "documents/book_append_to.html", context)
596
597
@require_POST
@login_required
def publish(request, slug):
    """Publish the book and redirect back to its page.

    Publishing options (``days``, ``beta``, ``hidden``) come from
    ``PublishOptionsForm``; when the form is invalid the defaults
    ``0 / False / False`` are used.  When the API rejects the user, the
    client is redirected to the matching OAuth view.  Any other error is
    logged and its ``repr`` is returned as the response body.  Responds with
    404 for an unknown slug and 403 when the book is not accessible.
    """
    form = forms.PublishOptionsForm(request.POST)
    if form.is_valid():
        days = form.cleaned_data['days']
        beta = form.cleaned_data['beta']
        hidden = form.cleaned_data['hidden']
    else:
        days = 0
        beta = False
        hidden = False
    book = get_object_or_404(Book, slug=slug)
    if not book.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    try:
        protocol = 'https://' if request.is_secure() else 'http://'
        book.publish(
            request.user, host=protocol + request.get_host(),
            days=days, beta=beta, hidden=hidden)
    except NotAuthorizedError:
        return http.HttpResponseRedirect(
            reverse('apiclient_oauth' if not beta else 'apiclient_beta_oauth'))
    # Exception, not BaseException: SystemExit and KeyboardInterrupt must
    # propagate instead of being turned into a response body.
    except Exception as e:
        logger.exception("Publishing book %s failed", slug)
        return http.HttpResponse(repr(e))
    else:
        return http.HttpResponseRedirect(book.get_absolute_url())
623
624
@require_POST
@login_required
def publish_image(request, slug):
    """Publish the image and redirect back to its page.

    When the API rejects the user, the client is redirected to the
    ``apiclient_oauth`` view.  Any other error is logged and its message is
    returned as the response body.  Responds with 404 for an unknown slug
    and 403 when the image is not accessible.
    """
    image = get_object_or_404(Image, slug=slug)
    if not image.accessible(request):
        return HttpResponseForbidden("Not authorized.")

    try:
        image.publish(request.user)
    except NotAuthorizedError:
        return http.HttpResponseRedirect(reverse('apiclient_oauth'))
    # Exception, not BaseException: SystemExit and KeyboardInterrupt must
    # propagate instead of being turned into a response body.
    except Exception as e:
        logger.exception("Publishing image %s failed", slug)
        # Explicit text conversion rather than handing the exception object
        # to the response and relying on its implicit coercion.
        return http.HttpResponse(str(e))
    else:
        return http.HttpResponseRedirect(image.get_absolute_url())
640
641
class GalleryView(UploadView):
    """Upload view bound to the gallery of a book."""

    def get_object(self, request, slug):
        """Return the book for ``slug``; 404 when missing or without a gallery."""
        book = get_object_or_404(Book, slug=slug)
        if book.gallery:
            return book
        raise Http404

    def breadcrumbs(self):
        """Return the breadcrumb trail: book list, this book, gallery label."""
        book = self.object
        books_crumb = (_('books'), reverse('documents_document_list'))
        book_crumb = (book.title, book.get_absolute_url())
        gallery_crumb = (_('scan gallery'),)
        return [books_crumb, book_crumb, gallery_crumb]

    def get_directory(self):
        """Return ``settings.IMAGE_DIR`` plus the gallery name and a slash."""
        gallery = self.object.gallery
        return "%s%s/" % (settings.IMAGE_DIR, gallery)
658
659
def active_users_list(request, csv=False):
    """List the authors of changes made in a given year, most active first.

    The year comes from the ``y`` query parameter and defaults to the
    current year; a non-numeric value yields 400.  Change counts from both
    the chunk and the image change models are summed per e-mail address.
    With ``csv`` set, the result is returned as a CSV attachment whose rows
    are ``count, email, name...``; otherwise it is rendered with
    ``documents/active_users_list.html``.
    """
    # The ``csv`` parameter shadows the module name, hence the alias.
    import csv as csv_module
    import io

    try:
        year = int(request.GET.get('y', date.today().year))
    except ValueError:
        return http.HttpResponseBadRequest(
            "Invalid year.", content_type='text/plain')

    by_user = defaultdict(int)
    by_email = defaultdict(int)
    names_by_email = defaultdict(set)
    for change_model in (Chunk.change_model, Image.change_model):
        for c in change_model.objects.filter(
                created_at__year=year).order_by(
                'author', 'author_email', 'author_name').values(
                'author', 'author_name', 'author_email').annotate(
                c=Count('author'), ce=Count('author_email')).distinct():
            if c['author']:
                by_user[c['author']] += c['c']
            else:
                by_email[c['author_email']] += c['ce']
                if (c['author_name'] or '').strip():
                    names_by_email[c['author_email']].add(c['author_name'])
    # Fold the counts of registered users into the per-e-mail totals.
    for user in User.objects.filter(pk__in=by_user):
        by_email[user.email] += by_user[user.pk]
        names_by_email[user.email].add(
            "%s %s" % (user.first_name, user.last_name))

    active_users = [
        (email, names_by_email[email], count)
        for email, count in by_email.items()
    ]
    active_users.sort(key=lambda x: -x[2])
    if csv:
        # csv.writer quotes fields containing commas, quotes or newlines,
        # which joining with ',' by hand did not do; it also writes a
        # missing e-mail as an empty field instead of raising TypeError.
        buffer = io.StringIO()
        writer = csv_module.writer(buffer, lineterminator='\n')
        for email, names, count in active_users:
            # Sorted so the output does not depend on set iteration order.
            writer.writerow([count, email, *sorted(names)])
        return http.HttpResponse(
            buffer.getvalue(),
            content_type='text/csv',
            headers={
                'Content-Disposition': f'attachment; filename=redakcja-{year}.csv',
            }
        )
    else:
        return render(request, 'documents/active_users_list.html', {
            'users': active_users,
            'year': year,
        })
703
704
@user_passes_test(lambda u: u.is_superuser)
def mark_final(request):
    """Show and process ``MarkFinalForm``; restricted to superusers.

    A valid POST saves the form and redirects to ``mark_final_completed``.
    """
    if request.method != 'POST':
        form = forms.MarkFinalForm()
    else:
        form = forms.MarkFinalForm(data=request.POST)
        if form.is_valid():
            form.save()
            return HttpResponseRedirect(reverse('mark_final_completed'))
    context = {'form': form}
    return render(request, 'documents/mark_final.html', context)
715
716
def mark_final_completed(request):
    """Render the ``documents/mark_final_completed.html`` page."""
    template_name = 'documents/mark_final_completed.html'
    return render(request, template_name)