some fixes to oaipmh: namespaces
[wolnelektury.git] / apps / api / handlers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4
5 from datetime import datetime, timedelta
6 import json
7 from urlparse import urljoin
8
9 from django.conf import settings
10 from django.contrib.sites.models import Site
11 from django.core.cache import get_cache
12 from django.core.urlresolvers import reverse
13 from piston.handler import AnonymousBaseHandler, BaseHandler
14 from piston.utils import rc
15
16 from api.helpers import timestamp
17 from api.models import Deleted
18 from catalogue.forms import BookImportForm
19 from catalogue.models import Book, Tag, BookMedia, Fragment
20 from picture.models import Picture
21 from picture.forms import PictureImportForm
22
23 from stats.utils import piwik_track
24
# Base for every absolute link built in this module: plain-HTTP URL of the
# current Site's domain. Evaluated once, when the module is imported.
API_BASE = WL_BASE = MEDIA_BASE = 'http://' + Site.objects.get_current().domain


# Plural category names, as they appear in API paths, mapped to the singular
# names used for Tag.category lookups.
category_singular = {
    'authors': 'author',
    'kinds': 'kind',
    'genres': 'genre',
    'epochs': 'epoch',
    'themes': 'theme',
    'books': 'book',
}
# Reverse mapping: singular category name -> plural category name.
category_plural={}
for k, v in category_singular.items():
    category_plural[v] = k

# Singular tag categories accepted when filtering lists of books.
book_tag_categories = ['author', 'epoch', 'kind', 'genre']
41
42
43
def read_tags(tags, allowed):
    """ Reads a path of filtering tags.

    :param str tags: a path of category and slug pairs, like: authors/an-author/...
    :param allowed: container of singular category names accepted by the caller
    :returns: list of Tag objects (empty if no path was given)
    :raises: ValueError when the path is not made of complete category/slug
        pairs, a category is unknown or not allowed, or a tag can't be found
    """
    if not tags:
        return []

    parts = tags.strip('/').split('/')
    # A dangling category without a slug used to surface as an IndexError,
    # which callers (catching ValueError only) did not handle.
    if len(parts) % 2:
        raise ValueError('Malformed tags path.')

    real_tags = []
    for index in range(0, len(parts), 2):
        category, slug = parts[index], parts[index + 1]

        try:
            category = category_singular[category]
        except KeyError:
            raise ValueError('Unknown category.')

        if category not in allowed:
            raise ValueError('Category not allowed.')

        # Tags in the 'book' category are looked up with an 'l-' slug prefix.
        if category == 'book':
            slug = 'l-' + slug

        try:
            real_tags.append(Tag.objects.get(category=category, slug=slug))
        except Tag.DoesNotExist:
            raise ValueError('Tag not found')
    return real_tags
77
78
79 # RESTful handlers
80
81
class BookMediaHandler(BaseHandler):
    """ Represents the media files attached to Books. """

    model = BookMedia
    fields = ['name', 'type', 'url', 'artist', 'director']

    @classmethod
    def url(cls, media):
        """ Absolute link to the media file. """
        file_url = media.file.url
        return MEDIA_BASE + file_url

    @classmethod
    def artist(cls, media):
        """ Artist name taken from the media's extra info ('' if absent). """
        info = media.extra_info
        return info.get('artist_name', '')

    @classmethod
    def director(cls, media):
        """ Director name taken from the media's extra info ('' if absent). """
        info = media.extra_info
        return info.get('director_name', '')
101         
102
103
class BookDetails(object):
    """Computed fields shared by the handlers which represent Books."""

    @classmethod
    def author(cls, book):
        """ Comma-joined first elements of the book's related 'author' tags. """
        author_tags = book.related_info()['tags'].get('author', [])
        names = [tag[0] for tag in author_tags]
        return ",".join(names)

    @classmethod
    def href(cls, book):
        """ Absolute URI of the Book in the API. """
        api_path = reverse("api_book", args=[book.slug])
        return API_BASE + api_path

    @classmethod
    def url(cls, book):
        """ Absolute URL of the Book's page on the site. """
        site_path = book.get_absolute_url()
        return WL_BASE + site_path

    @classmethod
    def children(cls, book):
        """ All child books of the book. """
        child_books = book.children
        return child_books.all()

    @classmethod
    def media(cls, book):
        """ All media attached to the book. """
        attached = book.media
        return attached.all()

    @classmethod
    def cover(cls, book):
        """ Absolute URL of the book's cover, or '' if it has none. """
        if not book.cover:
            return ''
        return MEDIA_BASE + book.cover.url
136
137
138
class BookDetailHandler(BaseHandler, BookDetails):
    """ Handler for a single Book.

    Serves the detailed representation of one Book object.
    """
    allowed_methods = ['GET']
    # Basic fields, one field per file format, then media/links and the
    # plural name of every book tag category.
    fields = (
        ['title', 'parent', 'children']
        + Book.formats
        + ['media', 'url', 'cover']
        + [category_plural[c] for c in book_tag_categories]
    )

    @piwik_track
    def read(self, request, book):
        """ Returns the Book with the given slug, or NOT_FOUND. """
        try:
            found = Book.objects.get(slug=book)
        except Book.DoesNotExist:
            return rc.NOT_FOUND
        return found
156
157
class AnonymousBooksHandler(AnonymousBaseHandler, BookDetails):
    """ Handler for lists of Books, available to anonymous users.

    Serves read-only lists of Book objects.
    """
    allowed_methods = ('GET',)
    model = Book
    fields = ['author', 'href', 'title', 'url', 'cover']

    @piwik_track
    def read(self, request, tags, top_level=False,
                audiobooks=False, daisy=False):
        """ Lists all books with given tags.

        :param tags: filtering tags; should be a path of categories
             and slugs, i.e.: authors/an-author/epoch/an-epoch/
        :param top_level: if True and a book is included in the results,
             its children aren't. By default all books matching the tags
             are returned.
        :param audiobooks: if True, only books with 'mp3' media are listed
        :param daisy: if True, only books with 'daisy' media are listed

        When both tags and top_level are given, the result of
        Book.tagged_top_level is returned as is, without the media filters.
        """
        try:
            tags = read_tags(tags, allowed=book_tag_categories)
        except ValueError:
            return rc.NOT_FOUND

        if tags and top_level:
            top_books = Book.tagged_top_level(tags)
            return top_books or rc.NOT_FOUND

        books = Book.tagged.with_all(tags) if tags else Book.objects.all()

        if top_level:
            books = books.filter(parent=None)
        if audiobooks:
            books = books.filter(media__type='mp3')
        if daisy:
            books = books.filter(media__type='daisy')

        if not books.exists():
            return rc.NOT_FOUND
        return books

    def create(self, request, *args, **kwargs):
        """ Anonymous users may not add books. """
        return rc.FORBIDDEN
206
207
class BooksHandler(BookDetailHandler):
    """ Handler for lists of Books for authenticated users.

    Falls back to AnonymousBooksHandler for anonymous requests and lets
    users with the 'catalogue.add_book' permission import books via POST.
    """
    allowed_methods = ('GET', 'POST')
    model = Book
    fields = ['author', 'href', 'title', 'url']
    anonymous = AnonymousBooksHandler

    def create(self, request, *args, **kwargs):
        """ Imports a book described by JSON in the 'data' POST field.

        :returns: FORBIDDEN without the add_book permission, BAD_REQUEST
            when 'data' is missing or is not valid JSON, CREATED when the
            import form validates and is saved, NOT_FOUND otherwise.
        """
        if not request.user.has_perm('catalogue.add_book'):
            return rc.FORBIDDEN

        # json.loads raises TypeError for a missing field (None) and
        # ValueError for malformed JSON; neither should end in a server error.
        try:
            data = json.loads(request.POST.get('data'))
        except (TypeError, ValueError):
            return rc.BAD_REQUEST

        form = BookImportForm(data)
        if form.is_valid():
            form.save()
            return rc.CREATED
        else:
            # Kept as NOT_FOUND for compatibility with existing clients.
            return rc.NOT_FOUND
225
226
227 # add categorized tags fields for Book
def _tags_getter(category):
    """ Builds a classmethod returning a book's tags of given category. """
    def get_tags(cls, book):
        return book.tags.filter(category=category)
    return classmethod(get_tags)

# Register one getter on BookDetails per plural category name.
for plural, singular in category_singular.items():
    setattr(BookDetails, plural, _tags_getter(singular))
235
236 # add fields for files in Book
def _file_getter(format):
    """ Builds a classmethod returning the absolute URL of a book's file.

    The file is read from the book's '<format>_file' attribute; the getter
    returns '' when that attribute is empty.
    """
    field = "%s_file" % format

    def get_file(cls, book):
        f = getattr(book, field)
        return MEDIA_BASE + f.url if f else ''
    return classmethod(get_file)

# Register one getter on BookDetails per book file format.
for format in Book.formats:
    setattr(BookDetails, format, _file_getter(format))
249
250
class TagDetails(object):
    """Computed fields shared by the handlers which represent Tags."""

    @classmethod
    def href(cls, tag):
        """ Absolute URI of the tag in the API. """
        plural = category_plural[tag.category]
        api_path = reverse("api_tag", args=[plural, tag.slug])
        return API_BASE + api_path

    @classmethod
    def url(cls, tag):
        """ Absolute URL of the tag's page on the site. """
        site_path = tag.get_absolute_url()
        return WL_BASE + site_path
265
266
class TagDetailHandler(BaseHandler, TagDetails):
    """ Responsible for details of a single Tag object. """

    fields = ['name', 'url', 'sort_key', 'description']

    @piwik_track
    def read(self, request, category, slug):
        """ Returns details of a tag, identified by category and slug.

        :param category: plural category name, as used in API paths
        :param slug: slug of the tag
        :returns: the Tag, or NOT_FOUND for an unknown category or tag
        """
        # The exception object was bound but never used; the plain form
        # of the clause works on every Python version.
        try:
            category_sng = category_singular[category]
        except KeyError:
            return rc.NOT_FOUND

        try:
            return Tag.objects.get(category=category_sng, slug=slug)
        except Tag.DoesNotExist:
            return rc.NOT_FOUND
285
286
class TagsHandler(BaseHandler, TagDetails):
    """ Main handler for Tag objects.

    Responsible for lists of Tag objects
    and fields used for representing Tags.

    """
    allowed_methods = ('GET',)
    model = Tag
    fields = ['name', 'href', 'url']

    @piwik_track
    def read(self, request, category):
        """ Lists all tags in the category (eg. all themes).

        :param category: plural category name, as used in API paths
        :returns: tags of the category with a non-zero book count, or
            NOT_FOUND for an unknown category or when there are none
        """
        # The exception object was bound but never used; the plain form
        # of the clause works on every Python version.
        try:
            category_sng = category_singular[category]
        except KeyError:
            return rc.NOT_FOUND

        tags = Tag.objects.filter(category=category_sng).exclude(book_count=0)
        if tags.exists():
            return tags
        else:
            return rc.NOT_FOUND
312
313
class FragmentDetails(object):
    """Computed fields shared by the handlers which represent Fragments."""

    @classmethod
    def href(cls, fragment):
        """ Absolute URI of the fragment in the API. """
        path_args = [fragment.book.slug, fragment.anchor]
        return API_BASE + reverse("api_fragment", args=path_args)

    @classmethod
    def url(cls, fragment):
        """ Absolute URL of the fragment on the site. """
        site_path = fragment.get_absolute_url()
        return WL_BASE + site_path

    @classmethod
    def themes(cls, fragment):
        """ Tags of the fragment which belong to the 'theme' category. """
        fragment_tags = fragment.tags
        return fragment_tags.filter(category='theme')
335
336
class FragmentDetailHandler(BaseHandler, FragmentDetails):
    """ Responsible for details of a single Fragment object. """
    fields = ['book', 'anchor', 'text', 'url', 'themes']

    @piwik_track
    def read(self, request, book, anchor):
        """ Returns the fragment with given book slug and anchor, or NOT_FOUND. """
        try:
            found = Fragment.objects.get(book__slug=book, anchor=anchor)
        except Fragment.DoesNotExist:
            return rc.NOT_FOUND
        return found
347
348
class FragmentsHandler(BaseHandler, FragmentDetails):
    """ Handler for lists of Fragments.

    Serves lists of Fragment objects and declares the fields
    used for representing them.

    """
    model = Fragment
    fields = ['book', 'url', 'anchor', 'href']
    allowed_methods = ('GET',)

    # Singular tag categories accepted in the filtering path.
    categories = set([
        'author', 'epoch', 'kind', 'genre', 'book', 'theme',
    ])

    @piwik_track
    def read(self, request, tags):
        """ Lists all fragments matching the given book, tags and themes.

        :param tags: should be a path of categories and slugs, i.e.:
             books/book-slug/authors/an-author/themes/a-theme/
        :returns: matching fragments, or NOT_FOUND when the path can't be
             read or nothing matches

        """
        try:
            tags = read_tags(tags, allowed=self.categories)
        except ValueError:
            return rc.NOT_FOUND

        fragments = Fragment.tagged.with_all(tags).select_related('book')
        if not fragments.exists():
            return rc.NOT_FOUND
        return fragments
379
380
381
382 # Changes handlers
383
class CatalogueHandler(BaseHandler):
    """ Base for handlers reporting changes in the catalogue.

    Provides helpers which build dictionaries describing books and tags
    changed or deleted in a given time range.
    """

    @staticmethod
    def fields(request, name):
        """ Reads a comma-separated list from a GET parameter.

        :param request: current request, or None
        :param str name: name of the GET parameter
        :returns: list of strings, or None when there is no request
            or the parameter is missing
        """
        fields_str = request.GET.get(name) if request is not None else None
        return fields_str.split(',') if fields_str is not None else None

    @staticmethod
    def until(t=None):
        """ Returns time suitable for use as upper time boundary for check.

            Used to avoid issues with time between setting the change stamp
            and actually saving the model in database.
            Cuts the microsecond part to avoid issues with DBs where time has
            more precision.

            :param datetime t: manually sets the upper boundary

        """
        # step back by settings.API_WAIT seconds, to avoid concurrency issues
        if t is None:
            t = datetime.now() - timedelta(seconds=settings.API_WAIT)
        # set to whole second in case DB supports something smaller
        return t.replace(microsecond=0)

    @staticmethod
    def book_dict(book, fields=None):
        """ Builds a dictionary describing a book.

        :param book: Book to describe
        :param fields: names of fields to include; unknown names are
            ignored, and all known fields are used if none are given
        :returns: dict with the book's 'id' and every requested field
            for which a value is available
        """
        all_fields = ['url', 'title', 'description',
                      'gazeta_link', 'wiki_link',
                      ] + Book.formats + list(BookMedia.formats.keys()) + [
                      'parent', 'parent_number',
                      'tags',
                      'license', 'license_description', 'source_name',
                      'technical_editors', 'editors',
                      'author', 'sort_key',
                     ]
        if fields:
            fields = [f for f in fields if f in all_fields]
        else:
            fields = all_fields

        extra_info = book.extra_info

        obj = {}
        for field in fields:

            if field in Book.formats:
                # book file in given format: URL and size, if the file exists
                f = getattr(book, field+'_file')
                if f:
                    obj[field] = {
                        'url': f.url,
                        'size': f.size,
                    }

            elif field in BookMedia.formats:
                # all media files of given type attached to the book
                media = []
                for m in book.media.filter(type=field).iterator():
                    media.append({
                        'url': m.file.url,
                        'size': m.file.size,
                    })
                if media:
                    obj[field] = media

            elif field == 'url':
                obj[field] = book.get_absolute_url()

            elif field == 'tags':
                obj[field] = [t.id for t in book.tags.exclude(category__in=('book', 'set')).iterator()]

            elif field == 'author':
                obj[field] = ", ".join(t.name for t in book.tags.filter(category='author').iterator())

            elif field == 'parent':
                obj[field] = book.parent_id

            elif field in ('license', 'license_description', 'source_name',
                      'technical_editors', 'editors'):
                # these values live in the book's extra_info
                f = extra_info.get(field)
                if f:
                    obj[field] = f

            else:
                f = getattr(book, field)
                if f:
                    obj[field] = f

        obj['id'] = book.id
        return obj

    @classmethod
    def book_changes(cls, request=None, since=0, until=None, fields=None):
        """ Reports books changed or deleted in a time range.

        :param request: current request; used to read 'book_fields' from
            GET when fields are not given
        :param since: lower time boundary, as a Unix timestamp
        :param datetime until: upper time boundary; see until()
        :param fields: names of book fields to include
        :returns: dict with 'time_checked' and, when non-empty,
            'updated' (list of book dicts) and 'deleted' (list of ids)
        """
        since = datetime.fromtimestamp(int(since))
        until = cls.until(until)

        changes = {
            'time_checked': timestamp(until)
        }

        if not fields:
            fields = cls.fields(request, 'book_fields')

        updated = []
        deleted = []

        for book in Book.objects.filter(changed_at__gte=since,
                    changed_at__lt=until).iterator():
            book_d = cls.book_dict(book, fields)
            updated.append(book_d)
        if updated:
            changes['updated'] = updated

        # only deletions of objects created before `since` are reported
        for book in Deleted.objects.filter(content_type=Book,
                    deleted_at__gte=since,
                    deleted_at__lt=until,
                    created_at__lt=since).iterator():
            deleted.append(book.id)
        if deleted:
            changes['deleted'] = deleted

        return changes

    @staticmethod
    def tag_dict(tag, fields=None):
        """ Builds a dictionary describing a tag.

        :param tag: Tag to describe
        :param fields: names of fields to include; unknown names are
            ignored, and all known fields are used if none are given
        :returns: dict with the tag's 'id' and the requested fields
        """
        all_fields = ('name', 'category', 'sort_key', 'description',
                      'gazeta_link', 'wiki_link',
                      'url', 'books',
                     )

        if fields:
            fields = [f for f in fields if f in all_fields]
        else:
            fields = all_fields

        obj = {}
        for field in fields:

            if field == 'url':
                obj[field] = tag.get_absolute_url()

            elif field == 'books':
                obj[field] = [b.id for b in Book.tagged_top_level([tag]).iterator()]

            elif field == 'sort_key':
                # unlike other plain fields, included even when empty
                obj[field] = tag.sort_key

            else:
                f = getattr(tag, field)
                if f:
                    obj[field] = f

        obj['id'] = tag.id
        return obj

    @classmethod
    def tag_changes(cls, request=None, since=0, until=None, fields=None, categories=None):
        """ Reports tags changed or deleted in a time range.

        :param request: current request; used to read 'tag_fields' and
            'tag_categories' from GET when they are not given
        :param since: lower time boundary, as a Unix timestamp
        :param datetime until: upper time boundary; see until()
        :param fields: names of tag fields to include
        :param categories: tag categories to report; unknown ones are
            ignored, and all supported categories are used if none are given
        :returns: dict with 'time_checked' and, when non-empty,
            'updated' (list of tag dicts) and 'deleted' (list of ids)
        """
        since = datetime.fromtimestamp(int(since))
        until = cls.until(until)

        changes = {
            'time_checked': timestamp(until)
        }

        if not fields:
            fields = cls.fields(request, 'tag_fields')
        if not categories:
            categories = cls.fields(request, 'tag_categories')

        all_categories = ('author', 'epoch', 'kind', 'genre')
        if categories:
            # Must be a real sequence: it is used in two queries below and
            # a generator would be exhausted by the first one.
            categories = [c for c in categories if c in all_categories]
        else:
            categories = all_categories

        updated = []
        deleted = []

        for tag in Tag.objects.filter(category__in=categories,
                    changed_at__gte=since,
                    changed_at__lt=until).iterator():
            # only serve non-empty tags
            if tag.book_count:
                tag_d = cls.tag_dict(tag, fields)
                updated.append(tag_d)
            elif tag.created_at < since:
                # a tag which became empty is reported as deleted
                deleted.append(tag.id)
        if updated:
            changes['updated'] = updated

        # only deletions of objects created before `since` are reported
        for tag in Deleted.objects.filter(category__in=categories,
                content_type=Tag,
                    deleted_at__gte=since,
                    deleted_at__lt=until,
                    created_at__lt=since).iterator():
            deleted.append(tag.id)
        if deleted:
            changes['deleted'] = deleted

        return changes

    @classmethod
    def changes(cls, request=None, since=0, until=None, book_fields=None,
                tag_fields=None, tag_categories=None):
        """ Reports changes in both books and tags.

        When `since` is zero the result is read from, and stored in,
        the 'api' cache.

        :param request: current request, or None
        :param since: lower time boundary, as a Unix timestamp
        :param datetime until: upper time boundary; see until()
        :param book_fields: names of book fields to include
        :param tag_fields: names of tag fields to include
        :param tag_categories: tag categories to report
        :returns: dict with 'time_checked' and, per kind of change
            ('updated', 'deleted'), a dict with 'books' and/or 'tags'
        """
        until = cls.until(until)
        since = int(since)

        if not since:
            cache = get_cache('api')

            def hashable(value):
                # lists can't be hashed; tuples and None hash as before
                return tuple(value) if isinstance(value, list) else value

            # request is optional, so don't assume it has GET parameters
            if request is not None:
                get_items = tuple(sorted(request.GET.items()))
            else:
                get_items = ()
            key = hash((hashable(book_fields), hashable(tag_fields),
                    hashable(tag_categories), get_items
                  ))
            value = cache.get(key)
            if value is not None:
                return value

        changes = {
            'time_checked': timestamp(until)
        }

        changes_by_type = {
            'books': cls.book_changes(request, since, until, book_fields),
            'tags': cls.tag_changes(request, since, until, tag_fields, tag_categories),
        }

        # regroup: {model: {kind: items}} -> {kind: {model: items}}
        for model in changes_by_type:
            for field in changes_by_type[model]:
                if field == 'time_checked':
                    continue
                changes.setdefault(field, {})[model] = changes_by_type[model][field]

        if not since:
            cache.set(key, changes)

        return changes
620
621
class BookChangesHandler(CatalogueHandler):
    """ Serves changes in books. """
    allowed_methods = ('GET',)

    @piwik_track
    def read(self, request, since):
        """ Returns book changes starting at the `since` timestamp. """
        book_changes = self.book_changes(request, since)
        return book_changes
628
629
class TagChangesHandler(CatalogueHandler):
    """ Serves changes in tags. """
    allowed_methods = ('GET',)

    @piwik_track
    def read(self, request, since):
        """ Returns tag changes starting at the `since` timestamp. """
        tag_changes = self.tag_changes(request, since)
        return tag_changes
636
637
class ChangesHandler(CatalogueHandler):
    """ Serves changes in both books and tags. """
    allowed_methods = ('GET',)

    @piwik_track
    def read(self, request, since):
        """ Returns all changes starting at the `since` timestamp. """
        all_changes = self.changes(request, since)
        return all_changes
644
645
class PictureHandler(BaseHandler):
    """ Lets users with the 'picture.add_picture' permission import pictures. """
    model = Picture
    fields = ('slug', 'title')
    allowed_methods = ('POST',)

    def create(self, request):
        """ Imports a picture described by JSON in the 'data' POST field.

        :returns: FORBIDDEN without the add_picture permission, BAD_REQUEST
            when 'data' is missing or is not valid JSON, CREATED when the
            import form validates and is saved, NOT_FOUND otherwise.
        """
        if not request.user.has_perm('picture.add_picture'):
            return rc.FORBIDDEN

        # json.loads raises TypeError for a missing field (None) and
        # ValueError for malformed JSON; neither should end in a server error.
        try:
            data = json.loads(request.POST.get('data'))
        except (TypeError, ValueError):
            return rc.BAD_REQUEST

        form = PictureImportForm(data)
        if form.is_valid():
            form.save()
            return rc.CREATED
        else:
            # Kept as NOT_FOUND for compatibility with existing clients.
            return rc.NOT_FOUND
660         else:
661             return rc.NOT_FOUND