add uuid in bookmarks
[wolnelektury.git] / src / opds / views.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from functools import reduce
5 import os.path
6 from urllib.parse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.shortcuts import get_object_or_404
10 from django.urls import reverse
11 from django.utils.feedgenerator import Atom1Feed, Enclosure
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15 from django.utils.functional import lazy
16
17 from basicauth import logged_in_or_basicauth, factory_decorator
18 from catalogue.models import Book, Tag
19 from search.utils import UnaccentSearchQuery, UnaccentSearchVector
20 from social.models import UserList
21
22 import operator
23 import logging
24 import re
25
26 from stats.utils import piwik_track
27
# Logger used by the OPDS views in this module.
log = logging.getLogger('opds')

# Entries listed by the root OPDS catalogue. Every entry is a dict with:
#   category    - tag category the entry stands for ("" for the user shelves),
#   link        - URL pattern name handed to reverse(),
#   link_args   - positional arguments for that URL pattern,
#   title       - entry title,
#   description - entry summary.
# The user shelves entry comes first; the four tag-category entries share
# the same shape, so they are generated from a compact table.
_root_feeds = (
    dict(
        category="",
        link="opds_user",
        link_args=[],
        title="Moje półki",
        description="Półki użytkownika dostępne po zalogowaniu",
    ),
) + tuple(
    dict(
        category=category,
        link="opds_by_category",
        link_args=[category],
        title=title,
        description=description,
    )
    for category, title, description in (
        ("author", "Autorzy", "Utwory wg autorów"),
        ("kind", "Rodzaje", "Utwory wg rodzajów"),
        ("genre", "Gatunki", "Utwory wg gatunków"),
        ("epoch", "Epoki", "Utwory wg epok"),
    )
)
67
68
# Domain of the current Site. Wrapped in lazy() so the lookup happens when
# the value is used as a string rather than when this module is imported.
current_domain = lazy(lambda: Site.objects.get_current().domain, str)()


def full_url(url):
    """Return `url` joined onto ``http://<current site domain>``."""
    site_root = "http://%s" % current_domain
    return urljoin(site_root, url)
74
75
class OPDSFeed(Atom1Feed):
    """Atom feed generator that writes entries as OPDS catalogue entries.

    Entries without enclosures are written as navigation entries (a
    "subsection" link plus the "green book" thumbnail). Entries with
    enclosures are written as acquisition entries (one acquisition link per
    enclosure plus the "red book" thumbnail).
    """
    link_rel = "subsection"
    link_type = "application/atom+xml"

    # Thumbnail for navigation entries. The URL is lazy because full_url()
    # depends on the current site domain; the file size is read once, when
    # the class is created, and falls back to '' if the file is unreadable.
    _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
    try:
        _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except OSError:
        _book_parent_img_size = ''

    # Thumbnail for acquisition entries; same scheme as above.
    _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
    try:
        _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except OSError:
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus OPDS start/search links."""
        super().add_root_elements(handler)
        handler.addQuickElement("link", None,
                                {"href": reverse("opds_authors"),
                                 "rel": "start",
                                 "type": "application/atom+xml"})
        handler.addQuickElement("link", None,
                                {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 "rel": "search",
                                 "type": "application/opensearchdescription+xml"})

    @staticmethod
    def _add_thumbnail(handler, href, length):
        """Write a single OPDS thumbnail <link> pointing at a PNG image."""
        handler.addQuickElement(
            "link", '',
            {
                "rel": "http://opds-spec.org/thumbnail",
                "href": href,
                "length": length,
                "type": "image/png",
            })

    def add_item_elements(self, handler, item):
        """Write one feed entry; modified from Atom1Feed.add_item_elements.

        `handler` is the XML generator used by the feed and `item` is the
        dict of item attributes collected by the feed framework.
        """
        # Imported here because the module-level import only brings in
        # Atom1Feed and Enclosure; without these names the pubdate and
        # unique_id fallbacks below raised NameError.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        # Treat a missing key, None and an empty sequence alike.
        enclosures = item.get('enclosures') or ()

        handler.addQuickElement("title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if not enclosures:
            handler.addQuickElement(
                "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
            # add a "green book" icon
            self._add_thumbnail(handler, self._book_parent_img, self._book_parent_img_size)

        if item['pubdate'] is not None:
            # rfc3339_date() already returns text, so no decoding is needed.
            handler.addQuickElement("updated", rfc3339_date(item['pubdate']))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement("email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement("uri", item['author_link'])
            handler.endElement("author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement("id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement("summary", item['description'], {"type": "text"})

        # Enclosure as OPDS Acquisition Link
        for enc in enclosures:
            handler.startElement(
                "link",
                {
                    "rel": "http://opds-spec.org/acquisition",
                    "href": enc.url,
                    "length": str(enc.length),
                    "type": enc.mime_type,
                })
            # An enclosure carrying an `indirect` attribute is a container
            # (e.g. a zip); advertise the media type found inside it.
            if hasattr(enc, 'indirect'):
                NS = 'http://opds-spec.org/2010/catalog'
                handler.startPrefixMapping('opds', NS)
                handler.startElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition', {
                    (None, 'type'): enc.indirect,
                })
                handler.endElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition')
                handler.endPrefixMapping('opds')
            handler.endElement('link')
        if enclosures:
            # add a "red book" icon
            self._add_thumbnail(handler, self._book_img, self._book_img_size)

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement("category", "", {"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement("rights", item['item_copyright'])
183
184
class AcquisitionFeed(Feed):
    """Base class for OPDS feeds whose items are books.

    Subclasses provide the feed-level attributes and `items()`; this class
    maps a book to its title, link, author and downloadable enclosures.
    """
    feed_type = OPDSFeed
    link = 'http://www.wolnelektury.pl/'
    author_name = "Wolne Lektury"
    author_link = "http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Use the book title as the entry title."""
        return book.title

    def item_description(self):
        """Book entries carry an empty description."""
        return ''

    def item_link(self, book):
        """Link the entry to the book's own page."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author, or '' if unavailable."""
        try:
            first_author = book.authors().first()
            return first_author.name
        except AttributeError:
            # Raised e.g. when first() returned None (no authors).
            return ''

    def item_author_link(self, book):
        """Return the URL of the book's first author, or '' if unavailable."""
        try:
            first_author = book.authors().first()
            return first_author.get_absolute_url()
        except AttributeError:
            # Raised e.g. when first() returned None (no authors).
            return ''

    def item_enclosures(self, book):
        """Return the list of downloadable files offered for `book`.

        An EPUB enclosure is added when the book has an EPUB file, and a zip
        of MP3 files (flagged with `indirect = 'audio/mpeg'`) when it has
        MP3 media.
        """
        attachments = []

        if book.epub_file:
            epub = Enclosure(
                url=full_url(book.epub_url()),
                length=book.epub_file.size,
                mime_type="application/epub+zip",
            )
            attachments.append(epub)

        if book.has_mp3_file():
            zip_url = full_url(reverse('download_zip_mp3', args=[book.slug]))
            # Length is the sum of the sizes of the individual MP3 files.
            total_size = sum(media.file.size for media in book.get_media('mp3'))
            audio = Enclosure(url=zip_url, length=total_size, mime_type="application/zip")
            audio.indirect = 'audio/mpeg'
            attachments.append(audio)

        return attachments
229
230
@piwik_track
class RootFeed(Feed):
    """Root OPDS catalogue: one navigation entry per `_root_feeds` item."""
    feed_type = OPDSFeed
    title = 'Wolne Lektury'
    link = 'http://wolnelektury.pl/'
    description = "Spis utworów na stronie http://WolneLektury.pl"
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def items(self):
        """Return the static list of root catalogue entries."""
        return _root_feeds

    def item_title(self, item):
        """Title comes straight from the entry definition."""
        return item['title']

    def item_link(self, item):
        """Resolve the entry's URL pattern name with its arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)

    def item_description(self, item):
        """Description comes straight from the entry definition."""
        return item['description']
251
252
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the tags of one category."""
    feed_type = OPDSFeed
    link = 'http://wolnelektury.pl/'
    description = "Spis utworów na stronie http://WolneLektury.pl"
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the `_root_feeds` entry for `category`.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                # First matching entry wins.
                return candidate
        raise Http404

    def title(self, feed):
        """Feed title is the title of the root entry."""
        return feed['title']

    def items(self, feed):
        """Return the tags of the feed's category, excluding those with no items."""
        tags_in_category = Tag.objects.filter(category=feed['category'])
        return tags_in_category.exclude(items=None)

    def item_title(self, item):
        """Use the tag name as the entry title."""
        return item.name

    def item_link(self, item):
        """Link each tag to its by-tag feed."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Tag entries carry an empty description."""
        return ''
284
285
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed listing the books attached to a single tag."""

    def link(self, tag):
        """Feed link is the tag's own page."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Feed title is the tag name."""
        return tag.name

    def description(self, tag):
        """Fixed feed description."""
        return "Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, category, slug):
        """Look up the tag by category and slug, or respond with 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def items(self, tag):
        """Return the tag's top-level books that are findable and not previews."""
        tagged = Book.tagged_top_level([tag])
        return tagged.filter(preview=False, findable=True)
304
305
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the shelves (UserList objects) of the requesting user."""
    feed_type = OPDSFeed
    link = 'http://www.wolnelektury.pl/'
    description = "Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def get_object(self, request):
        """The feed object is the user making the request."""
        return request.user

    def title(self, user):
        """Feed title including the user's username."""
        return "Półki użytkownika %s" % (user.username,)

    def items(self, user):
        """Return the user's lists that are not marked as deleted."""
        return UserList.objects.filter(deleted=False, user=user)

    def item_title(self, item):
        """Use the list name as the entry title."""
        return item.name

    def item_link(self, item):
        """Link each list to its own feed."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """List entries carry an empty description."""
        return ''
332
333
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed listing the books on one of the requesting user's lists."""

    def link(self, tag):
        """Feed link is the list's own page."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Feed title is the list name."""
        return tag.name

    def description(self, tag):
        """Fixed feed description."""
        return "Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, slug):
        """Look up the requesting user's non-deleted list by slug, or respond with 404."""
        owner = request.user
        return get_object_or_404(UserList, user=owner, slug=slug, deleted=False)

    def items(self, tag):
        """Return the books on the list."""
        return tag.get_books()
351
352
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the results of a book search."""
    description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = "Wyniki wyszukiwania"

    # Matches either a double-quoted value (first group) or a bare value
    # running up to the next space (second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For every criterion: indices of its (quoted, bare) groups in the tuples
    # produced by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    @classmethod
    def _inline_criterion(cls, matches, name):
        """Return the first value given for criterion `name`, or None.

        `matches` is the list of group tuples from INLINE_QUERY_RE.findall().
        In a quoted value every '+' is replaced with a space; a bare value is
        returned unchanged.
        """
        quoted_idx, bare_idx = cls.MATCHES[name]
        for groups in matches:
            if groups[quoted_idx]:
                return groups[quoted_idx].replace('+', ' ')
            if groups[bare_idx]:
                return groups[bare_idx]
        return None

    @classmethod
    def _parse_search_params(cls, params):
        """Split search parameters into ``(free_text_query, criteria)``.

        `params` is a mapping with a ``get(key, default)`` method (the
        request's GET data). `criteria` maps every key of MATCHES to a string
        or None.
        """
        query = params.get('q', '')

        inline_matches = cls.INLINE_QUERY_RE.findall(query)
        if inline_matches:
            remains = cls.INLINE_QUERY_RE.sub('', query)
            # Collapse whitespace runs and strip the ends: once the criteria
            # are cut out, the leftover may be nothing but spaces, which must
            # not be treated as a free-text query.
            query = re.sub(r'[ \t]+', ' ', remains).strip()
            criteria = {
                name: cls._inline_criterion(inline_matches, name)
                for name in ('author', 'translator', 'title', 'categories',
                             'description', 'text')
            }
            # empty query and text set case?
        else:
            def remove_dump_data(val):
                """Some clients don't get opds placeholders and just send them."""
                if cls.ATOM_PLACEHOLDER.match(val):
                    return ''
                return val

            # The free-text query stays as read from 'q' above.
            criteria = {
                name: remove_dump_data(params.get(name, ''))
                for name in cls.MATCHES
            }

        log.debug("Inline query = [%s], criteria: %s", query, criteria)
        return query, criteria

    def get_object(self, request):
        """Return the queryset of books matching the request's search.

        For OPDS 1.1 we should handle a query for search terms and criteria
        provided either as OpenSearch parameters or as an 'inline' query.
        OpenSearch defines the fields atom:author, atom:contributor (treated
        as translator) and atom:title. An inline query provides author,
        title, categories (treated as book tags) and description (treated as
        content search terms).

        If search terms are provided, we search for books according to the
        hint information (from author, contributor and title).

        If the search terms are empty, we should do a different search
        (perhaps for is_book=True).

        The 'translator', 'description' and 'text' criteria are parsed but
        not applied to the search yet.
        """
        query, criteria = self._parse_search_params(request.GET)

        books = Book.objects.filter(findable=True, preview=False).annotate(
            search_vector=UnaccentSearchVector('title')
        )
        if query:
            squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
            books = books.filter(search_vector=squery)
        if criteria['author']:
            authors = Tag.objects.filter(category='author').annotate(
                search_vector=UnaccentSearchVector('name_pl')
            ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
            books = books.filter(tag_relations__tag__in=authors)
        if criteria['categories']:
            tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
                search_vector=UnaccentSearchVector('name_pl')
            ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
            books = books.filter(tag_relations__tag__in=tags)
        if criteria['translator']:
            # TODO
            pass
        if criteria['title']:
            books = books.filter(
                search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
            )

        # Drop books whose ancestor is already among the results.
        books = books.exclude(ancestor__in=books)

        books = books.order_by('popularity__count')
        return books

    def get_link(self, query):
        """Return the URL of the site search page for `query`."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object() unchanged.

        The previous ``except ValueError`` fallback wrapped a bare ``return``
        that cannot raise, so it never ran and has been dropped.
        """
        return books
466             # too short a query
467             return []