add a filter
[wolnelektury.git] / src / opds / views.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from functools import reduce
5 import os.path
6 from urllib.parse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.shortcuts import get_object_or_404
10 from django.urls import reverse
11 from django.utils.feedgenerator import Atom1Feed, Enclosure
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15 from django.utils.functional import lazy
16
17 from basicauth import logged_in_or_basicauth, factory_decorator
18 from catalogue.models import Book, Tag
19 from search.utils import UnaccentSearchQuery, UnaccentSearchVector
20
21 import operator
22 import logging
23 import re
24
25 from stats.utils import piwik_track
26
log = logging.getLogger('opds')

# Entries of the root OPDS navigation feed.  Each entry carries:
#   category    - tag category listed by the entry ("" for the user shelves),
#   link        - URL name passed to reverse(),
#   link_args   - positional arguments for reverse(),
#   title / description - user-facing texts (Polish).
_root_feeds = tuple(
    {
        "category": category,
        "link": link,
        # Category feeds take the category as their only URL argument;
        # the user shelves entry (empty category) takes none.
        "link_args": [category] if category else [],
        "title": title,
        "description": description,
    }
    for category, link, title, description in (
        ("", "opds_user", "Moje półki", "Półki użytkownika dostępne po zalogowaniu"),
        ("author", "opds_by_category", "Autorzy", "Utwory wg autorów"),
        ("kind", "opds_by_category", "Rodzaje", "Utwory wg rodzajów"),
        ("genre", "opds_by_category", "Gatunki", "Utwory wg gatunków"),
        ("epoch", "opds_by_category", "Epoki", "Utwory wg epok"),
    )
)
66
67
def _current_site_domain():
    """Return the domain of the current Django ``Site``."""
    return Site.objects.get_current().domain


# Wrapped in lazy() so the Site lookup is deferred until the value is
# actually used as a string, instead of running at import time.
current_domain = lazy(_current_site_domain, str)()
69
70
def full_url(url):
    """Return ``url`` joined onto ``http://<current site domain>``."""
    site_root = "http://%s" % current_domain
    return urljoin(site_root, url)
73
74
class OPDSFeed(Atom1Feed):
    """Atom feed generator extended with OPDS-specific links.

    Adds ``start`` and ``search`` links to the feed root, and renders each
    entry with either a navigation link (entries without enclosures) or
    acquisition links (one per enclosure), plus a thumbnail icon.
    """
    link_rel = "subsection"
    link_type = "application/atom+xml"

    # "Green book" icon used for navigation entries.  The URL is lazy because
    # full_url() needs the current Site; the size is read once at class
    # creation and left empty when the file cannot be stat'ed.
    _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
    try:
        _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except OSError:
        _book_parent_img_size = ''

    # "Red book" icon used for acquisition entries; handled the same way.
    _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
    try:
        _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except OSError:
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus OPDS start/search links."""
        super().add_root_elements(handler)
        handler.addQuickElement("link", None,
                                {"href": reverse("opds_authors"),
                                 "rel": "start",
                                 "type": "application/atom+xml"})
        handler.addQuickElement("link", None,
                                {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 "rel": "search",
                                 "type": "application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write a single feed entry (modified from Atom1Feed.add_item_elements).

        ``handler`` is the XML generator the entry is written to; ``item`` is
        the dict describing the entry.  Entries without enclosures get a
        navigation link and the "green book" thumbnail; entries with
        enclosures get one acquisition link per enclosure and the "red book"
        thumbnail.
        """
        # These helpers were referenced but never imported, so the branches
        # using them raised NameError.  Imported locally to keep this block
        # self-contained.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement("title", item['title'])

        # Treat a missing key, None and an empty sequence alike.
        enclosures = item.get('enclosures') or ()

        # add a OPDS Navigation link if there's no enclosure
        if not enclosures:
            handler.addQuickElement(
                "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(
                "link", '',
                {
                    "rel": "http://opds-spec.org/thumbnail",
                    "href": self._book_parent_img,
                    "length": self._book_parent_img_size,
                    "type": "image/png",
                })
        if item['pubdate'] is not None:
            # rfc3339_date() already returns text; no decoding needed.
            handler.addQuickElement("updated", rfc3339_date(item['pubdate']))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement("email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement("uri", item['author_link'])
            handler.endElement("author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement("id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement("summary", item['description'], {"type": "text"})

        # Enclosure as OPDS Acquisition Link
        for enc in enclosures:
            handler.startElement(
                "link",
                {
                    "rel": "http://opds-spec.org/acquisition",
                    "href": enc.url,
                    "length": str(enc.length),
                    "type": enc.mime_type,
                })
            # An enclosure carrying an ``indirect`` attribute is described
            # with a nested opds:indirectAcquisition element of that type.
            if hasattr(enc, 'indirect'):
                NS = 'http://opds-spec.org/2010/catalog'
                handler.startPrefixMapping('opds', NS)
                handler.startElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition', {
                    (None, 'type'): enc.indirect,
                })
                handler.endElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition')
                handler.endPrefixMapping('opds')
            handler.endElement('link')
        if enclosures:
            # add a "red book" icon
            handler.addQuickElement(
                "link", '',
                {
                    "rel": "http://opds-spec.org/thumbnail",
                    "href": self._book_img,
                    "length": self._book_img_size,
                    "type": "image/png",
                })

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement("category", "", {"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement("rights", item['item_copyright'])
182
183
class AcquisitionFeed(Feed):
    """Base feed whose items are books offered for download."""
    feed_type = OPDSFeed
    link = 'http://www.wolnelektury.pl/'
    author_name = "Wolne Lektury"
    author_link = "http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Use the book title as the entry title."""
        return book.title

    def item_description(self):
        """Entries carry an empty description."""
        return ''

    def item_link(self, book):
        """Link the entry to the book's own page."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the first author's name, or '' on AttributeError."""
        try:
            first_author = book.authors().first()
            return first_author.name
        except AttributeError:
            return ''

    def item_author_link(self, book):
        """Return the first author's URL, or '' on AttributeError."""
        try:
            first_author = book.authors().first()
            return first_author.get_absolute_url()
        except AttributeError:
            return ''

    def item_enclosures(self, book):
        """Return the downloadable files of ``book`` as a list of enclosures.

        Includes the EPUB file when present and, when the book has MP3
        media, a ZIP download marked as an indirect 'audio/mpeg' acquisition.
        """
        enclosures = []

        if book.epub_file:
            epub = Enclosure(
                url=full_url(book.epub_url()),
                length=book.epub_file.size,
                mime_type="application/epub+zip"
            )
            enclosures.append(epub)

        if book.has_mp3_file():
            audio = Enclosure(
                url=full_url(reverse('download_zip_mp3', args=[book.slug])),
                # Length is the total size of all MP3 media files.
                length=sum(media.file.size for media in book.get_media('mp3')),
                mime_type="application/zip"
            )
            audio.indirect = 'audio/mpeg'
            enclosures.append(audio)

        return enclosures
228
229
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed listing the entries of ``_root_feeds``."""
    feed_type = OPDSFeed
    title = 'Wolne Lektury'
    link = 'http://wolnelektury.pl/'
    description = "Spis utworów na stronie http://WolneLektury.pl"
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def items(self):
        """Return the static root feed definitions."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's configured title."""
        return item['title']

    def item_link(self, item):
        """Resolve the entry's URL name and arguments to a URL."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)

    def item_description(self, item):
        """Return the entry's configured description."""
        return item['description']
250
251
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the tags of one category."""
    feed_type = OPDSFeed
    link = 'http://wolnelektury.pl/'
    description = "Spis utworów na stronie http://WolneLektury.pl"
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category``.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Use the root feed entry's title as the feed title."""
        return feed['title']

    def items(self, feed):
        """Return the tags of the feed's category, excluding ``items=None``."""
        tags_in_category = Tag.objects.filter(category=feed['category'])
        return tags_in_category.exclude(items=None)

    def item_title(self, item):
        """Use the tag name as the entry title."""
        return item.name

    def item_link(self, item):
        """Link the entry to the tag's OPDS feed."""
        link_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=link_args)

    def item_description(self):
        """Entries carry an empty description."""
        return ''
283
284
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed listing the books of a single tag."""

    def link(self, tag):
        """Link the feed to the tag's own page."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Use the tag name as the feed title."""
        return tag.name

    def description(self, tag):
        """Return the fixed feed description."""
        return "Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, category, slug):
        """Fetch the tag by category and slug, or raise a 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def items(self, tag):
        """Return ``Book.tagged_top_level`` results that are findable, non-preview."""
        return Book.tagged_top_level([tag]).filter(preview=False, findable=True)
303
304
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the requesting user's 'set' tags (shelves)."""
    feed_type = OPDSFeed
    link = 'http://www.wolnelektury.pl/'
    description = "Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def get_object(self, request):
        """The feed object is the requesting user."""
        return request.user

    def title(self, user):
        """Build the feed title from the user's username."""
        return "Półki użytkownika %s" % user.username

    def items(self, user):
        """Return the user's 'set' tags, excluding ``items=None``."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(items=None)

    def item_title(self, item):
        """Use the tag name as the entry title."""
        return item.name

    def item_link(self, item):
        """Link the entry to the OPDS feed of that set."""
        link_args = [item.slug]
        return reverse("opds_user_set", args=link_args)

    def item_description(self):
        """Entries carry an empty description."""
        return ''
331
332
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed listing the books of one of the user's 'set' tags."""

    def link(self, tag):
        """Link the feed to the tag's own page."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Use the tag name as the feed title."""
        return tag.name

    def description(self, tag):
        """Return the fixed feed description."""
        return "Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, slug):
        """Fetch the requesting user's 'set' tag by slug, or raise a 404."""
        lookup = {'category': 'set', 'slug': slug, 'user': request.user}
        return get_object_or_404(Tag, **lookup)

    def items(self, tag):
        """Return the books tagged with ``tag``."""
        return Book.tagged.with_any([tag])
350
351
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the results of a book search."""
    description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = "Wyniki wyszukiwania"

    # One value after a "field:" prefix: either a double-quoted string
    # (first capture group) or a run of non-space characters (second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # Indexes of the (quoted, unquoted) capture groups of each field within
    # a tuple returned by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def get_object(self, request):
        """
        Build the queryset of books matching the search request.

        For OPDS 1.1 we should handle a query for search terms
        and criteria provided either as OpenSearch parameters or as an
        'inline' query embedded in ``q`` (``field:value`` pairs).
        OpenSearch defines fields: atom:author, atom:contributor (treated as
        translator), atom:title. An inline query provides author, title,
        categories (treated as book tags), description (treated as content
        search terms).

        Only the free-text query and the author, categories and title
        criteria are applied here; translator is a TODO, and description
        and text are parsed but not used.

        Returns a Book queryset ordered by 'popularity__count'.
        """

        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            # Whatever is left after removing the field:value pairs is the
            # free-text part of the query.
            remains = self.INLINE_QUERY_RE.sub('', query)
            remains = re.sub(r'[ \t]+', ' ', remains)

            def get_criteria(criteria, name):
                """Return the first non-empty value captured for ``name``, or None."""
                for c in criteria:
                    for p in self.MATCHES[name]:
                        if c[p]:
                            if p % 2 == 0:
                                # Even indexes are the quoted groups; there
                                # '+' is replaced with a space.
                                return c[p].replace('+', ' ')
                            return c[p]
                return None

            criteria = {cn: get_criteria(inline_criteria, cn) for cn in self.MATCHES}
            query = remains
            label = "Inline"
        else:
            def remove_dump_data(val):
                """Some clients don't get opds placeholders and just send them."""
                if self.ATOM_PLACEHOLDER.match(val):
                    return ''
                return val

            criteria = {
                cn: remove_dump_data(request.GET.get(cn, ''))
                for cn in self.MATCHES
            }
            label = "OpenSearch"

        # A whitespace-only remainder (e.g. the space between two inline
        # criteria) must not be treated as a free-text query.
        query = query.strip()
        log.debug("%s query = [%s], criteria: %s", label, query, criteria)

        books = Book.objects.filter(findable=True, preview=False).annotate(
            search_vector=UnaccentSearchVector('title')
        )
        if query:
            squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
            books = books.filter(search_vector=squery)
        if criteria['author']:
            authors = Tag.objects.filter(category='author').annotate(
                search_vector=UnaccentSearchVector('name_pl')
            ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
            books = books.filter(tag_relations__tag__in=authors)
        if criteria['categories']:
            tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
                search_vector=UnaccentSearchVector('name_pl')
            ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
            books = books.filter(tag_relations__tag__in=tags)
        if criteria['translator']:
            # TODO
            pass
        if criteria['title']:
            books = books.filter(
                search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
            )

        # Drop books whose ancestor is itself among the results.
        books = books.exclude(ancestor__in=books)

        books = books.order_by('popularity__count')
        return books

    def get_link(self, query):
        """Return the URL of the 'search' view with ``query`` as its ``q`` parameter."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books produced by get_object() unchanged."""
        # The former try/except ValueError around this return could never
        # trigger, because returning a name does not raise.
        return books
465             # too short a query
466             return []