Minimal double reader.
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15
16 from basicauth import logged_in_or_basicauth, factory_decorator
17 from catalogue.models import Book, Tag
18
19 from search.views import Search, SearchResult
20 import operator
21 import logging
22 import re
23
# Module-level logger for the OPDS views, registered under the name 'opds'.
log = logging.getLogger('opds')
25
26 from stats.utils import piwik_track
27
# Entries of the root navigation feed.  Each dict carries the name of the URL
# pattern ("link") and its positional arguments ("link_args") used to build
# the entry link, plus the displayed title and description.
_root_feeds = (
    # The user's own shelves: the only entry that is not a tag category.
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
) + tuple(
    # One entry per tag category, all served by the same URL pattern.
    {
        u"category": category,
        u"link": u"opds_by_category",
        u"link_args": [category],
        u"title": title,
        u"description": description
    }
    for category, title, description in (
        (u"author", u"Autorzy", u"Utwory wg autorów"),
        (u"kind", u"Rodzaje", u"Utwory wg rodzajów"),
        (u"genre", u"Gatunki", u"Utwory wg gatunków"),
        (u"epoch", u"Epoki", u"Utwory wg epok"),
    )
)
65
66
def full_url(url):
    """Return ``url`` joined onto ``http://<domain>`` of the current site.

    The domain is read from the current ``Site`` object on every call.
    """
    domain = Site.objects.get_current().domain
    base = "http://%s" % domain
    return urljoin(base, url)
69
70
def _static_file_size(relative_path):
    """Return the size in bytes of a file under ``settings.STATIC_ROOT`` as text.

    An empty string is returned when the size cannot be determined, so that
    the feed can still be generated without the optional ``length`` value.
    """
    try:
        return unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, relative_path)))
    except (OSError, TypeError, AttributeError):
        # OSError: the file is missing or unreadable.
        # TypeError/AttributeError: STATIC_ROOT is not a usable path (e.g. None).
        return ''


class OPDSFeed(Atom1Feed):
    """Atom feed generator that writes OPDS navigation and acquisition links."""
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Thumbnail attached to entries without an enclosure (navigation entries).
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    _book_parent_img_size = _static_file_size("img/book-parent.png")

    # Thumbnail attached to entries with an enclosure (acquisition entries).
    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    _book_img_size = _static_file_size("img/book.png")

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus "start" and "search" links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write one feed entry; modified from Atom1Feed.add_item_elements.

        Items without an enclosure get a "subsection" navigation link; items
        with an enclosure get an OPDS acquisition link instead.  Both kinds
        get a thumbnail link.

        ``handler`` is the XML generator to write to and ``item`` is the dict
        of item attributes prepared by the feed framework.
        """
        # These helpers are defined next to Atom1Feed but are not imported at
        # module level; import them here so the pubdate / missing-id paths do
        # not fail with a NameError.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID; fall back to a tag URI built from the link and date.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
159
160
class AcquisitionFeed(Feed):
    """Base feed whose items are books, each offered as an EPUB enclosure."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book's title."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the book's absolute URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if it has none."""
        try:
            return book.tags.filter(category='author')[0].name
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError, not KeyError.
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if it has none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError, not KeyError.
            return u''

    def item_enclosure_url(self, book):
        """Return the full URL of the book's EPUB file, or None without one."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the book's EPUB file, or None without one."""
        return book.epub_file.size if book.epub_file else None
194
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed with one entry per element of ``_root_feeds``."""
    feed_type = OPDSFeed

    title = u'Wolne Lektury'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root feed entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's title."""
        return item['title']

    def item_description(self, item):
        """Return the entry's description."""
        return item['description']

    def item_link(self, item):
        """Resolve the entry's URL pattern name with its positional arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)
215
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category``.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title of the selected root feed entry."""
        return feed['title']

    def items(self, feed):
        """Return the tags of the entry's category, skipping those with book_count 0."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
246         return u''
247
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed with the books carrying a given tag."""

    def get_object(self, request, category, slug):
        """Return the tag with this category and slug, or raise a 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with ``tag``, minus those tagged by other results.

        Every matched book contributes its own 'book'-category tag slug; any
        book carrying one of those tags is dropped from the result
        (presumably these are child books of a matched parent -- verify
        against the catalogue models).
        """
        tagged_books = Book.tagged.with_any([tag])
        own_tag_slugs = [b.book_tag_slug() for b in tagged_books.iterator()]
        own_tags = Tag.objects.filter(category='book', slug__in=own_tag_slugs)
        descendant_ids = [d.pk for d in Book.tagged.with_any(own_tags)]
        if not descendant_ids:
            return tagged_books
        return tagged_books.exclude(pk__in=descendant_ids)
270
271
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the requesting user's non-empty 'set' tags."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """Use the user attached to the request as the feed object."""
        return request.user

    def title(self, user):
        """Return the feed title containing the user's username."""
        username = user.username
        return u"Półki użytkownika %s" % username

    def items(self, user):
        """Return the user's 'set' tags, skipping those with book_count 0."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for this user set."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
298
299 # no class decorators in python 2.5
300 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
301
302
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed with the books in one of the requesting user's sets."""

    def get_object(self, request, slug):
        """Return the requesting user's 'set' tag with this slug, or raise a 404."""
        owner = request.user
        return get_object_or_404(Tag, category='set', slug=slug, user=owner)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with ``tag``."""
        wanted_tags = [tag]
        return Book.tagged.with_any(wanted_tags)
320
321 # no class decorators in python 2.5
322 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
323
324
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching a search request."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Two capture groups: a double-quoted phrase (without the quotes) or a
    # run of non-space characters.
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For each criterion, the indexes of its (quoted, unquoted) groups in a
    # tuple returned by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # Criterion name -> search index field; names not listed here are used
    # as the field name unchanged.
    PARAMS_TO_FIELDS = {
        'author': 'authors',
        'translator': 'translators',
        #        'title': 'title',
        'categories': 'tag_name_pl',
        'description': 'text',
        #        'text': 'text',
        }

    # An OpenSearch template placeholder sent literally by a client.
    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def _inline_criterion(self, matches, name):
        """Return the first non-empty value captured for ``name``, or None.

        ``matches`` is the list of group tuples from INLINE_QUERY_RE.findall().
        In values taken from the quoted group, '+' is replaced by a space.
        """
        for match in matches:
            for idx in self.MATCHES[name]:
                value = match[idx]
                if value:
                    if idx % 2 == 0:
                        return value.replace('+', ' ')
                    return value
        return None

    def _strip_placeholder(self, val):
        """Some clients don't get opds placeholders and just send them."""
        if self.ATOM_PLACEHOLDER.match(val):
            return ''
        return val

    def get_object(self, request):
        """
        For OPDS 1.1 We should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        if search terms are provided, we shall search for books
        according to Hint information (from author & contributor & title).

        but if search terms are empty, we should do a different search
        (perhaps for is_book=True)

        Returns the list of matching books, ordered by descending score.
        """
        # ``reduce`` is a builtin only on Python 2; functools has it on 2.6+.
        from functools import reduce

        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            # Criteria are embedded in ``q``; what is left after removing
            # them is the free-text query.
            remains = self.INLINE_QUERY_RE.sub('', query)
            query = re.sub(r'[ \t]+', ' ', remains)
            criteria = dict(
                (cn, self._inline_criterion(inline_criteria, cn))
                for cn in self.MATCHES)
            # empty query and text set case?
            log.debug("Inline query = [%s], criteria: %s", query, criteria)
        else:
            # Criteria arrive as separate GET parameters; ``query`` stays
            # as read from ``q`` above.
            criteria = dict(
                (cn, self._strip_placeholder(request.GET.get(cn, '')))
                for cn in self.MATCHES)
            log.debug("Query = [%s], criteria: %s", query, criteria)

        srch = Search()

        book_hit_filter = srch.index.Q(book_id__any=True)
        # One filter per criterion that actually has a value.
        filters = [book_hit_filter] + [
            srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]})
            for cn in self.MATCHES if criteria.get(cn)]

        if query:
            # The free-text query may match any of the searchable fields.
            q = srch.index.query(
                reduce(operator.or_,
                       [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
                        for cn in self.MATCHES],
                       srch.index.Q()))
        else:
            q = srch.index.query(srch.index.Q())

        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
        results = q.execute()

        # Iterate the results once; the dict keys double as the id set.
        book_scores = dict((r['book_id'], r['score']) for r in results)
        books = list(Book.objects.filter(id__in=set(book_scores)))
        books.sort(reverse=True, key=lambda book: book_scores[book.id])
        return books

    def get_link(self, query):
        """Return the URL of the site search page for ``query``."""
        # NOTE(review): ``query`` is interpolated without URL-escaping --
        # confirm whether callers pass an already-encoded value.
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the list of books computed by ``get_object``."""
        return books