Tighter sponsors html, every pageview gets ~10kB lighter.
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15 from django.utils.functional import lazy
16
17 from basicauth import logged_in_or_basicauth, factory_decorator
18 from catalogue.models import Book, Tag
19
20 from search.views import Search
21 import operator
22 import logging
23 import re
24
25 log = logging.getLogger('opds')
26
27 from stats.utils import piwik_track
28
# Entries of the OPDS root catalogue.  Each entry carries the name of the URL
# pattern to reverse ("link"), its positional arguments ("link_args"), the tag
# category it lists ("category") and the texts shown to the user.
_root_feeds = (
    # The user's own shelves; not bound to any tag category.
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
) + tuple(
    # One navigation entry per browsable tag category.
    {
        u"category": category,
        u"link": u"opds_by_category",
        u"link_args": [category],
        u"title": title,
        u"description": description
    }
    for category, title, description in (
        (u"author", u"Autorzy", u"Utwory wg autorów"),
        (u"kind", u"Rodzaje", u"Utwory wg rodzajów"),
        (u"genre", u"Gatunki", u"Utwory wg gatunków"),
        (u"epoch", u"Epoki", u"Utwory wg epok"),
    )
)
66
67
# Lazy proxy for the current Site's domain: the Site lookup is deferred until
# the value is actually used, so importing this module does not evaluate it.
current_domain = lazy(lambda: Site.objects.get_current().domain, str)()


def full_url(url):
    """Return `url` joined onto "http://<current site domain>"."""
    site_root = "http://%s" % current_domain
    return urljoin(site_root, url)
71
72
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator emitting the extra links OPDS catalogues use.

    Entries without an enclosure are rendered as navigation links (with the
    "book-parent" thumbnail); entries with an enclosure are rendered as
    acquisition links (with the "book" thumbnail).
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Thumbnail for navigation entries: lazily built absolute URL, plus the
    # file size as text ('' when the size cannot be determined).
    _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except (OSError, TypeError, AttributeError):
        # File missing/unreadable, or STATIC_ROOT is not a usable path.
        _book_parent_img_size = ''

    # Thumbnail for acquisition entries, handled the same way.
    _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except (OSError, TypeError, AttributeError):
        # File missing/unreadable, or STATIC_ROOT is not a usable path.
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus "start" and "search" links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write one feed entry; modified from Atom1Feed.add_item_elements.

        `handler` is the XML generator being written to and `item` is the
        item dictionary prepared by the feed generator.
        """
        # These helpers live next to Atom1Feed but were never imported by this
        # module, so the branches using them used to raise NameError.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            updated = rfc3339_date(item['pubdate'])
            # Older Django versions return a byte string here; newer ones
            # already return text, which must not be decoded again.
            if isinstance(updated, bytes):
                updated = updated.decode('utf-8')
            handler.addQuickElement(u"updated", updated)

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: fall back to a tag URI built from the link and date.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
163
164
class AcquisitionFeed(Feed):
    """Base for OPDS feeds whose items are books offered as EPUB enclosures."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book's title."""
        return book.title

    def item_description(self):
        """Books get an empty description."""
        return u''

    def item_link(self, book):
        """Return the book's own page URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].name
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError (not KeyError, which
            # is kept only for backward compatibility).
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except (IndexError, KeyError):
            # See item_author_name: an empty queryset raises IndexError.
            return u''

    def item_enclosure_url(self, book):
        """Return the absolute URL of the EPUB file, or None without one."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file, or None without one."""
        return book.epub_file.size if book.epub_file else None
198
@piwik_track
class RootFeed(Feed):
    """Root OPDS navigation feed: one entry per element of `_root_feeds`."""
    feed_type = OPDSFeed
    title = u'Wolne Lektury'
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static list of root catalogue entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's display title."""
        return item['title']

    def item_link(self, item):
        """Reverse the entry's URL pattern name with its positional arguments."""
        url_name, url_args = item['link'], item['link_args']
        return reverse(url_name, args=url_args)

    def item_description(self, item):
        """Return the entry's description text."""
        return item['description']
219
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the tags of one category that have books."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first `_root_feeds` entry for `category`, else raise Http404."""
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title of the selected root entry."""
        return feed['title']

    def items(self, feed):
        """Return tags of the entry's category, skipping those with no books."""
        tags_in_category = Tag.objects.filter(category=feed['category'])
        return tags_in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag acquisition feed."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Tags get an empty description."""
        return u''
251
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed with the books carrying a given tag."""

    def link(self, tag):
        """Return the tag's own page URL."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def description(self, tag):
        """Return the fixed catalogue description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, category, slug):
        """Look the tag up by category and slug, or raise a 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def items(self, tag):
        """Return books tagged with `tag`, minus books tagged with the
        "book" tag of any of them (presumably their child books).
        """
        tagged_books = Book.tagged.with_any([tag])
        book_tag_slugs = [found.book_tag_slug() for found in tagged_books.iterator()]
        book_tags = Tag.objects.filter(category='book', slug__in=book_tag_slugs)
        nested_ids = [nested.pk for nested in Book.tagged.with_any(book_tags)]
        if not nested_ids:
            return tagged_books
        return tagged_books.exclude(pk__in=nested_ids)
274
275
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the requesting user's non-empty shelves."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """The feed object is the user making the request."""
        return request.user

    def title(self, user):
        """Return the feed title including the user's name."""
        return u"Półki użytkownika %s" % (user.username,)

    def items(self, user):
        """Return the user's 'set' tags, skipping those with no books."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the shelf's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for a single shelf."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Shelves get an empty description."""
        return u''
302
303 # no class decorators in python 2.5
304 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
305
306
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed with the books on one of the requesting user's shelves."""

    def link(self, tag):
        """Return the shelf tag's own page URL."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Return the shelf's name."""
        return tag.name

    def description(self, tag):
        """Return the fixed catalogue description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, slug):
        """Look up the requesting user's 'set' tag by slug, or raise a 404."""
        lookup = dict(category='set', slug=slug, user=request.user)
        return get_object_or_404(Tag, **lookup)

    def items(self, tag):
        """Return the books tagged with the shelf tag."""
        return Book.tagged.with_any([tag])
324
325 # no class decorators in python 2.5
326 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
327
328
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching a search query."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Either a double-quoted phrase (first group, quotes excluded) or a run of
    # non-space characters (second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    # "field:value" criteria embedded directly in the query text.
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For every criterion: indices of its (quoted, unquoted) groups in the
    # tuples returned by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # Criterion name -> index field name; criteria not listed here ('title',
    # 'text') are used as field names unchanged.
    PARAMS_TO_FIELDS = {
        'author': 'authors',
        'translator': 'translators',
        'categories': 'tag_name_pl',
        'description': 'text',
        }

    # An unexpanded OpenSearch template placeholder, e.g. "{atom:author}".
    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def get_object(self, request):
        """Run the search described by the request and return matching books.

        For OPDS 1.1 we should handle a query for search terms and criteria
        provided either as OpenSearch parameters or as an 'inline' query.
        OpenSearch defines the fields atom:author, atom:contributor (treated
        as translator) and atom:title.  An inline query provides author,
        title, categories (treated as book tags) and description (treated as
        content search terms).

        If search terms are provided, we search for books according to the
        hint information (from author, contributor and title); if the search
        terms are empty, only the criteria filters are applied.

        Returns a list of Book objects sorted by descending search score.
        """
        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            # Whatever is left after cutting out the "field:value" parts is
            # the free-text query.  Strip it, so that the spaces which used to
            # separate the criteria do not count as a search term.
            remains = self.INLINE_QUERY_RE.sub('', query)
            remains = re.sub(r'[ \t]+', ' ', remains).strip()

            def get_criteria(criteria, name):
                """Return the first non-empty value captured for `name`, or None."""
                for c in criteria:
                    for p in self.MATCHES[name]:
                        if c[p]:
                            # NOTE(review): '+' is turned into a space only for
                            # the quoted group (even index) -- confirm that this
                            # is intended rather than the unquoted one.
                            if p % 2 == 0:
                                return c[p].replace('+', ' ')
                            return c[p]
                return None

            criteria = dict(
                (cn, get_criteria(inline_criteria, cn))
                for cn in ['author', 'translator', 'title', 'categories',
                           'description', 'text'])
            query = remains
            # empty query and text set case?
            log.debug("Inline query = [%s], criteria: %s", query, criteria)
        else:
            def remove_dump_data(val):
                """Some clients don't get opds placeholders and just send them."""
                if self.ATOM_PLACEHOLDER.match(val):
                    return ''
                return val

            criteria = dict((cn, remove_dump_data(request.GET.get(cn, '')))
                            for cn in self.MATCHES.keys())
            # query is set above.
            log.debug("OpenSearch query = [%s], criteria: %s", query, criteria)

        srch = Search()

        # Always restrict hits to those with a book id, then add one filter
        # per non-empty criterion.
        book_hit_filter = srch.index.Q(book_id__any=True)
        filters = [book_hit_filter] + [srch.index.Q(
            **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
            ) for cn in self.MATCHES.keys() if cn in criteria
            if criteria[cn]]

        if query:
            # Free text may match any of the searchable fields.
            q = srch.index.query(
                reduce(operator.or_,
                       [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
                        for cn in self.MATCHES.keys()],
                srch.index.Q()))
        else:
            q = srch.index.query(srch.index.Q())

        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
        results = q.execute()

        book_scores = dict([(r['book_id'], r['score']) for r in results])
        books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
        books = list(books)
        books.sort(reverse=True, key=lambda book: book_scores[book.id])
        return books

    def get_link(self, query):
        """Return the URL of the regular search view for `query`."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object().

        The former `except ValueError` guard was dropped: returning a value
        cannot raise.
        """
        return books