Unused imports & whitespace
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15
16 from basicauth import logged_in_or_basicauth, factory_decorator
17 from catalogue.models import Book, Tag
18
19 from search.views import Search
20 import operator
21 import logging
22 import re
23
24 log = logging.getLogger('opds')
25
26 from stats.utils import piwik_track
27
# Entries of the root navigation feed.  Each entry is a dict with the keys:
#   category    - tag category served by the entry (empty for user shelves)
#   link        - URL pattern name passed to reverse()
#   link_args   - positional arguments passed to reverse()
#   title       - entry title shown in the feed
#   description - entry summary shown in the feed
_root_feeds = (
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
) + tuple(
    # All per-category entries share one shape and differ only in the
    # category name, title and description.
    {
        u"category": category,
        u"link": u"opds_by_category",
        u"link_args": [category],
        u"title": title,
        u"description": description
    }
    for category, title, description in (
        (u"author", u"Autorzy", u"Utwory wg autorów"),
        (u"kind", u"Rodzaje", u"Utwory wg rodzajów"),
        (u"genre", u"Gatunki", u"Utwory wg gatunków"),
        (u"epoch", u"Epoki", u"Utwory wg epok"),
    )
)
65
66
def full_url(url):
    """Return ``url`` joined onto ``http://<domain of the current Site>``."""
    site_root = "http://%s" % Site.objects.get_current().domain
    return urljoin(site_root, url)
69
70
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes OPDS-style entries.

    An item without an enclosure is written as a navigation entry
    (``rel="subsection"`` link); an item with an enclosure is written as
    an acquisition entry.  Both kinds get a thumbnail link.
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Thumbnail used for navigation entries.  The URL and the file size are
    # computed once, when the class body is executed.
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except Exception:
        # Best effort: the size is optional, so a missing file or an unset
        # STATIC_ROOT must not prevent the module from loading.
        _book_parent_img_size = ''

    # Thumbnail used for acquisition entries.
    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except Exception:
        # Best effort, as above.
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus OPDS start/search links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write one feed entry; modified from Atom1Feed.add_item_elements.

        ``handler`` is the XML generator the entry is written to and
        ``item`` is the item dictionary (title, link, enclosure, pubdate,
        author_*, unique_id, description, categories, item_copyright).
        """
        # These helpers live next to Atom1Feed but were never imported at
        # module level; without them the pubdate and fallback-id branches
        # below raised NameError.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            # No explicit id: derive one from the link and publication date.
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
161
162
class AcquisitionFeed(Feed):
    """Base feed whose items are books offered as EPUB enclosures."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the title of ``book``."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the absolute URL of ``book``."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the first author tag of ``book``, or u''."""
        try:
            return book.tags.filter(category='author')[0].name
        except (IndexError, KeyError):
            # Indexing an empty result set raises IndexError; KeyError is
            # still caught for backward compatibility.
            return u''

    def item_author_link(self, book):
        """Return the URL of the first author tag of ``book``, or u''."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except (IndexError, KeyError):
            # See item_author_name: a book without an author tag must not
            # break the whole feed.
            return u''

    def item_enclosure_url(self, book):
        """Return the full URL of the EPUB file, or None when there is none."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file, or None when there is none."""
        return book.epub_file.size if book.epub_file else None
196
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed; its items are the ``_root_feeds`` entries."""
    feed_type = OPDSFeed
    title = u'Wolne Lektury'
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root feed entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's title."""
        return item['title']

    def item_description(self, item):
        """Return the entry's description."""
        return item['description']

    def item_link(self, item):
        """Reverse the entry's URL pattern name with its stored arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)
217
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category``.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title of the selected root feed entry."""
        return feed['title']

    def items(self, feed):
        """Return tags of the entry's category whose book_count is not 0."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag feed for this tag."""
        return reverse("opds_by_tag", args=[item.category, item.slug])

    def item_description(self):
        """Return an empty description for every item."""
        return u''
249
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books tagged with a single tag."""

    def get_object(self, request, category, slug):
        """Return the Tag with this category and slug, or raise Http404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with ``tag``, minus nested ones.

        A book is dropped when it is itself tagged with the 'book'-category
        tag (looked up by ``book_tag_slug()``) of any book in the result.
        """
        tagged = Book.tagged.with_any([tag])
        own_slugs = [book.book_tag_slug() for book in tagged.iterator()]
        book_tags = Tag.objects.filter(category='book', slug__in=own_slugs)
        nested_pks = [book.pk for book in Book.tagged.with_any(book_tags)]
        if not nested_pks:
            return tagged
        return tagged.exclude(pk__in=nested_pks)
272
273
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the non-empty 'set' tags of the request user."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """Return the user attached to the request."""
        return request.user

    def title(self, user):
        """Return the feed title containing the user's username."""
        return u"Półki użytkownika %s" % user.username

    def items(self, user):
        """Return the user's 'set' tags whose book_count is not 0."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for this set."""
        return reverse("opds_user_set", args=[item.slug])

    def item_description(self):
        """Return an empty description for every item."""
        return u''
300
301 # no class decorators in python 2.5
302 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
303
304
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books in one 'set' tag of the request user."""

    def get_object(self, request, slug):
        """Return the request user's 'set' tag with this slug, or raise Http404."""
        lookup = {'category': 'set', 'slug': slug, 'user': request.user}
        return get_object_or_404(Tag, **lookup)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with ``tag``."""
        return Book.tagged.with_any([tag])
322
323 # no class decorators in python 2.5
324 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
325
326
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed listing the books that match a search request."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Captures either a double-quoted phrase (first group) or a run of
    # non-space characters (second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For each criterion: indexes of its (quoted, unquoted) groups in the
    # tuples produced by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # Criterion name -> index field name.  Criteria missing here ('title',
    # 'text') are looked up under their own name.
    PARAMS_TO_FIELDS = {
        'author': 'authors',
        'translator': 'translators',
        'categories': 'tag_name_pl',
        'description': 'text',
        }

    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def get_object(self, request):
        """Run the search described by ``request`` and return matching books.

        For OPDS 1.1 we should handle a query for search terms and criteria
        provided either as OpenSearch parameters or as an 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as
        translator), atom:title.  An inline query provides author, title,
        categories (treated as book tags) and description (treated as
        content search terms) inside the ``q`` parameter itself.

        If search terms are provided, we search for books according to the
        hint information (from author, contributor and title).  If search
        terms are empty, only the criteria filters are applied.

        Returns a list of Book objects ordered by descending search score.
        """

        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            # Whatever is left after removing the ``field:value`` parts is
            # the free-text query.  Strip it so that a criteria-only query
            # does not leave a lone space that would count as search terms.
            remains = self.INLINE_QUERY_RE.sub('', query)
            remains = re.sub(r'[ \t]+', ' ', remains).strip()

            def get_criteria(criteria, name):
                """Return the first non-empty value captured for ``name``."""
                for c in criteria:
                    for p in self.MATCHES[name]:
                        if c[p]:
                            if p % 2 == 0:
                                # Quoted phrase: '+' stands for a space.
                                return c[p].replace('+', ' ')
                            return c[p]
                return None

            criteria = dict(
                (cn, get_criteria(inline_criteria, cn))
                for cn in ['author', 'translator', 'title', 'categories',
                           'description', 'text'])
            query = remains
            # empty query and text set case?
            log.debug("Inline query = [%s], criteria: %s", query, criteria)
        else:
            def remove_dump_data(val):
                """Some clients don't get opds placeholders and just send them."""
                if self.ATOM_PLACEHOLDER.match(val):
                    return ''
                return val

            criteria = dict([(cn, remove_dump_data(request.GET.get(cn, '')))
                        for cn in self.MATCHES.keys()])
            # query is set above.
            log.debug("OpenSearch query = [%s], criteria: %s", query, criteria)

        srch = Search()

        # One filter restricting hits to books, plus one per non-empty
        # criterion.
        book_hit_filter = srch.index.Q(book_id__any=True)
        filters = [book_hit_filter] + [srch.index.Q(
            **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
            ) for cn in self.MATCHES.keys() if cn in criteria
            if criteria[cn]]

        if query:
            # Free-text terms may match any of the known fields.
            q = srch.index.query(
                reduce(operator.or_,
                       [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
                        for cn in self.MATCHES.keys()],
                srch.index.Q()))
        else:
            q = srch.index.query(srch.index.Q())

        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
        results = q.execute()

        book_scores = dict([(r['book_id'], r['score']) for r in results])
        books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
        books = list(books)
        books.sort(reverse=True, key=lambda book: book_scores[book.id])
        return books

    def get_link(self, query):
        """Return the URL of the 'search' view with ``query`` as ``q``."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object unchanged.

        The search itself runs in get_object; returning the list cannot
        raise, so no exception handling is needed here.
        """
        return books