Librarian: support for <abstrakt>
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15 from django.utils.functional import lazy
16
17 from basicauth import logged_in_or_basicauth, factory_decorator
18 from catalogue.models import Book, Tag
19
20 from search.views import Search
21 import operator
22 import logging
23 import re
24
# Logger shared by the views in this module; obtained under the name 'opds'.
log = logging.getLogger('opds')
26
27 from stats.utils import piwik_track
28
# Static description of the entries listed by the root OPDS catalogue
# (RootFeed.items returns this tuple as-is).  Every entry is a dict with:
#   category    -- value compared with the URL category in
#                  ByCategoryFeed.get_object and then used to filter Tag
#                  objects; empty for the user-shelves entry
#   link        -- URL pattern name resolved with reverse() in
#                  RootFeed.item_link
#   link_args   -- positional arguments handed to reverse() for that name
#   title       -- entry title shown in the feed (user-facing text)
#   description -- entry summary shown in the feed (user-facing text)
_root_feeds = (
    {
        # Shelves of the logged-in user.
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
    {
        # Browse by author.
        u"category": u"author",
        u"link": u"opds_by_category",
        u"link_args": [u"author"],
        u"title": u"Autorzy",
        u"description": u"Utwory wg autorów"
    },
    {
        # Browse by literary kind.
        u"category": u"kind",
        u"link": u"opds_by_category",
        u"link_args": [u"kind"],
        u"title": u"Rodzaje",
        u"description": u"Utwory wg rodzajów"
    },
    {
        # Browse by genre.
        u"category": u"genre",
        u"link": u"opds_by_category",
        u"link_args": [u"genre"],
        u"title": u"Gatunki",
        u"description": u"Utwory wg gatunków"
    },
    {
        # Browse by epoch.
        u"category": u"epoch",
        u"link": u"opds_by_category",
        u"link_args": [u"epoch"],
        u"title": u"Epoki",
        u"description": u"Utwory wg epok"
    },
)
66
67
def _current_site_domain():
    """Return the domain of the Site reported by ``Site.objects.get_current()``."""
    return Site.objects.get_current().domain


# Wrapped in lazy() so that the Site lookup is not performed at import time.
current_domain = lazy(_current_site_domain, str)()


def full_url(url):
    """Return ``url`` joined onto ``http://<current site domain>``."""
    base = "http://%s" % current_domain
    return urljoin(base, url)
71
72
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that renders entries as OPDS catalogue entries.

    An item without an enclosure is written as a navigation entry
    (``rel="subsection"`` link plus the "book-parent" thumbnail); an item
    with an enclosure is written as an acquisition entry
    (``rel="http://opds-spec.org/acquisition"`` link plus the "book"
    thumbnail).
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Thumbnail URLs are built through lazy() so that full_url() is not
    # evaluated while the class body is being executed.
    _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except Exception:
        # Best effort: when the size cannot be determined the "length"
        # attribute is emitted empty instead of failing at import time.
        _book_parent_img_size = ''

    _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except Exception:
        # Same best-effort fallback as for the "book-parent" image.
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus the OPDS "start" and
        "search" links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write a single feed entry (modified from Atom1Feed.add_item_elements).

        ``handler`` is the XML generator the elements are written to and
        ``item`` is the item dictionary; the keys read here are 'title',
        'link', 'enclosure', 'pubdate', 'author_name', 'author_email',
        'author_link', 'unique_id', 'description', 'categories' and
        'item_copyright'.
        """
        # Both helpers live next to Atom1Feed; they were previously used
        # without being imported, which raised NameError as soon as an item
        # had a pubdate or lacked a unique_id.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: fall back to a tag URI built from the link and pubdate.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
163
164
class AcquisitionFeed(Feed):
    """Base class for OPDS feeds whose items are books.

    Each item's enclosure is the book's EPUB file; a book without an EPUB
    file yields no enclosure URL or length.
    """
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the title of ``book``."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the URL given by ``book.get_absolute_url()``."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the first 'author' tag of ``book``.

        Returns an empty string when the book has no author tag.
        """
        try:
            return book.tags.filter(category='author')[0].name
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError (not KeyError).
            return u''

    def item_author_link(self, book):
        """Return the URL of the first 'author' tag of ``book``.

        Returns an empty string when the book has no author tag.
        """
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError (not KeyError).
            return u''

    def item_enclosure_url(self, book):
        """Return the absolute URL of the EPUB file, or None if there is none."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file, or None if there is none."""
        return book.epub_file.size if book.epub_file else None
198
@piwik_track
class RootFeed(Feed):
    """Root OPDS navigation feed; lists the entries of ``_root_feeds``."""
    feed_type = OPDSFeed

    title = u'Wolne Lektury'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root catalogue entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's 'title' value."""
        return item['title']

    def item_description(self, item):
        """Return the entry's 'description' value."""
        return item['description']

    def item_link(self, item):
        """Resolve the entry's URL name with its positional arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)
219
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed

    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category``.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the 'title' value of the selected root entry."""
        return feed['title']

    def items(self, feed):
        """Return tags of the entry's category, excluding tags with no items."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(items=None)

    def item_title(self, item):
        """Return the tag name."""
        return item.name

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, item):
        """Return the URL of the 'opds_by_tag' feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)
251
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books returned by ``Book.tagged_top_level``
    for a single tag."""

    def get_object(self, request, category, slug):
        """Look up the Tag by category and slug, or answer with a 404."""
        lookup = {'category': category, 'slug': slug}
        return get_object_or_404(Tag, **lookup)

    def title(self, tag):
        """Return the tag name."""
        return tag.name

    def link(self, tag):
        """Return the URL given by ``tag.get_absolute_url()``."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the result of ``Book.tagged_top_level`` for this tag."""
        tags = [tag]
        return Book.tagged_top_level(tags)
268
269
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the non-empty 'set' tags of the requesting user."""
    feed_type = OPDSFeed

    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    link = u'http://www.wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """Use the requesting user as the feed object."""
        return request.user

    def title(self, user):
        """Return the feed title containing the user's username."""
        username = user.username
        return u"Półki użytkownika %s" % username

    def items(self, user):
        """Return the user's 'set' tags, excluding tags with no items."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(items=None)

    def item_title(self, item):
        """Return the tag name."""
        return item.name

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, item):
        """Return the URL of the 'opds_user_set' feed for this tag."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)
296
297
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books carrying one 'set' tag of the
    requesting user."""

    def get_object(self, request, slug):
        """Look up the requesting user's 'set' Tag by slug, or answer with a 404."""
        lookup = {
            'category': 'set',
            'slug': slug,
            'user': request.user,
        }
        return get_object_or_404(Tag, **lookup)

    def title(self, tag):
        """Return the tag name."""
        return tag.name

    def link(self, tag):
        """Return the URL given by ``tag.get_absolute_url()``."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the result of ``Book.tagged.with_any`` for this tag."""
        tags = [tag]
        return Book.tagged.with_any(tags)
315
316
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching a search request.

    The search terms come from the ``q`` GET parameter.  Criteria are taken
    either from ``name:value`` pairs embedded in ``q`` (the "inline" form)
    or, when ``q`` has none, from separate GET parameters named after the
    keys of ``MATCHES``.
    """
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # A criterion value: either a double-quoted phrase (captured without the
    # quotes in the first group) or a run of non-space characters (captured
    # in the second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For every criterion: indices of its (quoted, unquoted) capture groups
    # inside the tuples returned by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # Criterion name -> search index field.  'title' and 'text' are absent
    # on purpose: lookups use .get(name, name), so they map to themselves.
    PARAMS_TO_FIELDS = {
        'author': 'authors',
        'translator': 'translators',
        'categories': 'tag_name_pl',
        'description': 'text',
        }

    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def _inline_criteria(self, query):
        """Split ``name:value`` criteria out of ``query``.

        Returns ``(criteria, remains)`` where ``criteria`` maps every key of
        ``MATCHES`` to its first non-empty value (or None) and ``remains``
        is the query text left after removing the criteria.  Returns
        ``(None, query)`` when ``query`` contains no inline criteria.
        """
        found = self.INLINE_QUERY_RE.findall(query)
        if not found:
            return None, query

        remains = self.INLINE_QUERY_RE.sub('', query)
        # Collapse the gaps left by the removed criteria and strip the
        # result; otherwise a query consisting only of criteria would leave
        # a truthy ' ' behind and be searched for as free text.
        remains = re.sub(r'[ \t]+', ' ', remains).strip()

        def first_value(name):
            for groups in found:
                for position in self.MATCHES[name]:
                    if groups[position]:
                        if position % 2 == 0:
                            # Even positions are the quoted groups.
                            return groups[position].replace('+', ' ')
                        return groups[position]
            return None

        names = ['author', 'translator', 'title', 'categories',
                 'description', 'text']
        criteria = dict((name, first_value(name)) for name in names)
        return criteria, remains

    def _opensearch_criteria(self, request):
        """Read the criteria from separate GET parameters.

        A value that is still an unexpanded ``{atom:...}`` / ``{opds:...}``
        placeholder is treated as empty, because some clients don't expand
        OPDS placeholders and just send them.
        """
        def without_placeholder(value):
            if self.ATOM_PLACEHOLDER.match(value):
                return ''
            return value

        return dict((name, without_placeholder(request.GET.get(name, '')))
                    for name in self.MATCHES.keys())

    def get_object(self, request):
        """
        For OPDS 1.1 We should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        If search terms are provided, we search for books matching them in
        any of the known fields, restricted by the criteria.

        If search terms are empty, a match-all query restricted by the
        criteria is run instead.

        Returns a list of Book objects sorted by descending search score.
        """
        query = request.GET.get('q', '')

        criteria, query = self._inline_criteria(query)
        if criteria is not None:
            # empty query and text set case?
            log.debug("Inline query = [%s], criteria: %s", query, criteria)
        else:
            criteria = self._opensearch_criteria(request)
            log.debug("OpenSearch query = [%s], criteria: %s", query, criteria)

        srch = Search()

        book_hit_filter = srch.index.Q(book_id__any=True)
        filters = [book_hit_filter] + [
            srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]})
            for cn in self.MATCHES.keys()
            if cn in criteria and criteria[cn]]

        if query:
            # Free text may match in any of the known fields.
            q = srch.index.query(
                reduce(operator.or_,
                       [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
                        for cn in self.MATCHES.keys()],
                       srch.index.Q()))
        else:
            q = srch.index.query(srch.index.Q())

        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
        results = q.execute()

        book_scores = dict((r['book_id'], r['score']) for r in results)
        books = list(Book.objects.filter(id__in=set(book_scores)))
        books.sort(reverse=True, key=lambda book: book_scores[book.id])
        return books

    def get_link(self, query):
        """Return the URL of the 'search' view with ``query`` as its ``q``
        parameter."""
        # NOTE(review): query is interpolated without URL-quoting -- confirm
        # whether callers pass an already-escaped value.
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the list of books produced by ``get_object`` unchanged."""
        return books
435             # too short a query
436             return []