apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import os.path
   6 from urlparse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.core.urlresolvers import reverse
  10 from django.shortcuts import get_object_or_404
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15
  16 from basicauth import logged_in_or_basicauth, factory_decorator
  17 from catalogue.models import Book, Tag
  18
  19 from search.views import Search
  20 import operator
  21 import logging
  22 import re
  23
  24 log = logging.getLogger('opds')
  25
  26 from stats.utils import piwik_track
  27
  28 _root_feeds = (
  29     {
  30         u"category": u"",
  31         u"link": u"opds_user",
  32         u"link_args": [],
  33         u"title": u"Moje półki",
  34         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  35     },
  36     {
  37         u"category": u"author",
  38         u"link": u"opds_by_category",
  39         u"link_args": [u"author"],
  40         u"title": u"Autorzy",
  41         u"description": u"Utwory wg autorów"
  42     },
  43     {
  44         u"category": u"kind",
  45         u"link": u"opds_by_category",
  46         u"link_args": [u"kind"],
  47         u"title": u"Rodzaje",
  48         u"description": u"Utwory wg rodzajów"
  49     },
  50     {
  51         u"category": u"genre",
  52         u"link": u"opds_by_category",
  53         u"link_args": [u"genre"],
  54         u"title": u"Gatunki",
  55         u"description": u"Utwory wg gatunków"
  56     },
  57     {
  58         u"category": u"epoch",
  59         u"link": u"opds_by_category",
  60         u"link_args": [u"epoch"],
  61         u"title": u"Epoki",
  62         u"description": u"Utwory wg epok"
  63     },
  64 )
  65
  66
  67 def full_url(url):
  68     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  69
  70
  71 class OPDSFeed(Atom1Feed):
  72     link_rel = u"subsection"
  73     link_type = u"application/atom+xml"
  74
  75     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  76     try:
  77         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  78     except:
  79         _book_parent_img_size = ''
  80
  81     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  82     try:
  83         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  84     except:
  85         _book_img_size = ''
  86
  87
  88     def add_root_elements(self, handler):
  89         super(OPDSFeed, self).add_root_elements(handler)
  90         handler.addQuickElement(u"link", None,
  91                                 {u"href": reverse("opds_authors"),
  92                                  u"rel": u"start",
  93                                  u"type": u"application/atom+xml"})
  94         handler.addQuickElement(u"link", None,
  95                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  96                                  u"rel": u"search",
  97                                  u"type": u"application/opensearchdescription+xml"})
  98
  99
 100     def add_item_elements(self, handler, item):
 101         """ modified from Atom1Feed.add_item_elements """
 102         handler.addQuickElement(u"title", item['title'])
 103
 104         # add a OPDS Navigation link if there's no enclosure
 105         if item['enclosure'] is None:
 106             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 107             # add a "green book" icon
 108             handler.addQuickElement(u"link", '',
 109                 {u"rel": u"http://opds-spec.org/thumbnail",
 110                  u"href": self._book_parent_img,
 111                  u"length": self._book_parent_img_size,
 112                  u"type": u"image/png"})
 113         if item['pubdate'] is not None:
 114             # FIXME: rfc3339_date is undefined, is this ever run?
 115             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 116
 117         # Author information.
 118         if item['author_name'] is not None:
 119             handler.startElement(u"author", {})
 120             handler.addQuickElement(u"name", item['author_name'])
 121             if item['author_email'] is not None:
 122                 handler.addQuickElement(u"email", item['author_email'])
 123             if item['author_link'] is not None:
 124                 handler.addQuickElement(u"uri", item['author_link'])
 125             handler.endElement(u"author")
 126
 127         # Unique ID.
 128         if item['unique_id'] is not None:
 129             unique_id = item['unique_id']
 130         else:
 131             # FIXME: get_tag_uri is undefined, is this ever run?
 132             unique_id = get_tag_uri(item['link'], item['pubdate'])
 133         handler.addQuickElement(u"id", unique_id)
 134
 135         # Summary.
 136         # OPDS needs type=text
 137         if item['description'] is not None:
 138             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 139
 140         # Enclosure as OPDS Acquisition Link
 141         if item['enclosure'] is not None:
 142             handler.addQuickElement(u"link", '',
 143                 {u"rel": u"http://opds-spec.org/acquisition",
 144                  u"href": item['enclosure'].url,
 145                  u"length": item['enclosure'].length,
 146                  u"type": item['enclosure'].mime_type})
 147             # add a "red book" icon
 148             handler.addQuickElement(u"link", '',
 149                 {u"rel": u"http://opds-spec.org/thumbnail",
 150                  u"href": self._book_img,
 151                  u"length": self._book_img_size,
 152                  u"type": u"image/png"})
 153
 154         # Categories.
 155         for cat in item['categories']:
 156             handler.addQuickElement(u"category", u"", {u"term": cat})
 157
 158         # Rights.
 159         if item['item_copyright'] is not None:
 160             handler.addQuickElement(u"rights", item['item_copyright'])
 161
 162
 163 class AcquisitionFeed(Feed):
 164     feed_type = OPDSFeed
 165     link = u'http://www.wolnelektury.pl/'
 166     item_enclosure_mime_type = "application/epub+zip"
 167     author_name = u"Wolne Lektury"
 168     author_link = u"http://www.wolnelektury.pl/"
 169
 170     def item_title(self, book):
 171         return book.title
 172
 173     def item_description(self):
 174         return u''
 175
 176     def item_link(self, book):
 177         return book.get_absolute_url()
 178
 179     def item_author_name(self, book):
 180         try:
 181             return book.tags.filter(category='author')[0].name
 182         except KeyError:
 183             return u''
 184
 185     def item_author_link(self, book):
 186         try:
 187             return book.tags.filter(category='author')[0].get_absolute_url()
 188         except KeyError:
 189             return u''
 190
 191     def item_enclosure_url(self, book):
 192         return full_url(book.epub_file.url) if book.epub_file else None
 193
 194     def item_enclosure_length(self, book):
 195         return book.epub_file.size if book.epub_file else None
 196
 197 @piwik_track
 198 class RootFeed(Feed):
 199     feed_type = OPDSFeed
 200     title = u'Wolne Lektury'
 201     link = u'http://wolnelektury.pl/'
 202     description = u"Spis utworów na stronie http://WolneLektury.pl"
 203     author_name = u"Wolne Lektury"
 204     author_link = u"http://wolnelektury.pl/"
 205
 206     def items(self):
 207         return _root_feeds
 208
 209     def item_title(self, item):
 210         return item['title']
 211
 212     def item_link(self, item):
 213         return reverse(item['link'], args=item['link_args'])
 214
 215     def item_description(self, item):
 216         return item['description']
 217
 218 @piwik_track
 219 class ByCategoryFeed(Feed):
 220     feed_type = OPDSFeed
 221     link = u'http://wolnelektury.pl/'
 222     description = u"Spis utworów na stronie http://WolneLektury.pl"
 223     author_name = u"Wolne Lektury"
 224     author_link = u"http://wolnelektury.pl/"
 225
 226     def get_object(self, request, category):
 227         feed = [feed for feed in _root_feeds if feed['category'] == category]
 228         if feed:
 229             feed = feed[0]
 230         else:
 231             raise Http404
 232
 233         return feed
 234
 235     def title(self, feed):
 236         return feed['title']
 237
 238     def items(self, feed):
 239         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 240
 241     def item_title(self, item):
 242         return item.name
 243
 244     def item_link(self, item):
 245         return reverse("opds_by_tag", args=[item.category, item.slug])
 246
 247     def item_description(self):
 248         return u''
 249
 250 @piwik_track
 251 class ByTagFeed(AcquisitionFeed):
 252     def link(self, tag):
 253         return tag.get_absolute_url()
 254
 255     def title(self, tag):
 256         return tag.name
 257
 258     def description(self, tag):
 259         return u"Spis utworów na stronie http://WolneLektury.pl"
 260
 261     def get_object(self, request, category, slug):
 262         return get_object_or_404(Tag, category=category, slug=slug)
 263
 264     def items(self, tag):
 265         books = Book.tagged.with_any([tag])
 266         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
 267         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 268         if descendants_keys:
 269             books = books.exclude(pk__in=descendants_keys)
 270
 271         return books
 272
 273
 274 @factory_decorator(logged_in_or_basicauth())
 275 @piwik_track
 276 class UserFeed(Feed):
 277     feed_type = OPDSFeed
 278     link = u'http://www.wolnelektury.pl/'
 279     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 280     author_name = u"Wolne Lektury"
 281     author_link = u"http://wolnelektury.pl/"
 282
 283     def get_object(self, request):
 284         return request.user
 285
 286     def title(self, user):
 287         return u"Półki użytkownika %s" % user.username
 288
 289     def items(self, user):
 290         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 291
 292     def item_title(self, item):
 293         return item.name
 294
 295     def item_link(self, item):
 296         return reverse("opds_user_set", args=[item.slug])
 297
 298     def item_description(self):
 299         return u''
 300
 301 # no class decorators in python 2.5
 302 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
 303
 304
 305 @factory_decorator(logged_in_or_basicauth())
 306 @piwik_track
 307 class UserSetFeed(AcquisitionFeed):
 308     def link(self, tag):
 309         return tag.get_absolute_url()
 310
 311     def title(self, tag):
 312         return tag.name
 313
 314     def description(self, tag):
 315         return u"Spis utworów na stronie http://WolneLektury.pl"
 316
 317     def get_object(self, request, slug):
 318         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 319
 320     def items(self, tag):
 321         return Book.tagged.with_any([tag])
 322
 323 # no class decorators in python 2.5
 324 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
 325
 326
 327 @piwik_track
 328 class SearchFeed(AcquisitionFeed):
 329     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 330     title = u"Wyniki wyszukiwania"
 331
 332     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 333     INLINE_QUERY_RE = re.compile(
 334         r"author:" + QUOTE_OR_NOT +
 335         "|translator:" + QUOTE_OR_NOT +
 336         "|title:" + QUOTE_OR_NOT +
 337         "|categories:" + QUOTE_OR_NOT +
 338         "|description:" + QUOTE_OR_NOT +
 339         "|text:" + QUOTE_OR_NOT
 340         )
 341     MATCHES = {
 342         'author': (0, 1),
 343         'translator': (2, 3),
 344         'title': (4, 5),
 345         'categories': (6, 7),
 346         'description': (8, 9),
 347         'text': (10, 11),
 348         }
 349
 350     PARAMS_TO_FIELDS = {
 351         'author': 'authors',
 352         'translator': 'translators',
 353         #        'title': 'title',
 354         'categories': 'tag_name_pl',
 355         'description': 'text',
 356         #        'text': 'text',
 357         }
 358
 359     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 360
 361     def get_object(self, request):
 362         """
 363         For OPDS 1.1 We should handle a query for search terms
 364         and criteria provided either as opensearch or 'inline' query.
 365         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 366         atom:title. Inline query provides author, title, categories (treated as book tags),
 367         description (treated as content search terms).
 368
 369         if search terms are provided, we shall search for books
 370         according to Hint information (from author & contributror & title).
 371
 372         but if search terms are empty, we should do a different search
 373         (perhaps for is_book=True)
 374
 375         """
 376
 377         query = request.GET.get('q', '')
 378
 379         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 380         if inline_criteria:
 381             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 382             remains = re.sub(r'[ \t]+', ' ', remains)
 383
 384             def get_criteria(criteria, name):
 385                 for c in criteria:
 386                     for p in self.MATCHES[name]:
 387                         if c[p]:
 388                             if p % 2 == 0:
 389                                 return c[p].replace('+', ' ')
 390                             return c[p]
 391                 return None
 392
 393             criteria = dict(map(
 394                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 395                 ['author', 'translator', 'title', 'categories',
 396                  'description', 'text']))
 397             query = remains
 398             # empty query and text set case?
 399             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 400         else:
 401             def remove_dump_data(val):
 402                 """Some clients don't get opds placeholders and just send them."""
 403                 if self.ATOM_PLACEHOLDER.match(val):
 404                     return ''
 405                 return val
 406
 407             criteria = dict([(cn, remove_dump_data(request.GET.get(cn, '')))
 408                         for cn in self.MATCHES.keys()])
 409             # query is set above.
 410             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 411
 412         srch = Search()
 413
 414         book_hit_filter = srch.index.Q(book_id__any=True)
 415         filters = [book_hit_filter] + [srch.index.Q(
 416             **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
 417             ) for cn in self.MATCHES.keys() if cn in criteria
 418             if criteria[cn]]
 419
 420         if query:
 421             q = srch.index.query(
 422                 reduce(operator.or_,
 423                        [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
 424                         for cn in self.MATCHES.keys()],
 425                 srch.index.Q()))
 426         else:
 427             q = srch.index.query(srch.index.Q())
 428
 429         q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
 430         results = q.execute()
 431
 432         book_scores = dict([(r['book_id'], r['score']) for r in results])
 433         books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
 434         books = list(books)
 435         books.sort(reverse=True, key=lambda book: book_scores[book.id])
 436         return books
 437
 438     def get_link(self, query):
 439         return "%s?q=%s" % (reverse('search'), query)
 440
 441     def items(self, books):
 442         try:
 443             return books
 444         except ValueError:
 445             # too short a query
 446             return []