apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import os.path
   6 from urlparse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.core.urlresolvers import reverse
  10 from django.shortcuts import get_object_or_404
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15
  16 from basicauth import logged_in_or_basicauth, factory_decorator
  17 from catalogue.models import Book, Tag
  18
  19 from search.views import Search, SearchResult
  20 import operator
  21 import logging
  22 import re
  23
  24 log = logging.getLogger('opds')
  25
  26 from stats.utils import piwik_track
  27
# Static description of the entries listed by the root catalogue feed.
# Each entry is a dict with the keys:
#   category    -- value compared with the requested category in
#                  ByCategoryFeed.get_object and used there to filter tags
#                  (empty for the user shelves entry),
#   link        -- URL pattern name handed to reverse() by RootFeed.item_link,
#   link_args   -- positional arguments handed to reverse() together with it,
#   title       -- entry title (also the title of the per-category feed),
#   description -- entry summary returned by RootFeed.item_description.
_root_feeds = (
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
    {
        u"category": u"author",
        u"link": u"opds_by_category",
        u"link_args": [u"author"],
        u"title": u"Autorzy",
        u"description": u"Utwory wg autorów"
    },
    {
        u"category": u"kind",
        u"link": u"opds_by_category",
        u"link_args": [u"kind"],
        u"title": u"Rodzaje",
        u"description": u"Utwory wg rodzajów"
    },
    {
        u"category": u"genre",
        u"link": u"opds_by_category",
        u"link_args": [u"genre"],
        u"title": u"Gatunki",
        u"description": u"Utwory wg gatunków"
    },
    {
        u"category": u"epoch",
        u"link": u"opds_by_category",
        u"link_args": [u"epoch"],
        u"title": u"Epoki",
        u"description": u"Utwory wg epok"
    },
)
  65
  66
  67 def full_url(url):
  68     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  69
  70
  71 class OPDSFeed(Atom1Feed):
  72     link_rel = u"subsection"
  73     link_type = u"application/atom+xml"
  74
  75     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  76     try:
  77         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  78     except:
  79         _book_parent_img_size = ''
  80
  81     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  82     try:
  83         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  84     except:
  85         _book_img_size = ''
  86
  87
  88     def add_root_elements(self, handler):
  89         super(OPDSFeed, self).add_root_elements(handler)
  90         handler.addQuickElement(u"link", None,
  91                                 {u"href": reverse("opds_authors"),
  92                                  u"rel": u"start",
  93                                  u"type": u"application/atom+xml"})
  94         handler.addQuickElement(u"link", None,
  95                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  96                                  u"rel": u"search",
  97                                  u"type": u"application/opensearchdescription+xml"})
  98
  99
 100     def add_item_elements(self, handler, item):
 101         """ modified from Atom1Feed.add_item_elements """
 102         handler.addQuickElement(u"title", item['title'])
 103
 104         # add a OPDS Navigation link if there's no enclosure
 105         if item['enclosure'] is None:
 106             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 107             # add a "green book" icon
 108             handler.addQuickElement(u"link", '',
 109                 {u"rel": u"http://opds-spec.org/thumbnail",
 110                  u"href": self._book_parent_img,
 111                  u"length": self._book_parent_img_size,
 112                  u"type": u"image/png"})
 113         if item['pubdate'] is not None:
 114             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 115
 116         # Author information.
 117         if item['author_name'] is not None:
 118             handler.startElement(u"author", {})
 119             handler.addQuickElement(u"name", item['author_name'])
 120             if item['author_email'] is not None:
 121                 handler.addQuickElement(u"email", item['author_email'])
 122             if item['author_link'] is not None:
 123                 handler.addQuickElement(u"uri", item['author_link'])
 124             handler.endElement(u"author")
 125
 126         # Unique ID.
 127         if item['unique_id'] is not None:
 128             unique_id = item['unique_id']
 129         else:
 130             unique_id = get_tag_uri(item['link'], item['pubdate'])
 131         handler.addQuickElement(u"id", unique_id)
 132
 133         # Summary.
 134         # OPDS needs type=text
 135         if item['description'] is not None:
 136             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 137
 138         # Enclosure as OPDS Acquisition Link
 139         if item['enclosure'] is not None:
 140             handler.addQuickElement(u"link", '',
 141                 {u"rel": u"http://opds-spec.org/acquisition",
 142                  u"href": item['enclosure'].url,
 143                  u"length": item['enclosure'].length,
 144                  u"type": item['enclosure'].mime_type})
 145             # add a "red book" icon
 146             handler.addQuickElement(u"link", '',
 147                 {u"rel": u"http://opds-spec.org/thumbnail",
 148                  u"href": self._book_img,
 149                  u"length": self._book_img_size,
 150                  u"type": u"image/png"})
 151
 152         # Categories.
 153         for cat in item['categories']:
 154             handler.addQuickElement(u"category", u"", {u"term": cat})
 155
 156         # Rights.
 157         if item['item_copyright'] is not None:
 158             handler.addQuickElement(u"rights", item['item_copyright'])
 159
 160
 161 class AcquisitionFeed(Feed):
 162     feed_type = OPDSFeed
 163     link = u'http://www.wolnelektury.pl/'
 164     item_enclosure_mime_type = "application/epub+zip"
 165     author_name = u"Wolne Lektury"
 166     author_link = u"http://www.wolnelektury.pl/"
 167
 168     def item_title(self, book):
 169         return book.title
 170
 171     def item_description(self):
 172         return u''
 173
 174     def item_link(self, book):
 175         return book.get_absolute_url()
 176
 177     def item_author_name(self, book):
 178         try:
 179             return book.tags.filter(category='author')[0].name
 180         except KeyError:
 181             return u''
 182
 183     def item_author_link(self, book):
 184         try:
 185             return book.tags.filter(category='author')[0].get_absolute_url()
 186         except KeyError:
 187             return u''
 188
 189     def item_enclosure_url(self, book):
 190         return full_url(book.epub_file.url) if book.epub_file else None
 191
 192     def item_enclosure_length(self, book):
 193         return book.epub_file.size if book.epub_file else None
 194
 195 @piwik_track
 196 class RootFeed(Feed):
 197     feed_type = OPDSFeed
 198     title = u'Wolne Lektury'
 199     link = u'http://wolnelektury.pl/'
 200     description = u"Spis utworów na stronie http://WolneLektury.pl"
 201     author_name = u"Wolne Lektury"
 202     author_link = u"http://wolnelektury.pl/"
 203
 204     def items(self):
 205         return _root_feeds
 206
 207     def item_title(self, item):
 208         return item['title']
 209
 210     def item_link(self, item):
 211         return reverse(item['link'], args=item['link_args'])
 212
 213     def item_description(self, item):
 214         return item['description']
 215
 216 @piwik_track
 217 class ByCategoryFeed(Feed):
 218     feed_type = OPDSFeed
 219     link = u'http://wolnelektury.pl/'
 220     description = u"Spis utworów na stronie http://WolneLektury.pl"
 221     author_name = u"Wolne Lektury"
 222     author_link = u"http://wolnelektury.pl/"
 223
 224     def get_object(self, request, category):
 225         feed = [feed for feed in _root_feeds if feed['category']==category]
 226         if feed:
 227             feed = feed[0]
 228         else:
 229             raise Http404
 230
 231         return feed
 232
 233     def title(self, feed):
 234         return feed['title']
 235
 236     def items(self, feed):
 237         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 238
 239     def item_title(self, item):
 240         return item.name
 241
 242     def item_link(self, item):
 243         return reverse("opds_by_tag", args=[item.category, item.slug])
 244
 245     def item_description(self):
 246         return u''
 247
 248 @piwik_track
 249 class ByTagFeed(AcquisitionFeed):
 250     def link(self, tag):
 251         return tag.get_absolute_url()
 252
 253     def title(self, tag):
 254         return tag.name
 255
 256     def description(self, tag):
 257         return u"Spis utworów na stronie http://WolneLektury.pl"
 258
 259     def get_object(self, request, category, slug):
 260         return get_object_or_404(Tag, category=category, slug=slug)
 261
 262     def items(self, tag):
 263         books = Book.tagged.with_any([tag])
 264         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
 265         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 266         if descendants_keys:
 267             books = books.exclude(pk__in=descendants_keys)
 268
 269         return books
 270
 271
 272 @factory_decorator(logged_in_or_basicauth())
 273 @piwik_track
 274 class UserFeed(Feed):
 275     feed_type = OPDSFeed
 276     link = u'http://www.wolnelektury.pl/'
 277     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 278     author_name = u"Wolne Lektury"
 279     author_link = u"http://wolnelektury.pl/"
 280
 281     def get_object(self, request):
 282         return request.user
 283
 284     def title(self, user):
 285         return u"Półki użytkownika %s" % user.username
 286
 287     def items(self, user):
 288         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 289
 290     def item_title(self, item):
 291         return item.name
 292
 293     def item_link(self, item):
 294         return reverse("opds_user_set", args=[item.slug])
 295
 296     def item_description(self):
 297         return u''
 298
# Python 2.5 has no class decorators; there the equivalent of the decorator above is:
# UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
 301
 302
 303 @factory_decorator(logged_in_or_basicauth())
 304 @piwik_track
 305 class UserSetFeed(AcquisitionFeed):
 306     def link(self, tag):
 307         return tag.get_absolute_url()
 308
 309     def title(self, tag):
 310         return tag.name
 311
 312     def description(self, tag):
 313         return u"Spis utworów na stronie http://WolneLektury.pl"
 314
 315     def get_object(self, request, slug):
 316         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 317
 318     def items(self, tag):
 319         return Book.tagged.with_any([tag])
 320
# Python 2.5 has no class decorators; there the equivalent of the decorator above is:
# UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
 323
 324
 325 @piwik_track
 326 class SearchFeed(AcquisitionFeed):
 327     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 328     title = u"Wyniki wyszukiwania"
 329
 330     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 331     INLINE_QUERY_RE = re.compile(
 332         r"author:" + QUOTE_OR_NOT +
 333         "|translator:" + QUOTE_OR_NOT +
 334         "|title:" + QUOTE_OR_NOT +
 335         "|categories:" + QUOTE_OR_NOT +
 336         "|description:" + QUOTE_OR_NOT +
 337         "|text:" + QUOTE_OR_NOT
 338         )
 339     MATCHES = {
 340         'author': (0, 1),
 341         'translator': (2, 3),
 342         'title': (4, 5),
 343         'categories': (6, 7),
 344         'description': (8, 9),
 345         'text': (10, 11),
 346         }
 347
 348     PARAMS_TO_FIELDS = {
 349         'author': 'authors',
 350         'translator': 'translators',
 351         #        'title': 'title',
 352         'categories': 'tag_name_pl',
 353         'description': 'text',
 354         #        'text': 'text',
 355         }
 356
 357     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 358
 359     def get_object(self, request):
 360         """
 361         For OPDS 1.1 We should handle a query for search terms
 362         and criteria provided either as opensearch or 'inline' query.
 363         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 364         atom:title. Inline query provides author, title, categories (treated as book tags),
 365         description (treated as content search terms).
 366
 367         if search terms are provided, we shall search for books
 368         according to Hint information (from author & contributror & title).
 369
 370         but if search terms are empty, we should do a different search
 371         (perhaps for is_book=True)
 372
 373         """
 374
 375         query = request.GET.get('q', '')
 376
 377         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 378         if inline_criteria:
 379             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 380             remains = re.sub(r'[ \t]+', ' ', remains)
 381
 382             def get_criteria(criteria, name):
 383                 for c in criteria:
 384                     for p in self.MATCHES[name]:
 385                         if c[p]:
 386                             if p % 2 == 0:
 387                                 return c[p].replace('+', ' ')
 388                             return c[p]
 389                 return None
 390
 391             criteria = dict(map(
 392                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 393                 ['author', 'translator', 'title', 'categories',
 394                  'description', 'text']))
 395             query = remains
 396             # empty query and text set case?
 397             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 398         else:
 399             def remove_dump_data(val):
 400                 """Some clients don't get opds placeholders and just send them."""
 401                 if self.ATOM_PLACEHOLDER.match(val):
 402                     return ''
 403                 return val
 404
 405             criteria = dict([(cn, remove_dump_data(request.GET.get(cn, '')))
 406                         for cn in self.MATCHES.keys()])
 407             # query is set above.
 408             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 409
 410         srch = Search()
 411
 412         book_hit_filter = srch.index.Q(book_id__any=True)
 413         filters = [book_hit_filter] + [srch.index.Q(
 414             **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
 415             ) for cn in self.MATCHES.keys() if cn in criteria
 416             if criteria[cn]]
 417
 418         if query:
 419             q = srch.index.query(
 420                 reduce(operator.or_,
 421                        [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
 422                         for cn in self.MATCHES.keys()],
 423                 srch.index.Q()))
 424         else:
 425             q = srch.index.query(srch.index.Q())
 426
 427         q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
 428         results = q.execute()
 429
 430         book_scores = dict([(r['book_id'], r['score']) for r in results])
 431         books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
 432         books = list(books)
 433         books.sort(reverse=True, key=lambda book: book_scores[book.id])
 434         return books
 435
 436     def get_link(self, query):
 437         return "%s?q=%s" % (reverse('search'), query)
 438
 439     def items(self, books):
 440         try:
 441             return books
 442         except ValueError:
 443             # too short a query
 444             return []