src/opds/views.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from functools import reduce
   5 import os.path
   6 from urllib.parse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.shortcuts import get_object_or_404
  10 from django.urls import reverse
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15 from django.utils.functional import lazy
  16
  17 from basicauth import logged_in_or_basicauth, factory_decorator
  18 from catalogue.models import Book, Tag
  19
  20 from search.views import Search
  21 import operator
  22 import logging
  23 import re
  24
  25 from stats.utils import piwik_track
  26
  27 log = logging.getLogger('opds')
  28
  29 _root_feeds = (
  30     {
  31         u"category": u"",
  32         u"link": u"opds_user",
  33         u"link_args": [],
  34         u"title": u"Moje półki",
  35         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  36     },
  37     {
  38         u"category": u"author",
  39         u"link": u"opds_by_category",
  40         u"link_args": [u"author"],
  41         u"title": u"Autorzy",
  42         u"description": u"Utwory wg autorów"
  43     },
  44     {
  45         u"category": u"kind",
  46         u"link": u"opds_by_category",
  47         u"link_args": [u"kind"],
  48         u"title": u"Rodzaje",
  49         u"description": u"Utwory wg rodzajów"
  50     },
  51     {
  52         u"category": u"genre",
  53         u"link": u"opds_by_category",
  54         u"link_args": [u"genre"],
  55         u"title": u"Gatunki",
  56         u"description": u"Utwory wg gatunków"
  57     },
  58     {
  59         u"category": u"epoch",
  60         u"link": u"opds_by_category",
  61         u"link_args": [u"epoch"],
  62         u"title": u"Epoki",
  63         u"description": u"Utwory wg epok"
  64     },
  65 )
  66
  67
  68 current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
  69
  70
  71 def full_url(url):
  72     return urljoin("http://%s" % current_domain, url)
  73
  74
  75 class OPDSFeed(Atom1Feed):
  76     link_rel = u"subsection"
  77     link_type = u"application/atom+xml"
  78
  79     _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
  80     try:
  81         _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  82     except OSError:
  83         _book_parent_img_size = ''
  84
  85     _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
  86     try:
  87         _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  88     except OSError:
  89         _book_img_size = ''
  90
  91     def add_root_elements(self, handler):
  92         super(OPDSFeed, self).add_root_elements(handler)
  93         handler.addQuickElement(u"link", None,
  94                                 {u"href": reverse("opds_authors"),
  95                                  u"rel": u"start",
  96                                  u"type": u"application/atom+xml"})
  97         handler.addQuickElement(u"link", None,
  98                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  99                                  u"rel": u"search",
 100                                  u"type": u"application/opensearchdescription+xml"})
 101
 102     def add_item_elements(self, handler, item):
 103         """ modified from Atom1Feed.add_item_elements """
 104         handler.addQuickElement(u"title", item['title'])
 105
 106         # add a OPDS Navigation link if there's no enclosure
 107         if not item.get('enclosures') is None:
 108             handler.addQuickElement(
 109                 u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 110             # add a "green book" icon
 111             handler.addQuickElement(
 112                 u"link", '',
 113                 {
 114                     u"rel": u"http://opds-spec.org/thumbnail",
 115                     u"href": self._book_parent_img,
 116                     u"length": self._book_parent_img_size,
 117                     u"type": u"image/png",
 118                 })
 119         if item['pubdate'] is not None:
 120             # FIXME: rfc3339_date is undefined, is this ever run?
 121             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 122
 123         # Author information.
 124         if item['author_name'] is not None:
 125             handler.startElement(u"author", {})
 126             handler.addQuickElement(u"name", item['author_name'])
 127             if item['author_email'] is not None:
 128                 handler.addQuickElement(u"email", item['author_email'])
 129             if item['author_link'] is not None:
 130                 handler.addQuickElement(u"uri", item['author_link'])
 131             handler.endElement(u"author")
 132
 133         # Unique ID.
 134         if item['unique_id'] is not None:
 135             unique_id = item['unique_id']
 136         else:
 137             # FIXME: get_tag_uri is undefined, is this ever run?
 138             unique_id = get_tag_uri(item['link'], item['pubdate'])
 139         handler.addQuickElement(u"id", unique_id)
 140
 141         # Summary.
 142         # OPDS needs type=text
 143         if item['description'] is not None:
 144             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 145
 146         # Enclosure as OPDS Acquisition Link
 147         for enc in item.get('enclosures', []):
 148             handler.addQuickElement(
 149                 u"link", '',
 150                 {
 151                     u"rel": u"http://opds-spec.org/acquisition",
 152                     u"href": enc.url,
 153                     u"length": enc.length,
 154                     u"type": enc.mime_type,
 155                 })
 156             # add a "red book" icon
 157             handler.addQuickElement(
 158                 u"link", '',
 159                 {
 160                     u"rel": u"http://opds-spec.org/thumbnail",
 161                     u"href": self._book_img,
 162                     u"length": self._book_img_size,
 163                     u"type": u"image/png",
 164                 })
 165
 166         # Categories.
 167         for cat in item['categories']:
 168             handler.addQuickElement(u"category", u"", {u"term": cat})
 169
 170         # Rights.
 171         if item['item_copyright'] is not None:
 172             handler.addQuickElement(u"rights", item['item_copyright'])
 173
 174
 175 class AcquisitionFeed(Feed):
 176     feed_type = OPDSFeed
 177     link = u'http://www.wolnelektury.pl/'
 178     item_enclosure_mime_type = "application/epub+zip"
 179     author_name = u"Wolne Lektury"
 180     author_link = u"http://www.wolnelektury.pl/"
 181
 182     def item_title(self, book):
 183         return book.title
 184
 185     def item_description(self):
 186         return u''
 187
 188     def item_link(self, book):
 189         return book.get_absolute_url()
 190
 191     def item_author_name(self, book):
 192         try:
 193             return book.authors().first().name
 194         except AttributeError:
 195             return u''
 196
 197     def item_author_link(self, book):
 198         try:
 199             return book.authors().first().get_absolute_url()
 200         except AttributeError:
 201             return u''
 202
 203     def item_enclosure_url(self, book):
 204         return full_url(book.epub_url()) if book.epub_file else None
 205
 206     def item_enclosure_length(self, book):
 207         return book.epub_file.size if book.epub_file else None
 208
 209
 210 @piwik_track
 211 class RootFeed(Feed):
 212     feed_type = OPDSFeed
 213     title = u'Wolne Lektury'
 214     link = u'http://wolnelektury.pl/'
 215     description = u"Spis utworów na stronie http://WolneLektury.pl"
 216     author_name = u"Wolne Lektury"
 217     author_link = u"http://wolnelektury.pl/"
 218
 219     def items(self):
 220         return _root_feeds
 221
 222     def item_title(self, item):
 223         return item['title']
 224
 225     def item_link(self, item):
 226         return reverse(item['link'], args=item['link_args'])
 227
 228     def item_description(self, item):
 229         return item['description']
 230
 231
 232 @piwik_track
 233 class ByCategoryFeed(Feed):
 234     feed_type = OPDSFeed
 235     link = u'http://wolnelektury.pl/'
 236     description = u"Spis utworów na stronie http://WolneLektury.pl"
 237     author_name = u"Wolne Lektury"
 238     author_link = u"http://wolnelektury.pl/"
 239
 240     def get_object(self, request, category):
 241         feed = [feed for feed in _root_feeds if feed['category'] == category]
 242         if feed:
 243             feed = feed[0]
 244         else:
 245             raise Http404
 246
 247         return feed
 248
 249     def title(self, feed):
 250         return feed['title']
 251
 252     def items(self, feed):
 253         return Tag.objects.filter(category=feed['category']).exclude(items=None)
 254
 255     def item_title(self, item):
 256         return item.name
 257
 258     def item_link(self, item):
 259         return reverse("opds_by_tag", args=[item.category, item.slug])
 260
 261     def item_description(self):
 262         return u''
 263
 264
 265 @piwik_track
 266 class ByTagFeed(AcquisitionFeed):
 267     def link(self, tag):
 268         return tag.get_absolute_url()
 269
 270     def title(self, tag):
 271         return tag.name
 272
 273     def description(self, tag):
 274         return u"Spis utworów na stronie http://WolneLektury.pl"
 275
 276     def get_object(self, request, category, slug):
 277         return get_object_or_404(Tag, category=category, slug=slug)
 278
 279     def items(self, tag):
 280         return Book.tagged_top_level([tag])
 281
 282
 283 @factory_decorator(logged_in_or_basicauth())
 284 @piwik_track
 285 class UserFeed(Feed):
 286     feed_type = OPDSFeed
 287     link = u'http://www.wolnelektury.pl/'
 288     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 289     author_name = u"Wolne Lektury"
 290     author_link = u"http://wolnelektury.pl/"
 291
 292     def get_object(self, request):
 293         return request.user
 294
 295     def title(self, user):
 296         return u"Półki użytkownika %s" % user.username
 297
 298     def items(self, user):
 299         return Tag.objects.filter(category='set', user=user).exclude(items=None)
 300
 301     def item_title(self, item):
 302         return item.name
 303
 304     def item_link(self, item):
 305         return reverse("opds_user_set", args=[item.slug])
 306
 307     def item_description(self):
 308         return u''
 309
 310
 311 @factory_decorator(logged_in_or_basicauth())
 312 @piwik_track
 313 class UserSetFeed(AcquisitionFeed):
 314     def link(self, tag):
 315         return tag.get_absolute_url()
 316
 317     def title(self, tag):
 318         return tag.name
 319
 320     def description(self, tag):
 321         return u"Spis utworów na stronie http://WolneLektury.pl"
 322
 323     def get_object(self, request, slug):
 324         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 325
 326     def items(self, tag):
 327         return Book.tagged.with_any([tag])
 328
 329
 330 @piwik_track
 331 class SearchFeed(AcquisitionFeed):
 332     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 333     title = u"Wyniki wyszukiwania"
 334
 335     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 336     INLINE_QUERY_RE = re.compile(
 337         r"author:" + QUOTE_OR_NOT +
 338         "|translator:" + QUOTE_OR_NOT +
 339         "|title:" + QUOTE_OR_NOT +
 340         "|categories:" + QUOTE_OR_NOT +
 341         "|description:" + QUOTE_OR_NOT +
 342         "|text:" + QUOTE_OR_NOT
 343         )
 344     MATCHES = {
 345         'author': (0, 1),
 346         'translator': (2, 3),
 347         'title': (4, 5),
 348         'categories': (6, 7),
 349         'description': (8, 9),
 350         'text': (10, 11),
 351         }
 352
 353     PARAMS_TO_FIELDS = {
 354         'author': 'authors',
 355         'translator': 'translators',
 356         #        'title': 'title',
 357         'categories': 'tag_name_pl',
 358         'description': 'text',
 359         #        'text': 'text',
 360         }
 361
 362     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 363
 364     def get_object(self, request):
 365         """
 366         For OPDS 1.1 We should handle a query for search terms
 367         and criteria provided either as opensearch or 'inline' query.
 368         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 369         atom:title. Inline query provides author, title, categories (treated as book tags),
 370         description (treated as content search terms).
 371
 372         if search terms are provided, we shall search for books
 373         according to Hint information (from author & contributror & title).
 374
 375         but if search terms are empty, we should do a different search
 376         (perhaps for is_book=True)
 377
 378         """
 379
 380         query = request.GET.get('q', '')
 381
 382         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 383         if inline_criteria:
 384             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 385             remains = re.sub(r'[ \t]+', ' ', remains)
 386
 387             def get_criteria(criteria, name):
 388                 for c in criteria:
 389                     for p in self.MATCHES[name]:
 390                         if c[p]:
 391                             if p % 2 == 0:
 392                                 return c[p].replace('+', ' ')
 393                             return c[p]
 394                 return None
 395
 396             criteria = dict(map(
 397                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 398                 ['author', 'translator', 'title', 'categories',
 399                  'description', 'text']))
 400             query = remains
 401             # empty query and text set case?
 402             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 403         else:
 404             def remove_dump_data(val):
 405                 """Some clients don't get opds placeholders and just send them."""
 406                 if self.ATOM_PLACEHOLDER.match(val):
 407                     return ''
 408                 return val
 409
 410             criteria = dict(
 411                 (cn, remove_dump_data(request.GET.get(cn, '')))
 412                 for cn in self.MATCHES.keys())
 413             # query is set above.
 414             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 415
 416         srch = Search()
 417
 418         book_hit_filter = srch.index.Q(book_id__any=True)
 419         filters = [book_hit_filter] + [srch.index.Q(
 420             **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
 421             ) for cn in self.MATCHES.keys() if cn in criteria
 422             if criteria[cn]]
 423
 424         if query:
 425             q = srch.index.query(
 426                 reduce(
 427                     operator.or_,
 428                     [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query}) for cn in self.MATCHES.keys()],
 429                     srch.index.Q()))
 430         else:
 431             q = srch.index.query(srch.index.Q())
 432
 433         q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
 434         results = q.execute()
 435
 436         book_scores = dict([(r['book_id'], r['score']) for r in results])
 437         books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
 438         books = list(books)
 439         books.sort(reverse=True, key=lambda book: book_scores[book.id])
 440         return books
 441
 442     def get_link(self, query):
 443         return "%s?q=%s" % (reverse('search'), query)
 444
 445     def items(self, books):
 446         try:
 447             return books
 448         except ValueError:
 449             # too short a query
 450             return []