src/opds/views.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
import logging
import operator
import os.path
import re
from functools import reduce
from urllib.parse import urljoin

from django.conf import settings
from django.contrib.sites.models import Site
from django.contrib.syndication.views import Feed
from django.http import Http404
from django.shortcuts import get_object_or_404
from django.urls import reverse
from django.utils.feedgenerator import Atom1Feed, Enclosure, get_tag_uri, rfc3339_date
from django.utils.functional import lazy

from basicauth import logged_in_or_basicauth, factory_decorator
from catalogue.models import Book, Tag
from search.utils import UnaccentSearchQuery, UnaccentSearchVector
from stats.utils import piwik_track
  26
# Module-level logger; the feed classes below log under the 'opds' name.
log = logging.getLogger('opds')
  28
  29 _root_feeds = (
  30     {
  31         "category": "",
  32         "link": "opds_user",
  33         "link_args": [],
  34         "title": "Moje półki",
  35         "description": "Półki użytkownika dostępne po zalogowaniu"
  36     },
  37     {
  38         "category": "author",
  39         "link": "opds_by_category",
  40         "link_args": ["author"],
  41         "title": "Autorzy",
  42         "description": "Utwory wg autorów"
  43     },
  44     {
  45         "category": "kind",
  46         "link": "opds_by_category",
  47         "link_args": ["kind"],
  48         "title": "Rodzaje",
  49         "description": "Utwory wg rodzajów"
  50     },
  51     {
  52         "category": "genre",
  53         "link": "opds_by_category",
  54         "link_args": ["genre"],
  55         "title": "Gatunki",
  56         "description": "Utwory wg gatunków"
  57     },
  58     {
  59         "category": "epoch",
  60         "link": "opds_by_category",
  61         "link_args": ["epoch"],
  62         "title": "Epoki",
  63         "description": "Utwory wg epok"
  64     },
  65 )
  66
  67
  68 current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
  69
  70
  71 def full_url(url):
  72     return urljoin("http://%s" % current_domain, url)
  73
  74
  75 class OPDSFeed(Atom1Feed):
  76     link_rel = "subsection"
  77     link_type = "application/atom+xml"
  78
  79     _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
  80     try:
  81         _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  82     except OSError:
  83         _book_parent_img_size = ''
  84
  85     _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
  86     try:
  87         _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  88     except OSError:
  89         _book_img_size = ''
  90
  91     def add_root_elements(self, handler):
  92         super(OPDSFeed, self).add_root_elements(handler)
  93         handler.addQuickElement("link", None,
  94                                 {"href": reverse("opds_authors"),
  95                                  "rel": "start",
  96                                  "type": "application/atom+xml"})
  97         handler.addQuickElement("link", None,
  98                                 {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  99                                  "rel": "search",
 100                                  "type": "application/opensearchdescription+xml"})
 101
 102     def add_item_elements(self, handler, item):
 103         """ modified from Atom1Feed.add_item_elements """
 104         handler.addQuickElement("title", item['title'])
 105
 106         # add a OPDS Navigation link if there's no enclosure
 107         if not item.get('enclosures') is None:
 108             handler.addQuickElement(
 109                 "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
 110             # add a "green book" icon
 111             handler.addQuickElement(
 112                 "link", '',
 113                 {
 114                     "rel": "http://opds-spec.org/thumbnail",
 115                     "href": self._book_parent_img,
 116                     "length": self._book_parent_img_size,
 117                     "type": "image/png",
 118                 })
 119         if item['pubdate'] is not None:
 120             # FIXME: rfc3339_date is undefined, is this ever run?
 121             handler.addQuickElement("updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 122
 123         # Author information.
 124         if item['author_name'] is not None:
 125             handler.startElement("author", {})
 126             handler.addQuickElement("name", item['author_name'])
 127             if item['author_email'] is not None:
 128                 handler.addQuickElement("email", item['author_email'])
 129             if item['author_link'] is not None:
 130                 handler.addQuickElement("uri", item['author_link'])
 131             handler.endElement("author")
 132
 133         # Unique ID.
 134         if item['unique_id'] is not None:
 135             unique_id = item['unique_id']
 136         else:
 137             # FIXME: get_tag_uri is undefined, is this ever run?
 138             unique_id = get_tag_uri(item['link'], item['pubdate'])
 139         handler.addQuickElement("id", unique_id)
 140
 141         # Summary.
 142         # OPDS needs type=text
 143         if item['description'] is not None:
 144             handler.addQuickElement("summary", item['description'], {"type": "text"})
 145
 146         # Enclosure as OPDS Acquisition Link
 147         for enc in item.get('enclosures', []):
 148             handler.startElement(
 149                 "link",
 150                 {
 151                     "rel": "http://opds-spec.org/acquisition",
 152                     "href": enc.url,
 153                     "length": str(enc.length),
 154                     "type": enc.mime_type,
 155                 })
 156             if hasattr(enc, 'indirect'):
 157                 NS = 'http://opds-spec.org/2010/catalog'
 158                 handler.startPrefixMapping('opds', NS)
 159                 handler.startElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition', {
 160                     (None, 'type'): enc.indirect,
 161                 })
 162                 handler.endElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition')
 163                 handler.endPrefixMapping('opds')
 164             handler.endElement('link')
 165         # add a "red book" icon
 166         handler.addQuickElement(
 167             "link", '',
 168             {
 169                 "rel": "http://opds-spec.org/thumbnail",
 170                 "href": self._book_img,
 171                 "length": self._book_img_size,
 172                 "type": "image/png",
 173             })
 174
 175         # Categories.
 176         for cat in item['categories']:
 177             handler.addQuickElement("category", "", {"term": cat})
 178
 179         # Rights.
 180         if item['item_copyright'] is not None:
 181             handler.addQuickElement("rights", item['item_copyright'])
 182
 183
 184 class AcquisitionFeed(Feed):
 185     feed_type = OPDSFeed
 186     link = 'http://www.wolnelektury.pl/'
 187     author_name = "Wolne Lektury"
 188     author_link = "http://www.wolnelektury.pl/"
 189
 190     def item_title(self, book):
 191         return book.title
 192
 193     def item_description(self):
 194         return ''
 195
 196     def item_link(self, book):
 197         return book.get_absolute_url()
 198
 199     def item_author_name(self, book):
 200         try:
 201             return book.authors().first().name
 202         except AttributeError:
 203             return ''
 204
 205     def item_author_link(self, book):
 206         try:
 207             return book.authors().first().get_absolute_url()
 208         except AttributeError:
 209             return ''
 210
 211     def item_enclosures(self, book):
 212         enc = []
 213         if book.epub_file:
 214             enc.append(Enclosure(
 215                 url=full_url(book.epub_url()),
 216                 length=book.epub_file.size,
 217                 mime_type="application/epub+zip"
 218             ))
 219         if book.has_mp3_file():
 220             e = Enclosure(
 221                 url=full_url(reverse('download_zip_mp3', args=[book.slug])),
 222                 length=sum(bm.file.size for bm in book.get_media('mp3')),
 223                 mime_type="application/zip"
 224             )
 225             e.indirect = 'audio/mpeg'
 226             enc.append(e)
 227         return enc
 228
 229
 230 @piwik_track
 231 class RootFeed(Feed):
 232     feed_type = OPDSFeed
 233     title = 'Wolne Lektury'
 234     link = 'http://wolnelektury.pl/'
 235     description = "Spis utworów na stronie http://WolneLektury.pl"
 236     author_name = "Wolne Lektury"
 237     author_link = "http://wolnelektury.pl/"
 238
 239     def items(self):
 240         return _root_feeds
 241
 242     def item_title(self, item):
 243         return item['title']
 244
 245     def item_link(self, item):
 246         return reverse(item['link'], args=item['link_args'])
 247
 248     def item_description(self, item):
 249         return item['description']
 250
 251
 252 @piwik_track
 253 class ByCategoryFeed(Feed):
 254     feed_type = OPDSFeed
 255     link = 'http://wolnelektury.pl/'
 256     description = "Spis utworów na stronie http://WolneLektury.pl"
 257     author_name = "Wolne Lektury"
 258     author_link = "http://wolnelektury.pl/"
 259
 260     def get_object(self, request, category):
 261         feed = [feed for feed in _root_feeds if feed['category'] == category]
 262         if feed:
 263             feed = feed[0]
 264         else:
 265             raise Http404
 266
 267         return feed
 268
 269     def title(self, feed):
 270         return feed['title']
 271
 272     def items(self, feed):
 273         return Tag.objects.filter(category=feed['category']).exclude(items=None)
 274
 275     def item_title(self, item):
 276         return item.name
 277
 278     def item_link(self, item):
 279         return reverse("opds_by_tag", args=[item.category, item.slug])
 280
 281     def item_description(self):
 282         return ''
 283
 284
 285 @piwik_track
 286 class ByTagFeed(AcquisitionFeed):
 287     def link(self, tag):
 288         return tag.get_absolute_url()
 289
 290     def title(self, tag):
 291         return tag.name
 292
 293     def description(self, tag):
 294         return "Spis utworów na stronie http://WolneLektury.pl"
 295
 296     def get_object(self, request, category, slug):
 297         return get_object_or_404(Tag, category=category, slug=slug)
 298
 299     def items(self, tag):
 300         qs = Book.tagged_top_level([tag])
 301         qs = qs.filter(preview=False, findable=True)
 302         return qs
 303
 304
 305 @factory_decorator(logged_in_or_basicauth())
 306 @piwik_track
 307 class UserFeed(Feed):
 308     feed_type = OPDSFeed
 309     link = 'http://www.wolnelektury.pl/'
 310     description = "Półki użytkownika na stronie http://WolneLektury.pl"
 311     author_name = "Wolne Lektury"
 312     author_link = "http://wolnelektury.pl/"
 313
 314     def get_object(self, request):
 315         return request.user
 316
 317     def title(self, user):
 318         return "Półki użytkownika %s" % user.username
 319
 320     def items(self, user):
 321         return Tag.objects.filter(category='set', user=user).exclude(items=None)
 322
 323     def item_title(self, item):
 324         return item.name
 325
 326     def item_link(self, item):
 327         return reverse("opds_user_set", args=[item.slug])
 328
 329     def item_description(self):
 330         return ''
 331
 332
 333 @factory_decorator(logged_in_or_basicauth())
 334 @piwik_track
 335 class UserSetFeed(AcquisitionFeed):
 336     def link(self, tag):
 337         return tag.get_absolute_url()
 338
 339     def title(self, tag):
 340         return tag.name
 341
 342     def description(self, tag):
 343         return "Spis utworów na stronie http://WolneLektury.pl"
 344
 345     def get_object(self, request, slug):
 346         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 347
 348     def items(self, tag):
 349         return Book.tagged.with_any([tag])
 350
 351
 352 @piwik_track
 353 class SearchFeed(AcquisitionFeed):
 354     description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
 355     title = "Wyniki wyszukiwania"
 356
 357     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 358     INLINE_QUERY_RE = re.compile(
 359         r"author:" + QUOTE_OR_NOT +
 360         "|translator:" + QUOTE_OR_NOT +
 361         "|title:" + QUOTE_OR_NOT +
 362         "|categories:" + QUOTE_OR_NOT +
 363         "|description:" + QUOTE_OR_NOT +
 364         "|text:" + QUOTE_OR_NOT
 365         )
 366     MATCHES = {
 367         'author': (0, 1),
 368         'translator': (2, 3),
 369         'title': (4, 5),
 370         'categories': (6, 7),
 371         'description': (8, 9),
 372         'text': (10, 11),
 373         }
 374
 375     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 376
 377     def get_object(self, request):
 378         """
 379         For OPDS 1.1 We should handle a query for search terms
 380         and criteria provided either as opensearch or 'inline' query.
 381         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 382         atom:title. Inline query provides author, title, categories (treated as book tags),
 383         description (treated as content search terms).
 384
 385         if search terms are provided, we shall search for books
 386         according to Hint information (from author & contributror & title).
 387
 388         but if search terms are empty, we should do a different search
 389         (perhaps for is_book=True)
 390
 391         """
 392
 393         query = request.GET.get('q', '')
 394
 395         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 396         if inline_criteria:
 397             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 398             remains = re.sub(r'[ \t]+', ' ', remains)
 399
 400             def get_criteria(criteria, name):
 401                 for c in criteria:
 402                     for p in self.MATCHES[name]:
 403                         if c[p]:
 404                             if p % 2 == 0:
 405                                 return c[p].replace('+', ' ')
 406                             return c[p]
 407                 return None
 408
 409             criteria = dict(map(
 410                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 411                 ['author', 'translator', 'title', 'categories',
 412                  'description', 'text']))
 413             query = remains
 414             # empty query and text set case?
 415             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 416         else:
 417             def remove_dump_data(val):
 418                 """Some clients don't get opds placeholders and just send them."""
 419                 if self.ATOM_PLACEHOLDER.match(val):
 420                     return ''
 421                 return val
 422
 423             criteria = dict(
 424                 (cn, remove_dump_data(request.GET.get(cn, '')))
 425                 for cn in self.MATCHES.keys())
 426             # query is set above.
 427             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 428
 429         books = Book.objects.filter(findable=True, preview=False).annotate(
 430             search_vector=UnaccentSearchVector('title')
 431         )
 432         if query:
 433             squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
 434             books = books.filter(search_vector=squery)
 435         if criteria['author']:
 436             authors = Tag.objects.filter(category='author').annotate(
 437                 search_vector=UnaccentSearchVector('name_pl')
 438             ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
 439             books = books.filter(tag_relations__tag__in=authors)
 440         if criteria['categories']:
 441             tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
 442                 search_vector=UnaccentSearchVector('name_pl')
 443             ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
 444             books = books.filter(tag_relations__tag__in=tags)
 445         if criteria['translator']:
 446             # TODO
 447             pass
 448         if criteria['title']:
 449             books = books.filter(
 450                 search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
 451             )
 452
 453         books = books.exclude(ancestor__in=books)
 454
 455         books = books.order_by('popularity__count')
 456         return books
 457
 458     def get_link(self, query):
 459         return "%s?q=%s" % (reverse('search'), query)
 460
 461     def items(self, books):
 462         try:
 463             return books
 464         except ValueError:
 465             # too short a query
 466             return []