src/opds/views.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from functools import reduce
   5 import os.path
   6 from urllib.parse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.shortcuts import get_object_or_404
  10 from django.urls import reverse
  11 from django.utils.feedgenerator import Atom1Feed, Enclosure
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15 from django.utils.functional import lazy
  16
  17 from basicauth import logged_in_or_basicauth, factory_decorator
  18 from catalogue.models import Book, Tag
  19 from search.utils import UnaccentSearchQuery, UnaccentSearchVector
  20 from social.models import UserList
  21
  22 import operator
  23 import logging
  24 import re
  25
  26 from stats.utils import piwik_track
  27
  28 log = logging.getLogger('opds')
  29
  30 _root_feeds = (
  31     {
  32         "category": "",
  33         "link": "opds_user",
  34         "link_args": [],
  35         "title": "Moje półki",
  36         "description": "Półki użytkownika dostępne po zalogowaniu"
  37     },
  38     {
  39         "category": "author",
  40         "link": "opds_by_category",
  41         "link_args": ["author"],
  42         "title": "Autorzy",
  43         "description": "Utwory wg autorów"
  44     },
  45     {
  46         "category": "kind",
  47         "link": "opds_by_category",
  48         "link_args": ["kind"],
  49         "title": "Rodzaje",
  50         "description": "Utwory wg rodzajów"
  51     },
  52     {
  53         "category": "genre",
  54         "link": "opds_by_category",
  55         "link_args": ["genre"],
  56         "title": "Gatunki",
  57         "description": "Utwory wg gatunków"
  58     },
  59     {
  60         "category": "epoch",
  61         "link": "opds_by_category",
  62         "link_args": ["epoch"],
  63         "title": "Epoki",
  64         "description": "Utwory wg epok"
  65     },
  66 )
  67
  68
  69 current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
  70
  71
  72 def full_url(url):
  73     return urljoin("http://%s" % current_domain, url)
  74
  75
  76 class OPDSFeed(Atom1Feed):
  77     link_rel = "subsection"
  78     link_type = "application/atom+xml"
  79
  80     _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
  81     try:
  82         _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  83     except OSError:
  84         _book_parent_img_size = ''
  85
  86     _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
  87     try:
  88         _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  89     except OSError:
  90         _book_img_size = ''
  91
  92     def add_root_elements(self, handler):
  93         super(OPDSFeed, self).add_root_elements(handler)
  94         handler.addQuickElement("link", None,
  95                                 {"href": reverse("opds_authors"),
  96                                  "rel": "start",
  97                                  "type": "application/atom+xml"})
  98         handler.addQuickElement("link", None,
  99                                 {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
 100                                  "rel": "search",
 101                                  "type": "application/opensearchdescription+xml"})
 102
 103     def add_item_elements(self, handler, item):
 104         """ modified from Atom1Feed.add_item_elements """
 105         handler.addQuickElement("title", item['title'])
 106
 107         # add a OPDS Navigation link if there's no enclosure
 108         if not item.get('enclosures') is None:
 109             handler.addQuickElement(
 110                 "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
 111             # add a "green book" icon
 112             handler.addQuickElement(
 113                 "link", '',
 114                 {
 115                     "rel": "http://opds-spec.org/thumbnail",
 116                     "href": self._book_parent_img,
 117                     "length": self._book_parent_img_size,
 118                     "type": "image/png",
 119                 })
 120         if item['pubdate'] is not None:
 121             # FIXME: rfc3339_date is undefined, is this ever run?
 122             handler.addQuickElement("updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 123
 124         # Author information.
 125         if item['author_name'] is not None:
 126             handler.startElement("author", {})
 127             handler.addQuickElement("name", item['author_name'])
 128             if item['author_email'] is not None:
 129                 handler.addQuickElement("email", item['author_email'])
 130             if item['author_link'] is not None:
 131                 handler.addQuickElement("uri", item['author_link'])
 132             handler.endElement("author")
 133
 134         # Unique ID.
 135         if item['unique_id'] is not None:
 136             unique_id = item['unique_id']
 137         else:
 138             # FIXME: get_tag_uri is undefined, is this ever run?
 139             unique_id = get_tag_uri(item['link'], item['pubdate'])
 140         handler.addQuickElement("id", unique_id)
 141
 142         # Summary.
 143         # OPDS needs type=text
 144         if item['description'] is not None:
 145             handler.addQuickElement("summary", item['description'], {"type": "text"})
 146
 147         # Enclosure as OPDS Acquisition Link
 148         for enc in item.get('enclosures', []):
 149             handler.startElement(
 150                 "link",
 151                 {
 152                     "rel": "http://opds-spec.org/acquisition",
 153                     "href": enc.url,
 154                     "length": str(enc.length),
 155                     "type": enc.mime_type,
 156                 })
 157             if hasattr(enc, 'indirect'):
 158                 NS = 'http://opds-spec.org/2010/catalog'
 159                 handler.startPrefixMapping('opds', NS)
 160                 handler.startElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition', {
 161                     (None, 'type'): enc.indirect,
 162                 })
 163                 handler.endElementNS((NS, 'indirectAcquisition'), 'opds:indirectAcquisition')
 164                 handler.endPrefixMapping('opds')
 165             handler.endElement('link')
 166         # add a "red book" icon
 167         handler.addQuickElement(
 168             "link", '',
 169             {
 170                 "rel": "http://opds-spec.org/thumbnail",
 171                 "href": self._book_img,
 172                 "length": self._book_img_size,
 173                 "type": "image/png",
 174             })
 175
 176         # Categories.
 177         for cat in item['categories']:
 178             handler.addQuickElement("category", "", {"term": cat})
 179
 180         # Rights.
 181         if item['item_copyright'] is not None:
 182             handler.addQuickElement("rights", item['item_copyright'])
 183
 184
 185 class AcquisitionFeed(Feed):
 186     feed_type = OPDSFeed
 187     link = 'http://www.wolnelektury.pl/'
 188     author_name = "Wolne Lektury"
 189     author_link = "http://www.wolnelektury.pl/"
 190
 191     def item_title(self, book):
 192         return book.title
 193
 194     def item_description(self):
 195         return ''
 196
 197     def item_link(self, book):
 198         return book.get_absolute_url()
 199
 200     def item_author_name(self, book):
 201         try:
 202             return book.authors().first().name
 203         except AttributeError:
 204             return ''
 205
 206     def item_author_link(self, book):
 207         try:
 208             return book.authors().first().get_absolute_url()
 209         except AttributeError:
 210             return ''
 211
 212     def item_enclosures(self, book):
 213         enc = []
 214         if book.epub_file:
 215             enc.append(Enclosure(
 216                 url=full_url(book.epub_url()),
 217                 length=book.epub_file.size,
 218                 mime_type="application/epub+zip"
 219             ))
 220         if book.has_mp3_file():
 221             e = Enclosure(
 222                 url=full_url(reverse('download_zip_mp3', args=[book.slug])),
 223                 length=sum(bm.file.size for bm in book.get_media('mp3')),
 224                 mime_type="application/zip"
 225             )
 226             e.indirect = 'audio/mpeg'
 227             enc.append(e)
 228         return enc
 229
 230
 231 @piwik_track
 232 class RootFeed(Feed):
 233     feed_type = OPDSFeed
 234     title = 'Wolne Lektury'
 235     link = 'http://wolnelektury.pl/'
 236     description = "Spis utworów na stronie http://WolneLektury.pl"
 237     author_name = "Wolne Lektury"
 238     author_link = "http://wolnelektury.pl/"
 239
 240     def items(self):
 241         return _root_feeds
 242
 243     def item_title(self, item):
 244         return item['title']
 245
 246     def item_link(self, item):
 247         return reverse(item['link'], args=item['link_args'])
 248
 249     def item_description(self, item):
 250         return item['description']
 251
 252
 253 @piwik_track
 254 class ByCategoryFeed(Feed):
 255     feed_type = OPDSFeed
 256     link = 'http://wolnelektury.pl/'
 257     description = "Spis utworów na stronie http://WolneLektury.pl"
 258     author_name = "Wolne Lektury"
 259     author_link = "http://wolnelektury.pl/"
 260
 261     def get_object(self, request, category):
 262         feed = [feed for feed in _root_feeds if feed['category'] == category]
 263         if feed:
 264             feed = feed[0]
 265         else:
 266             raise Http404
 267
 268         return feed
 269
 270     def title(self, feed):
 271         return feed['title']
 272
 273     def items(self, feed):
 274         return Tag.objects.filter(category=feed['category']).exclude(items=None)
 275
 276     def item_title(self, item):
 277         return item.name
 278
 279     def item_link(self, item):
 280         return reverse("opds_by_tag", args=[item.category, item.slug])
 281
 282     def item_description(self):
 283         return ''
 284
 285
 286 @piwik_track
 287 class ByTagFeed(AcquisitionFeed):
 288     def link(self, tag):
 289         return tag.get_absolute_url()
 290
 291     def title(self, tag):
 292         return tag.name
 293
 294     def description(self, tag):
 295         return "Spis utworów na stronie http://WolneLektury.pl"
 296
 297     def get_object(self, request, category, slug):
 298         return get_object_or_404(Tag, category=category, slug=slug)
 299
 300     def items(self, tag):
 301         qs = Book.tagged_top_level([tag])
 302         qs = qs.filter(preview=False, findable=True)
 303         return qs
 304
 305
 306 @factory_decorator(logged_in_or_basicauth())
 307 @piwik_track
 308 class UserFeed(Feed):
 309     feed_type = OPDSFeed
 310     link = 'http://www.wolnelektury.pl/'
 311     description = "Półki użytkownika na stronie http://WolneLektury.pl"
 312     author_name = "Wolne Lektury"
 313     author_link = "http://wolnelektury.pl/"
 314
 315     def get_object(self, request):
 316         return request.user
 317
 318     def title(self, user):
 319         return "Półki użytkownika %s" % user.username
 320
 321     def items(self, user):
 322         return UserList.objects.filter(user=user, deleted=False)
 323
 324     def item_title(self, item):
 325         return item.name
 326
 327     def item_link(self, item):
 328         return reverse("opds_user_set", args=[item.slug])
 329
 330     def item_description(self):
 331         return ''
 332
 333
 334 @factory_decorator(logged_in_or_basicauth())
 335 @piwik_track
 336 class UserSetFeed(AcquisitionFeed):
 337     def link(self, tag):
 338         return tag.get_absolute_url()
 339
 340     def title(self, tag):
 341         return tag.name
 342
 343     def description(self, tag):
 344         return "Spis utworów na stronie http://WolneLektury.pl"
 345
 346     def get_object(self, request, slug):
 347         return get_object_or_404(UserList, deleted=False, slug=slug, user=request.user)
 348
 349     def items(self, tag):
 350         return tag.get_books()
 351
 352
 353 @piwik_track
 354 class SearchFeed(AcquisitionFeed):
 355     description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
 356     title = "Wyniki wyszukiwania"
 357
 358     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 359     INLINE_QUERY_RE = re.compile(
 360         r"author:" + QUOTE_OR_NOT +
 361         "|translator:" + QUOTE_OR_NOT +
 362         "|title:" + QUOTE_OR_NOT +
 363         "|categories:" + QUOTE_OR_NOT +
 364         "|description:" + QUOTE_OR_NOT +
 365         "|text:" + QUOTE_OR_NOT
 366         )
 367     MATCHES = {
 368         'author': (0, 1),
 369         'translator': (2, 3),
 370         'title': (4, 5),
 371         'categories': (6, 7),
 372         'description': (8, 9),
 373         'text': (10, 11),
 374         }
 375
 376     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 377
 378     def get_object(self, request):
 379         """
 380         For OPDS 1.1 We should handle a query for search terms
 381         and criteria provided either as opensearch or 'inline' query.
 382         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 383         atom:title. Inline query provides author, title, categories (treated as book tags),
 384         description (treated as content search terms).
 385
 386         if search terms are provided, we shall search for books
 387         according to Hint information (from author & contributror & title).
 388
 389         but if search terms are empty, we should do a different search
 390         (perhaps for is_book=True)
 391
 392         """
 393
 394         query = request.GET.get('q', '')
 395
 396         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 397         if inline_criteria:
 398             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 399             remains = re.sub(r'[ \t]+', ' ', remains)
 400
 401             def get_criteria(criteria, name):
 402                 for c in criteria:
 403                     for p in self.MATCHES[name]:
 404                         if c[p]:
 405                             if p % 2 == 0:
 406                                 return c[p].replace('+', ' ')
 407                             return c[p]
 408                 return None
 409
 410             criteria = dict(map(
 411                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 412                 ['author', 'translator', 'title', 'categories',
 413                  'description', 'text']))
 414             query = remains
 415             # empty query and text set case?
 416             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 417         else:
 418             def remove_dump_data(val):
 419                 """Some clients don't get opds placeholders and just send them."""
 420                 if self.ATOM_PLACEHOLDER.match(val):
 421                     return ''
 422                 return val
 423
 424             criteria = dict(
 425                 (cn, remove_dump_data(request.GET.get(cn, '')))
 426                 for cn in self.MATCHES.keys())
 427             # query is set above.
 428             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 429
 430         books = Book.objects.filter(findable=True, preview=False).annotate(
 431             search_vector=UnaccentSearchVector('title')
 432         )
 433         if query:
 434             squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
 435             books = books.filter(search_vector=squery)
 436         if criteria['author']:
 437             authors = Tag.objects.filter(category='author').annotate(
 438                 search_vector=UnaccentSearchVector('name_pl')
 439             ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
 440             books = books.filter(tag_relations__tag__in=authors)
 441         if criteria['categories']:
 442             tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
 443                 search_vector=UnaccentSearchVector('name_pl')
 444             ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
 445             books = books.filter(tag_relations__tag__in=tags)
 446         if criteria['translator']:
 447             # TODO
 448             pass
 449         if criteria['title']:
 450             books = books.filter(
 451                 search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
 452             )
 453
 454         books = books.exclude(ancestor__in=books)
 455
 456         books = books.order_by('popularity__count')
 457         return books
 458
 459     def get_link(self, query):
 460         return "%s?q=%s" % (reverse('search'), query)
 461
 462     def items(self, books):
 463         try:
 464             return books
 465         except ValueError:
 466             # too short a query
 467             return []