src/opds/views.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from functools import reduce
   5 import os.path
   6 from urllib.parse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.shortcuts import get_object_or_404
  10 from django.urls import reverse
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15 from django.utils.functional import lazy
  16
  17 from basicauth import logged_in_or_basicauth, factory_decorator
  18 from catalogue.models import Book, Tag
  19 from search.utils import UnaccentSearchQuery, UnaccentSearchVector
  20
  21 import operator
  22 import logging
  23 import re
  24
  25 from stats.utils import piwik_track
  26
  27 log = logging.getLogger('opds')
  28
  29 _root_feeds = (
  30     {
  31         "category": "",
  32         "link": "opds_user",
  33         "link_args": [],
  34         "title": "Moje półki",
  35         "description": "Półki użytkownika dostępne po zalogowaniu"
  36     },
  37     {
  38         "category": "author",
  39         "link": "opds_by_category",
  40         "link_args": ["author"],
  41         "title": "Autorzy",
  42         "description": "Utwory wg autorów"
  43     },
  44     {
  45         "category": "kind",
  46         "link": "opds_by_category",
  47         "link_args": ["kind"],
  48         "title": "Rodzaje",
  49         "description": "Utwory wg rodzajów"
  50     },
  51     {
  52         "category": "genre",
  53         "link": "opds_by_category",
  54         "link_args": ["genre"],
  55         "title": "Gatunki",
  56         "description": "Utwory wg gatunków"
  57     },
  58     {
  59         "category": "epoch",
  60         "link": "opds_by_category",
  61         "link_args": ["epoch"],
  62         "title": "Epoki",
  63         "description": "Utwory wg epok"
  64     },
  65 )
  66
  67
  68 current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
  69
  70
  71 def full_url(url):
  72     return urljoin("http://%s" % current_domain, url)
  73
  74
  75 class OPDSFeed(Atom1Feed):
  76     link_rel = "subsection"
  77     link_type = "application/atom+xml"
  78
  79     _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
  80     try:
  81         _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  82     except OSError:
  83         _book_parent_img_size = ''
  84
  85     _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
  86     try:
  87         _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  88     except OSError:
  89         _book_img_size = ''
  90
  91     def add_root_elements(self, handler):
  92         super(OPDSFeed, self).add_root_elements(handler)
  93         handler.addQuickElement("link", None,
  94                                 {"href": reverse("opds_authors"),
  95                                  "rel": "start",
  96                                  "type": "application/atom+xml"})
  97         handler.addQuickElement("link", None,
  98                                 {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  99                                  "rel": "search",
 100                                  "type": "application/opensearchdescription+xml"})
 101
 102     def add_item_elements(self, handler, item):
 103         """ modified from Atom1Feed.add_item_elements """
 104         handler.addQuickElement("title", item['title'])
 105
 106         # add a OPDS Navigation link if there's no enclosure
 107         if not item.get('enclosures') is None:
 108             handler.addQuickElement(
 109                 "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
 110             # add a "green book" icon
 111             handler.addQuickElement(
 112                 "link", '',
 113                 {
 114                     "rel": "http://opds-spec.org/thumbnail",
 115                     "href": self._book_parent_img,
 116                     "length": self._book_parent_img_size,
 117                     "type": "image/png",
 118                 })
 119         if item['pubdate'] is not None:
 120             # FIXME: rfc3339_date is undefined, is this ever run?
 121             handler.addQuickElement("updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 122
 123         # Author information.
 124         if item['author_name'] is not None:
 125             handler.startElement("author", {})
 126             handler.addQuickElement("name", item['author_name'])
 127             if item['author_email'] is not None:
 128                 handler.addQuickElement("email", item['author_email'])
 129             if item['author_link'] is not None:
 130                 handler.addQuickElement("uri", item['author_link'])
 131             handler.endElement("author")
 132
 133         # Unique ID.
 134         if item['unique_id'] is not None:
 135             unique_id = item['unique_id']
 136         else:
 137             # FIXME: get_tag_uri is undefined, is this ever run?
 138             unique_id = get_tag_uri(item['link'], item['pubdate'])
 139         handler.addQuickElement("id", unique_id)
 140
 141         # Summary.
 142         # OPDS needs type=text
 143         if item['description'] is not None:
 144             handler.addQuickElement("summary", item['description'], {"type": "text"})
 145
 146         # Enclosure as OPDS Acquisition Link
 147         for enc in item.get('enclosures', []):
 148             handler.addQuickElement(
 149                 "link", '',
 150                 {
 151                     "rel": "http://opds-spec.org/acquisition",
 152                     "href": enc.url,
 153                     "length": enc.length,
 154                     "type": enc.mime_type,
 155                 })
 156             # add a "red book" icon
 157             handler.addQuickElement(
 158                 "link", '',
 159                 {
 160                     "rel": "http://opds-spec.org/thumbnail",
 161                     "href": self._book_img,
 162                     "length": self._book_img_size,
 163                     "type": "image/png",
 164                 })
 165
 166         # Categories.
 167         for cat in item['categories']:
 168             handler.addQuickElement("category", "", {"term": cat})
 169
 170         # Rights.
 171         if item['item_copyright'] is not None:
 172             handler.addQuickElement("rights", item['item_copyright'])
 173
 174
 175 class AcquisitionFeed(Feed):
 176     feed_type = OPDSFeed
 177     link = 'http://www.wolnelektury.pl/'
 178     item_enclosure_mime_type = "application/epub+zip"
 179     author_name = "Wolne Lektury"
 180     author_link = "http://www.wolnelektury.pl/"
 181
 182     def item_title(self, book):
 183         return book.title
 184
 185     def item_description(self):
 186         return ''
 187
 188     def item_link(self, book):
 189         return book.get_absolute_url()
 190
 191     def item_author_name(self, book):
 192         try:
 193             return book.authors().first().name
 194         except AttributeError:
 195             return ''
 196
 197     def item_author_link(self, book):
 198         try:
 199             return book.authors().first().get_absolute_url()
 200         except AttributeError:
 201             return ''
 202
 203     def item_enclosure_url(self, book):
 204         return full_url(book.epub_url()) if book.epub_file else None
 205
 206     def item_enclosure_length(self, book):
 207         return book.epub_file.size if book.epub_file else None
 208
 209
 210 @piwik_track
 211 class RootFeed(Feed):
 212     feed_type = OPDSFeed
 213     title = 'Wolne Lektury'
 214     link = 'http://wolnelektury.pl/'
 215     description = "Spis utworów na stronie http://WolneLektury.pl"
 216     author_name = "Wolne Lektury"
 217     author_link = "http://wolnelektury.pl/"
 218
 219     def items(self):
 220         return _root_feeds
 221
 222     def item_title(self, item):
 223         return item['title']
 224
 225     def item_link(self, item):
 226         return reverse(item['link'], args=item['link_args'])
 227
 228     def item_description(self, item):
 229         return item['description']
 230
 231
 232 @piwik_track
 233 class ByCategoryFeed(Feed):
 234     feed_type = OPDSFeed
 235     link = 'http://wolnelektury.pl/'
 236     description = "Spis utworów na stronie http://WolneLektury.pl"
 237     author_name = "Wolne Lektury"
 238     author_link = "http://wolnelektury.pl/"
 239
 240     def get_object(self, request, category):
 241         feed = [feed for feed in _root_feeds if feed['category'] == category]
 242         if feed:
 243             feed = feed[0]
 244         else:
 245             raise Http404
 246
 247         return feed
 248
 249     def title(self, feed):
 250         return feed['title']
 251
 252     def items(self, feed):
 253         return Tag.objects.filter(category=feed['category']).exclude(items=None)
 254
 255     def item_title(self, item):
 256         return item.name
 257
 258     def item_link(self, item):
 259         return reverse("opds_by_tag", args=[item.category, item.slug])
 260
 261     def item_description(self):
 262         return ''
 263
 264
 265 @piwik_track
 266 class ByTagFeed(AcquisitionFeed):
 267     def link(self, tag):
 268         return tag.get_absolute_url()
 269
 270     def title(self, tag):
 271         return tag.name
 272
 273     def description(self, tag):
 274         return "Spis utworów na stronie http://WolneLektury.pl"
 275
 276     def get_object(self, request, category, slug):
 277         return get_object_or_404(Tag, category=category, slug=slug)
 278
 279     def items(self, tag):
 280         return Book.tagged_top_level([tag])
 281
 282
 283 @factory_decorator(logged_in_or_basicauth())
 284 @piwik_track
 285 class UserFeed(Feed):
 286     feed_type = OPDSFeed
 287     link = 'http://www.wolnelektury.pl/'
 288     description = "Półki użytkownika na stronie http://WolneLektury.pl"
 289     author_name = "Wolne Lektury"
 290     author_link = "http://wolnelektury.pl/"
 291
 292     def get_object(self, request):
 293         return request.user
 294
 295     def title(self, user):
 296         return "Półki użytkownika %s" % user.username
 297
 298     def items(self, user):
 299         return Tag.objects.filter(category='set', user=user).exclude(items=None)
 300
 301     def item_title(self, item):
 302         return item.name
 303
 304     def item_link(self, item):
 305         return reverse("opds_user_set", args=[item.slug])
 306
 307     def item_description(self):
 308         return ''
 309
 310
 311 @factory_decorator(logged_in_or_basicauth())
 312 @piwik_track
 313 class UserSetFeed(AcquisitionFeed):
 314     def link(self, tag):
 315         return tag.get_absolute_url()
 316
 317     def title(self, tag):
 318         return tag.name
 319
 320     def description(self, tag):
 321         return "Spis utworów na stronie http://WolneLektury.pl"
 322
 323     def get_object(self, request, slug):
 324         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 325
 326     def items(self, tag):
 327         return Book.tagged.with_any([tag])
 328
 329
 330 @piwik_track
 331 class SearchFeed(AcquisitionFeed):
 332     description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
 333     title = "Wyniki wyszukiwania"
 334
 335     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 336     INLINE_QUERY_RE = re.compile(
 337         r"author:" + QUOTE_OR_NOT +
 338         "|translator:" + QUOTE_OR_NOT +
 339         "|title:" + QUOTE_OR_NOT +
 340         "|categories:" + QUOTE_OR_NOT +
 341         "|description:" + QUOTE_OR_NOT +
 342         "|text:" + QUOTE_OR_NOT
 343         )
 344     MATCHES = {
 345         'author': (0, 1),
 346         'translator': (2, 3),
 347         'title': (4, 5),
 348         'categories': (6, 7),
 349         'description': (8, 9),
 350         'text': (10, 11),
 351         }
 352
 353     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 354
 355     def get_object(self, request):
 356         """
 357         For OPDS 1.1 We should handle a query for search terms
 358         and criteria provided either as opensearch or 'inline' query.
 359         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 360         atom:title. Inline query provides author, title, categories (treated as book tags),
 361         description (treated as content search terms).
 362
 363         if search terms are provided, we shall search for books
 364         according to Hint information (from author & contributror & title).
 365
 366         but if search terms are empty, we should do a different search
 367         (perhaps for is_book=True)
 368
 369         """
 370
 371         query = request.GET.get('q', '')
 372
 373         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 374         if inline_criteria:
 375             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 376             remains = re.sub(r'[ \t]+', ' ', remains)
 377
 378             def get_criteria(criteria, name):
 379                 for c in criteria:
 380                     for p in self.MATCHES[name]:
 381                         if c[p]:
 382                             if p % 2 == 0:
 383                                 return c[p].replace('+', ' ')
 384                             return c[p]
 385                 return None
 386
 387             criteria = dict(map(
 388                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 389                 ['author', 'translator', 'title', 'categories',
 390                  'description', 'text']))
 391             query = remains
 392             # empty query and text set case?
 393             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 394         else:
 395             def remove_dump_data(val):
 396                 """Some clients don't get opds placeholders and just send them."""
 397                 if self.ATOM_PLACEHOLDER.match(val):
 398                     return ''
 399                 return val
 400
 401             criteria = dict(
 402                 (cn, remove_dump_data(request.GET.get(cn, '')))
 403                 for cn in self.MATCHES.keys())
 404             # query is set above.
 405             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 406
 407         books = Book.objects.filter(findable=True).annotate(
 408             search_vector=UnaccentSearchVector('title')
 409         )
 410         if query:
 411             squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
 412             books = books.filter(search_vector=squery)
 413         if criteria['author']:
 414             authors = Tag.objects.filter(category='author').annotate(
 415                 search_vector=UnaccentSearchVector('name_pl')
 416             ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
 417             books = books.filter(tag_relations__tag__in=authors)
 418         if criteria['categories']:
 419             tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
 420                 search_vector=UnaccentSearchVector('name_pl')
 421             ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
 422             books = books.filter(tag_relations__tag__in=tags)
 423         if criteria['translator']:
 424             # TODO
 425             pass
 426         if criteria['title']:
 427             books = books.filter(
 428                 search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
 429             )
 430
 431         books = books.exclude(ancestor__in=books)
 432
 433         books = books.order_by('popularity__count')
 434         return books
 435
 436     def get_link(self, query):
 437         return "%s?q=%s" % (reverse('search'), query)
 438
 439     def items(self, books):
 440         try:
 441             return books
 442         except ValueError:
 443             # too short a query
 444             return []