src/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import os.path
   6 from urlparse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.core.urlresolvers import reverse
  10 from django.shortcuts import get_object_or_404
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15 from django.utils.functional import lazy
  16
  17 from basicauth import logged_in_or_basicauth, factory_decorator
  18 from catalogue.models import Book, Tag
  19
  20 from search.views import Search
  21 import operator
  22 import logging
  23 import re
  24
  25 log = logging.getLogger('opds')
  26
  27 from stats.utils import piwik_track
  28
  29 _root_feeds = (
  30     {
  31         u"category": u"",
  32         u"link": u"opds_user",
  33         u"link_args": [],
  34         u"title": u"Moje półki",
  35         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  36     },
  37     {
  38         u"category": u"author",
  39         u"link": u"opds_by_category",
  40         u"link_args": [u"author"],
  41         u"title": u"Autorzy",
  42         u"description": u"Utwory wg autorów"
  43     },
  44     {
  45         u"category": u"kind",
  46         u"link": u"opds_by_category",
  47         u"link_args": [u"kind"],
  48         u"title": u"Rodzaje",
  49         u"description": u"Utwory wg rodzajów"
  50     },
  51     {
  52         u"category": u"genre",
  53         u"link": u"opds_by_category",
  54         u"link_args": [u"genre"],
  55         u"title": u"Gatunki",
  56         u"description": u"Utwory wg gatunków"
  57     },
  58     {
  59         u"category": u"epoch",
  60         u"link": u"opds_by_category",
  61         u"link_args": [u"epoch"],
  62         u"title": u"Epoki",
  63         u"description": u"Utwory wg epok"
  64     },
  65 )
  66
  67
  68 current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
  69 def full_url(url):
  70     return urljoin("http://%s" % current_domain, url)
  71
  72
  73 class OPDSFeed(Atom1Feed):
  74     link_rel = u"subsection"
  75     link_type = u"application/atom+xml"
  76
  77     _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
  78     try:
  79         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  80     except:
  81         _book_parent_img_size = ''
  82
  83     _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
  84     try:
  85         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  86     except:
  87         _book_img_size = ''
  88
  89
  90     def add_root_elements(self, handler):
  91         super(OPDSFeed, self).add_root_elements(handler)
  92         handler.addQuickElement(u"link", None,
  93                                 {u"href": reverse("opds_authors"),
  94                                  u"rel": u"start",
  95                                  u"type": u"application/atom+xml"})
  96         handler.addQuickElement(u"link", None,
  97                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  98                                  u"rel": u"search",
  99                                  u"type": u"application/opensearchdescription+xml"})
 100
 101
 102     def add_item_elements(self, handler, item):
 103         """ modified from Atom1Feed.add_item_elements """
 104         handler.addQuickElement(u"title", item['title'])
 105
 106         # add a OPDS Navigation link if there's no enclosure
 107         if item['enclosure'] is None:
 108             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 109             # add a "green book" icon
 110             handler.addQuickElement(u"link", '',
 111                 {u"rel": u"http://opds-spec.org/thumbnail",
 112                  u"href": self._book_parent_img,
 113                  u"length": self._book_parent_img_size,
 114                  u"type": u"image/png"})
 115         if item['pubdate'] is not None:
 116             # FIXME: rfc3339_date is undefined, is this ever run?
 117             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 118
 119         # Author information.
 120         if item['author_name'] is not None:
 121             handler.startElement(u"author", {})
 122             handler.addQuickElement(u"name", item['author_name'])
 123             if item['author_email'] is not None:
 124                 handler.addQuickElement(u"email", item['author_email'])
 125             if item['author_link'] is not None:
 126                 handler.addQuickElement(u"uri", item['author_link'])
 127             handler.endElement(u"author")
 128
 129         # Unique ID.
 130         if item['unique_id'] is not None:
 131             unique_id = item['unique_id']
 132         else:
 133             # FIXME: get_tag_uri is undefined, is this ever run?
 134             unique_id = get_tag_uri(item['link'], item['pubdate'])
 135         handler.addQuickElement(u"id", unique_id)
 136
 137         # Summary.
 138         # OPDS needs type=text
 139         if item['description'] is not None:
 140             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 141
 142         # Enclosure as OPDS Acquisition Link
 143         if item['enclosure'] is not None:
 144             handler.addQuickElement(u"link", '',
 145                 {u"rel": u"http://opds-spec.org/acquisition",
 146                  u"href": item['enclosure'].url,
 147                  u"length": item['enclosure'].length,
 148                  u"type": item['enclosure'].mime_type})
 149             # add a "red book" icon
 150             handler.addQuickElement(u"link", '',
 151                 {u"rel": u"http://opds-spec.org/thumbnail",
 152                  u"href": self._book_img,
 153                  u"length": self._book_img_size,
 154                  u"type": u"image/png"})
 155
 156         # Categories.
 157         for cat in item['categories']:
 158             handler.addQuickElement(u"category", u"", {u"term": cat})
 159
 160         # Rights.
 161         if item['item_copyright'] is not None:
 162             handler.addQuickElement(u"rights", item['item_copyright'])
 163
 164
 165 class AcquisitionFeed(Feed):
 166     feed_type = OPDSFeed
 167     link = u'http://www.wolnelektury.pl/'
 168     item_enclosure_mime_type = "application/epub+zip"
 169     author_name = u"Wolne Lektury"
 170     author_link = u"http://www.wolnelektury.pl/"
 171
 172     def item_title(self, book):
 173         return book.title
 174
 175     def item_description(self):
 176         return u''
 177
 178     def item_link(self, book):
 179         return book.get_absolute_url()
 180
 181     def item_author_name(self, book):
 182         try:
 183             return book.tags.filter(category='author')[0].name
 184         except KeyError:
 185             return u''
 186
 187     def item_author_link(self, book):
 188         try:
 189             return book.tags.filter(category='author')[0].get_absolute_url()
 190         except KeyError:
 191             return u''
 192
 193     def item_enclosure_url(self, book):
 194         return full_url(book.epub_file.url) if book.epub_file else None
 195
 196     def item_enclosure_length(self, book):
 197         return book.epub_file.size if book.epub_file else None
 198
 199 @piwik_track
 200 class RootFeed(Feed):
 201     feed_type = OPDSFeed
 202     title = u'Wolne Lektury'
 203     link = u'http://wolnelektury.pl/'
 204     description = u"Spis utworów na stronie http://WolneLektury.pl"
 205     author_name = u"Wolne Lektury"
 206     author_link = u"http://wolnelektury.pl/"
 207
 208     def items(self):
 209         return _root_feeds
 210
 211     def item_title(self, item):
 212         return item['title']
 213
 214     def item_link(self, item):
 215         return reverse(item['link'], args=item['link_args'])
 216
 217     def item_description(self, item):
 218         return item['description']
 219
 220 @piwik_track
 221 class ByCategoryFeed(Feed):
 222     feed_type = OPDSFeed
 223     link = u'http://wolnelektury.pl/'
 224     description = u"Spis utworów na stronie http://WolneLektury.pl"
 225     author_name = u"Wolne Lektury"
 226     author_link = u"http://wolnelektury.pl/"
 227
 228     def get_object(self, request, category):
 229         feed = [feed for feed in _root_feeds if feed['category'] == category]
 230         if feed:
 231             feed = feed[0]
 232         else:
 233             raise Http404
 234
 235         return feed
 236
 237     def title(self, feed):
 238         return feed['title']
 239
 240     def items(self, feed):
 241         return Tag.objects.filter(category=feed['category']).exclude(items=None)
 242
 243     def item_title(self, item):
 244         return item.name
 245
 246     def item_link(self, item):
 247         return reverse("opds_by_tag", args=[item.category, item.slug])
 248
 249     def item_description(self):
 250         return u''
 251
 252 @piwik_track
 253 class ByTagFeed(AcquisitionFeed):
 254     def link(self, tag):
 255         return tag.get_absolute_url()
 256
 257     def title(self, tag):
 258         return tag.name
 259
 260     def description(self, tag):
 261         return u"Spis utworów na stronie http://WolneLektury.pl"
 262
 263     def get_object(self, request, category, slug):
 264         return get_object_or_404(Tag, category=category, slug=slug)
 265
 266     def items(self, tag):
 267         return Book.tagged_top_level([tag])
 268
 269
 270 @factory_decorator(logged_in_or_basicauth())
 271 @piwik_track
 272 class UserFeed(Feed):
 273     feed_type = OPDSFeed
 274     link = u'http://www.wolnelektury.pl/'
 275     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 276     author_name = u"Wolne Lektury"
 277     author_link = u"http://wolnelektury.pl/"
 278
 279     def get_object(self, request):
 280         return request.user
 281
 282     def title(self, user):
 283         return u"Półki użytkownika %s" % user.username
 284
 285     def items(self, user):
 286         return Tag.objects.filter(category='set', user=user).exclude(items=None)
 287
 288     def item_title(self, item):
 289         return item.name
 290
 291     def item_link(self, item):
 292         return reverse("opds_user_set", args=[item.slug])
 293
 294     def item_description(self):
 295         return u''
 296
 297
 298 @factory_decorator(logged_in_or_basicauth())
 299 @piwik_track
 300 class UserSetFeed(AcquisitionFeed):
 301     def link(self, tag):
 302         return tag.get_absolute_url()
 303
 304     def title(self, tag):
 305         return tag.name
 306
 307     def description(self, tag):
 308         return u"Spis utworów na stronie http://WolneLektury.pl"
 309
 310     def get_object(self, request, slug):
 311         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 312
 313     def items(self, tag):
 314         return Book.tagged.with_any([tag])
 315
 316
 317 @piwik_track
 318 class SearchFeed(AcquisitionFeed):
 319     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 320     title = u"Wyniki wyszukiwania"
 321
 322     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 323     INLINE_QUERY_RE = re.compile(
 324         r"author:" + QUOTE_OR_NOT +
 325         "|translator:" + QUOTE_OR_NOT +
 326         "|title:" + QUOTE_OR_NOT +
 327         "|categories:" + QUOTE_OR_NOT +
 328         "|description:" + QUOTE_OR_NOT +
 329         "|text:" + QUOTE_OR_NOT
 330         )
 331     MATCHES = {
 332         'author': (0, 1),
 333         'translator': (2, 3),
 334         'title': (4, 5),
 335         'categories': (6, 7),
 336         'description': (8, 9),
 337         'text': (10, 11),
 338         }
 339
 340     PARAMS_TO_FIELDS = {
 341         'author': 'authors',
 342         'translator': 'translators',
 343         #        'title': 'title',
 344         'categories': 'tag_name_pl',
 345         'description': 'text',
 346         #        'text': 'text',
 347         }
 348
 349     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 350
 351     def get_object(self, request):
 352         """
 353         For OPDS 1.1 We should handle a query for search terms
 354         and criteria provided either as opensearch or 'inline' query.
 355         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 356         atom:title. Inline query provides author, title, categories (treated as book tags),
 357         description (treated as content search terms).
 358
 359         if search terms are provided, we shall search for books
 360         according to Hint information (from author & contributror & title).
 361
 362         but if search terms are empty, we should do a different search
 363         (perhaps for is_book=True)
 364
 365         """
 366
 367         query = request.GET.get('q', '')
 368
 369         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 370         if inline_criteria:
 371             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 372             remains = re.sub(r'[ \t]+', ' ', remains)
 373
 374             def get_criteria(criteria, name):
 375                 for c in criteria:
 376                     for p in self.MATCHES[name]:
 377                         if c[p]:
 378                             if p % 2 == 0:
 379                                 return c[p].replace('+', ' ')
 380                             return c[p]
 381                 return None
 382
 383             criteria = dict(map(
 384                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 385                 ['author', 'translator', 'title', 'categories',
 386                  'description', 'text']))
 387             query = remains
 388             # empty query and text set case?
 389             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 390         else:
 391             def remove_dump_data(val):
 392                 """Some clients don't get opds placeholders and just send them."""
 393                 if self.ATOM_PLACEHOLDER.match(val):
 394                     return ''
 395                 return val
 396
 397             criteria = dict([(cn, remove_dump_data(request.GET.get(cn, '')))
 398                         for cn in self.MATCHES.keys()])
 399             # query is set above.
 400             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 401
 402         srch = Search()
 403
 404         book_hit_filter = srch.index.Q(book_id__any=True)
 405         filters = [book_hit_filter] + [srch.index.Q(
 406             **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
 407             ) for cn in self.MATCHES.keys() if cn in criteria
 408             if criteria[cn]]
 409
 410         if query:
 411             q = srch.index.query(
 412                 reduce(operator.or_,
 413                        [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
 414                         for cn in self.MATCHES.keys()],
 415                 srch.index.Q()))
 416         else:
 417             q = srch.index.query(srch.index.Q())
 418
 419         q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
 420         results = q.execute()
 421
 422         book_scores = dict([(r['book_id'], r['score']) for r in results])
 423         books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
 424         books = list(books)
 425         books.sort(reverse=True, key=lambda book: book_scores[book.id])
 426         return books
 427
 428     def get_link(self, query):
 429         return "%s?q=%s" % (reverse('search'), query)
 430
 431     def items(self, books):
 432         try:
 433             return books
 434         except ValueError:
 435             # too short a query
 436             return []