apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import os.path
   6 from urlparse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.core.urlresolvers import reverse
  10 from django.shortcuts import get_object_or_404
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15
  16 from basicauth import logged_in_or_basicauth, factory_decorator
  17 from catalogue.models import Book, Tag
  18
  19 from search.views import Search, SearchResult
  20 from lucene import Term, QueryWrapperFilter, TermQuery
  21 import operator
  22 import logging
  23 import re
  24
  25 log = logging.getLogger('opds')
  26
  27 from stats.utils import piwik_track
  28
  29 _root_feeds = (
  30     {
  31         u"category": u"",
  32         u"link": u"opds_user",
  33         u"link_args": [],
  34         u"title": u"Moje półki",
  35         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  36     },
  37     {
  38         u"category": u"author",
  39         u"link": u"opds_by_category",
  40         u"link_args": [u"author"],
  41         u"title": u"Autorzy",
  42         u"description": u"Utwory wg autorów"
  43     },
  44     {
  45         u"category": u"kind",
  46         u"link": u"opds_by_category",
  47         u"link_args": [u"kind"],
  48         u"title": u"Rodzaje",
  49         u"description": u"Utwory wg rodzajów"
  50     },
  51     {
  52         u"category": u"genre",
  53         u"link": u"opds_by_category",
  54         u"link_args": [u"genre"],
  55         u"title": u"Gatunki",
  56         u"description": u"Utwory wg gatunków"
  57     },
  58     {
  59         u"category": u"epoch",
  60         u"link": u"opds_by_category",
  61         u"link_args": [u"epoch"],
  62         u"title": u"Epoki",
  63         u"description": u"Utwory wg epok"
  64     },
  65 )
  66
  67
  68 def full_url(url):
  69     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  70
  71
  72 class OPDSFeed(Atom1Feed):
  73     link_rel = u"subsection"
  74     link_type = u"application/atom+xml"
  75
  76     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  77     try:
  78         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  79     except:
  80         _book_parent_img_size = ''
  81
  82     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  83     try:
  84         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  85     except:
  86         _book_img_size = ''
  87
  88
  89     def add_root_elements(self, handler):
  90         super(OPDSFeed, self).add_root_elements(handler)
  91         handler.addQuickElement(u"link", None,
  92                                 {u"href": reverse("opds_authors"),
  93                                  u"rel": u"start",
  94                                  u"type": u"application/atom+xml"})
  95         handler.addQuickElement(u"link", None,
  96                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  97                                  u"rel": u"search",
  98                                  u"type": u"application/opensearchdescription+xml"})
  99
 100
 101     def add_item_elements(self, handler, item):
 102         """ modified from Atom1Feed.add_item_elements """
 103         handler.addQuickElement(u"title", item['title'])
 104
 105         # add a OPDS Navigation link if there's no enclosure
 106         if item['enclosure'] is None:
 107             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 108             # add a "green book" icon
 109             handler.addQuickElement(u"link", '',
 110                 {u"rel": u"http://opds-spec.org/thumbnail",
 111                  u"href": self._book_parent_img,
 112                  u"length": self._book_parent_img_size,
 113                  u"type": u"image/png"})
 114         if item['pubdate'] is not None:
 115             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 116
 117         # Author information.
 118         if item['author_name'] is not None:
 119             handler.startElement(u"author", {})
 120             handler.addQuickElement(u"name", item['author_name'])
 121             if item['author_email'] is not None:
 122                 handler.addQuickElement(u"email", item['author_email'])
 123             if item['author_link'] is not None:
 124                 handler.addQuickElement(u"uri", item['author_link'])
 125             handler.endElement(u"author")
 126
 127         # Unique ID.
 128         if item['unique_id'] is not None:
 129             unique_id = item['unique_id']
 130         else:
 131             unique_id = get_tag_uri(item['link'], item['pubdate'])
 132         handler.addQuickElement(u"id", unique_id)
 133
 134         # Summary.
 135         # OPDS needs type=text
 136         if item['description'] is not None:
 137             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 138
 139         # Enclosure as OPDS Acquisition Link
 140         if item['enclosure'] is not None:
 141             handler.addQuickElement(u"link", '',
 142                 {u"rel": u"http://opds-spec.org/acquisition",
 143                  u"href": item['enclosure'].url,
 144                  u"length": item['enclosure'].length,
 145                  u"type": item['enclosure'].mime_type})
 146             # add a "red book" icon
 147             handler.addQuickElement(u"link", '',
 148                 {u"rel": u"http://opds-spec.org/thumbnail",
 149                  u"href": self._book_img,
 150                  u"length": self._book_img_size,
 151                  u"type": u"image/png"})
 152
 153         # Categories.
 154         for cat in item['categories']:
 155             handler.addQuickElement(u"category", u"", {u"term": cat})
 156
 157         # Rights.
 158         if item['item_copyright'] is not None:
 159             handler.addQuickElement(u"rights", item['item_copyright'])
 160
 161
 162 class AcquisitionFeed(Feed):
 163     feed_type = OPDSFeed
 164     link = u'http://www.wolnelektury.pl/'
 165     item_enclosure_mime_type = "application/epub+zip"
 166     author_name = u"Wolne Lektury"
 167     author_link = u"http://www.wolnelektury.pl/"
 168
 169     def item_title(self, book):
 170         return book.title
 171
 172     def item_description(self):
 173         return u''
 174
 175     def item_link(self, book):
 176         return book.get_absolute_url()
 177
 178     def item_author_name(self, book):
 179         try:
 180             return book.tags.filter(category='author')[0].name
 181         except KeyError:
 182             return u''
 183
 184     def item_author_link(self, book):
 185         try:
 186             return book.tags.filter(category='author')[0].get_absolute_url()
 187         except KeyError:
 188             return u''
 189
 190     def item_enclosure_url(self, book):
 191         return full_url(book.epub_file.url) if book.epub_file else None
 192
 193     def item_enclosure_length(self, book):
 194         return book.epub_file.size if book.epub_file else None
 195
 196 @piwik_track
 197 class RootFeed(Feed):
 198     feed_type = OPDSFeed
 199     title = u'Wolne Lektury'
 200     link = u'http://wolnelektury.pl/'
 201     description = u"Spis utworów na stronie http://WolneLektury.pl"
 202     author_name = u"Wolne Lektury"
 203     author_link = u"http://wolnelektury.pl/"
 204
 205     def items(self):
 206         return _root_feeds
 207
 208     def item_title(self, item):
 209         return item['title']
 210
 211     def item_link(self, item):
 212         return reverse(item['link'], args=item['link_args'])
 213
 214     def item_description(self, item):
 215         return item['description']
 216
 217 @piwik_track
 218 class ByCategoryFeed(Feed):
 219     feed_type = OPDSFeed
 220     link = u'http://wolnelektury.pl/'
 221     description = u"Spis utworów na stronie http://WolneLektury.pl"
 222     author_name = u"Wolne Lektury"
 223     author_link = u"http://wolnelektury.pl/"
 224
 225     def get_object(self, request, category):
 226         feed = [feed for feed in _root_feeds if feed['category']==category]
 227         if feed:
 228             feed = feed[0]
 229         else:
 230             raise Http404
 231
 232         return feed
 233
 234     def title(self, feed):
 235         return feed['title']
 236
 237     def items(self, feed):
 238         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 239
 240     def item_title(self, item):
 241         return item.name
 242
 243     def item_link(self, item):
 244         return reverse("opds_by_tag", args=[item.category, item.slug])
 245
 246     def item_description(self):
 247         return u''
 248
 249 @piwik_track
 250 class ByTagFeed(AcquisitionFeed):
 251     def link(self, tag):
 252         return tag.get_absolute_url()
 253
 254     def title(self, tag):
 255         return tag.name
 256
 257     def description(self, tag):
 258         return u"Spis utworów na stronie http://WolneLektury.pl"
 259
 260     def get_object(self, request, category, slug):
 261         return get_object_or_404(Tag, category=category, slug=slug)
 262
 263     def items(self, tag):
 264         books = Book.tagged.with_any([tag])
 265         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
 266         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 267         if descendants_keys:
 268             books = books.exclude(pk__in=descendants_keys)
 269
 270         return books
 271
 272
 273 @factory_decorator(logged_in_or_basicauth())
 274 @piwik_track
 275 class UserFeed(Feed):
 276     feed_type = OPDSFeed
 277     link = u'http://www.wolnelektury.pl/'
 278     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 279     author_name = u"Wolne Lektury"
 280     author_link = u"http://wolnelektury.pl/"
 281
 282     def get_object(self, request):
 283         return request.user
 284
 285     def title(self, user):
 286         return u"Półki użytkownika %s" % user.username
 287
 288     def items(self, user):
 289         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 290
 291     def item_title(self, item):
 292         return item.name
 293
 294     def item_link(self, item):
 295         return reverse("opds_user_set", args=[item.slug])
 296
 297     def item_description(self):
 298         return u''
 299
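# Class decorators are not available in Python 2.5; when running on it,
# remove the decorator lines above and wrap the class explicitly instead:
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)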
 302
 303
 304 @factory_decorator(logged_in_or_basicauth())
 305 @piwik_track
 306 class UserSetFeed(AcquisitionFeed):
 307     def link(self, tag):
 308         return tag.get_absolute_url()
 309
 310     def title(self, tag):
 311         return tag.name
 312
 313     def description(self, tag):
 314         return u"Spis utworów na stronie http://WolneLektury.pl"
 315
 316     def get_object(self, request, slug):
 317         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 318
 319     def items(self, tag):
 320         return Book.tagged.with_any([tag])
 321
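# Class decorators are not available in Python 2.5; when running on it,
# remove the decorator lines above and wrap the class explicitly instead:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)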
 324
 325
 326 @piwik_track
 327 class SearchFeed(AcquisitionFeed):
 328     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 329     title = u"Wyniki wyszukiwania"
 330
 331     QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
 332     INLINE_QUERY_RE = re.compile(
 333         r"author:" + QUOTE_OR_NOT +
 334         "|translator:" + QUOTE_OR_NOT +
 335         "|title:" + QUOTE_OR_NOT +
 336         "|categories:" + QUOTE_OR_NOT +
 337         "|description:" + QUOTE_OR_NOT +
 338         "|text:" + QUOTE_OR_NOT
 339         )
 340     MATCHES = {
 341         'author': (0, 1),
 342         'translator': (2, 3),
 343         'title': (4, 5),
 344         'categories': (6, 7),
 345         'description': (8, 9),
 346         'text': (10, 11),
 347         }
 348
 349     PARAMS_TO_FIELDS = {
 350         'author': 'authors',
 351         'translator': 'translators',
 352         #        'title': 'title',
 353         'categories': 'tag_name_pl',
 354         'description': 'text',
 355         #        'text': 'text',
 356         }
 357
 358     ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
 359
 360     def get_object(self, request):
 361         """
 362         For OPDS 1.1 We should handle a query for search terms
 363         and criteria provided either as opensearch or 'inline' query.
 364         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 365         atom:title. Inline query provides author, title, categories (treated as book tags),
 366         description (treated as content search terms).
 367
 368         if search terms are provided, we shall search for books
 369         according to Hint information (from author & contributror & title).
 370
 371         but if search terms are empty, we should do a different search
 372         (perhaps for is_book=True)
 373
 374         """
 375
 376         query = request.GET.get('q', '')
 377
 378         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 379         if inline_criteria:
 380             remains = re.sub(self.INLINE_QUERY_RE, '', query)
 381             remains = re.sub(r'[ \t]+', ' ', remains)
 382
 383             def get_criteria(criteria, name):
 384                 for c in criteria:
 385                     for p in self.MATCHES[name]:
 386                         if c[p]:
 387                             if p % 2 == 0:
 388                                 return c[p].replace('+', ' ')
 389                             return c[p]
 390                 return None
 391
 392             criteria = dict(map(
 393                 lambda cn: (cn, get_criteria(inline_criteria, cn)),
 394                 ['author', 'translator', 'title', 'categories',
 395                  'description', 'text']))
 396             query = remains
 397             # empty query and text set case?
 398             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 399         else:
 400             def remove_dump_data(val):
 401                 """Some clients don't get opds placeholders and just send them."""
 402                 if self.ATOM_PLACEHOLDER.match(val):
 403                     return ''
 404                 return val
 405
 406             criteria = dict([(cn, remove_dump_data(request.GET.get(cn, '')))
 407                         for cn in self.MATCHES.keys()])
 408             # query is set above.
 409             log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
 410
 411         srch = Search()
 412
 413         book_hit_filter = srch.index.Q(book_id__any=True)
 414         filters = [book_hit_filter] + [srch.index.Q(
 415             **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
 416             ) for cn in self.MATCHES.keys() if cn in criteria
 417             if criteria[cn]]
 418
 419         if query:
 420             q = srch.index.query(
 421                 reduce(operator.or_,
 422                        [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
 423                         for cn in self.MATCHES.keys()],
 424                 srch.index.Q()))
 425         else:
 426             q = srch.index.query(srch.index.Q())
 427
 428         q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
 429         results = q.execute()
 430
 431         book_scores = dict([(r['book_id'], r['score']) for r in results])
 432         books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
 433         books = list(books)
 434         books.sort(reverse=True, key=lambda book: book_scores[book.id])
 435         return books
 436
 437     def get_link(self, query):
 438         return "%s?q=%s" % (reverse('search'), query)
 439
 440     def items(self, books):
 441         try:
 442             return books
 443         except ValueError:
 444             # too short a query
 445             return []