apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import os.path
   6 from urlparse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.core.urlresolvers import reverse
  10 from django.shortcuts import get_object_or_404
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15
  16 from basicauth import logged_in_or_basicauth, factory_decorator
  17 from catalogue.models import Book, Tag
  18
  19 from search.views import get_search, SearchResult, JVM
  20 from lucene import Term, QueryWrapperFilter, TermQuery
  21
  22 import logging
  23 import re
  24
  25 log = logging.getLogger('opds')
  26
  27 from stats.utils import piwik_track
  28
  29 _root_feeds = (
  30     {
  31         u"category": u"",
  32         u"link": u"opds_user",
  33         u"link_args": [],
  34         u"title": u"Moje półki",
  35         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  36     },
  37     {
  38         u"category": u"author",
  39         u"link": u"opds_by_category",
  40         u"link_args": [u"author"],
  41         u"title": u"Autorzy",
  42         u"description": u"Utwory wg autorów"
  43     },
  44     {
  45         u"category": u"kind",
  46         u"link": u"opds_by_category",
  47         u"link_args": [u"kind"],
  48         u"title": u"Rodzaje",
  49         u"description": u"Utwory wg rodzajów"
  50     },
  51     {
  52         u"category": u"genre",
  53         u"link": u"opds_by_category",
  54         u"link_args": [u"genre"],
  55         u"title": u"Gatunki",
  56         u"description": u"Utwory wg gatunków"
  57     },
  58     {
  59         u"category": u"epoch",
  60         u"link": u"opds_by_category",
  61         u"link_args": [u"epoch"],
  62         u"title": u"Epoki",
  63         u"description": u"Utwory wg epok"
  64     },
  65 )
  66
  67
  68 def full_url(url):
  69     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  70
  71
  72 class OPDSFeed(Atom1Feed):
  73     link_rel = u"subsection"
  74     link_type = u"application/atom+xml"
  75
  76     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  77     try:
  78         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  79     except:
  80         _book_parent_img_size = ''
  81
  82     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  83     try:
  84         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  85     except:
  86         _book_img_size = ''
  87
  88
  89     def add_root_elements(self, handler):
  90         super(OPDSFeed, self).add_root_elements(handler)
  91         handler.addQuickElement(u"link", None,
  92                                 {u"href": reverse("opds_authors"),
  93                                  u"rel": u"start",
  94                                  u"type": u"application/atom+xml"})
  95         handler.addQuickElement(u"link", None,
  96                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  97                                  u"rel": u"search",
  98                                  u"type": u"application/opensearchdescription+xml"})
  99
 100
 101     def add_item_elements(self, handler, item):
 102         """ modified from Atom1Feed.add_item_elements """
 103         handler.addQuickElement(u"title", item['title'])
 104
 105         # add a OPDS Navigation link if there's no enclosure
 106         if item['enclosure'] is None:
 107             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 108             # add a "green book" icon
 109             handler.addQuickElement(u"link", '',
 110                 {u"rel": u"http://opds-spec.org/thumbnail",
 111                  u"href": self._book_parent_img,
 112                  u"length": self._book_parent_img_size,
 113                  u"type": u"image/png"})
 114         if item['pubdate'] is not None:
 115             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 116
 117         # Author information.
 118         if item['author_name'] is not None:
 119             handler.startElement(u"author", {})
 120             handler.addQuickElement(u"name", item['author_name'])
 121             if item['author_email'] is not None:
 122                 handler.addQuickElement(u"email", item['author_email'])
 123             if item['author_link'] is not None:
 124                 handler.addQuickElement(u"uri", item['author_link'])
 125             handler.endElement(u"author")
 126
 127         # Unique ID.
 128         if item['unique_id'] is not None:
 129             unique_id = item['unique_id']
 130         else:
 131             unique_id = get_tag_uri(item['link'], item['pubdate'])
 132         handler.addQuickElement(u"id", unique_id)
 133
 134         # Summary.
 135         # OPDS needs type=text
 136         if item['description'] is not None:
 137             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 138
 139         # Enclosure as OPDS Acquisition Link
 140         if item['enclosure'] is not None:
 141             handler.addQuickElement(u"link", '',
 142                 {u"rel": u"http://opds-spec.org/acquisition",
 143                  u"href": item['enclosure'].url,
 144                  u"length": item['enclosure'].length,
 145                  u"type": item['enclosure'].mime_type})
 146             # add a "red book" icon
 147             handler.addQuickElement(u"link", '',
 148                 {u"rel": u"http://opds-spec.org/thumbnail",
 149                  u"href": self._book_img,
 150                  u"length": self._book_img_size,
 151                  u"type": u"image/png"})
 152
 153         # Categories.
 154         for cat in item['categories']:
 155             handler.addQuickElement(u"category", u"", {u"term": cat})
 156
 157         # Rights.
 158         if item['item_copyright'] is not None:
 159             handler.addQuickElement(u"rights", item['item_copyright'])
 160
 161
 162 class AcquisitionFeed(Feed):
 163     feed_type = OPDSFeed
 164     link = u'http://www.wolnelektury.pl/'
 165     item_enclosure_mime_type = "application/epub+zip"
 166     author_name = u"Wolne Lektury"
 167     author_link = u"http://www.wolnelektury.pl/"
 168
 169     def item_title(self, book):
 170         return book.title
 171
 172     def item_description(self):
 173         return u''
 174
 175     def item_link(self, book):
 176         return book.get_absolute_url()
 177
 178     def item_author_name(self, book):
 179         try:
 180             return book.tags.filter(category='author')[0].name
 181         except KeyError:
 182             return u''
 183
 184     def item_author_link(self, book):
 185         try:
 186             return book.tags.filter(category='author')[0].get_absolute_url()
 187         except KeyError:
 188             return u''
 189
 190     def item_enclosure_url(self, book):
 191         return full_url(book.epub_file.url) if book.epub_file else None
 192
 193     def item_enclosure_length(self, book):
 194         return book.epub_file.size if book.epub_file else None
 195
 196 @piwik_track
 197 class RootFeed(Feed):
 198     feed_type = OPDSFeed
 199     title = u'Wolne Lektury'
 200     link = u'http://wolnelektury.pl/'
 201     description = u"Spis utworów na stronie http://WolneLektury.pl"
 202     author_name = u"Wolne Lektury"
 203     author_link = u"http://wolnelektury.pl/"
 204
 205     def items(self):
 206         return _root_feeds
 207
 208     def item_title(self, item):
 209         return item['title']
 210
 211     def item_link(self, item):
 212         return reverse(item['link'], args=item['link_args'])
 213
 214     def item_description(self, item):
 215         return item['description']
 216
 217 @piwik_track
 218 class ByCategoryFeed(Feed):
 219     feed_type = OPDSFeed
 220     link = u'http://wolnelektury.pl/'
 221     description = u"Spis utworów na stronie http://WolneLektury.pl"
 222     author_name = u"Wolne Lektury"
 223     author_link = u"http://wolnelektury.pl/"
 224
 225     def get_object(self, request, category):
 226         feed = [feed for feed in _root_feeds if feed['category']==category]
 227         if feed:
 228             feed = feed[0]
 229         else:
 230             raise Http404
 231
 232         return feed
 233
 234     def title(self, feed):
 235         return feed['title']
 236
 237     def items(self, feed):
 238         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 239
 240     def item_title(self, item):
 241         return item.name
 242
 243     def item_link(self, item):
 244         return reverse("opds_by_tag", args=[item.category, item.slug])
 245
 246     def item_description(self):
 247         return u''
 248
 249 @piwik_track
 250 class ByTagFeed(AcquisitionFeed):
 251     def link(self, tag):
 252         return tag.get_absolute_url()
 253
 254     def title(self, tag):
 255         return tag.name
 256
 257     def description(self, tag):
 258         return u"Spis utworów na stronie http://WolneLektury.pl"
 259
 260     def get_object(self, request, category, slug):
 261         return get_object_or_404(Tag, category=category, slug=slug)
 262
 263     def items(self, tag):
 264         books = Book.tagged.with_any([tag])
 265         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
 266         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 267         if descendants_keys:
 268             books = books.exclude(pk__in=descendants_keys)
 269
 270         return books
 271
 272
 273 @factory_decorator(logged_in_or_basicauth())
 274 @piwik_track
 275 class UserFeed(Feed):
 276     feed_type = OPDSFeed
 277     link = u'http://www.wolnelektury.pl/'
 278     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 279     author_name = u"Wolne Lektury"
 280     author_link = u"http://wolnelektury.pl/"
 281
 282     def get_object(self, request):
 283         return request.user
 284
 285     def title(self, user):
 286         return u"Półki użytkownika %s" % user.username
 287
 288     def items(self, user):
 289         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 290
 291     def item_title(self, item):
 292         return item.name
 293
 294     def item_link(self, item):
 295         return reverse("opds_user_set", args=[item.slug])
 296
 297     def item_description(self):
 298         return u''
 299
# Python 2.5 has no class-decorator syntax; on that version, drop the
# decorators above and apply them manually instead:
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
 302
 303
 304 @factory_decorator(logged_in_or_basicauth())
 305 @piwik_track
 306 class UserSetFeed(AcquisitionFeed):
 307     def link(self, tag):
 308         return tag.get_absolute_url()
 309
 310     def title(self, tag):
 311         return tag.name
 312
 313     def description(self, tag):
 314         return u"Spis utworów na stronie http://WolneLektury.pl"
 315
 316     def get_object(self, request, slug):
 317         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 318
 319     def items(self, tag):
 320         return Book.tagged.with_any([tag])
 321
# Python 2.5 has no class-decorator syntax; on that version, drop the
# decorators above and apply them manually instead:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
 324
 325
 326 @piwik_track
 327 class SearchFeed(AcquisitionFeed):
 328     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 329     title = u"Wyniki wyszukiwania"
 330
 331     INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
 332
 333     def get_object(self, request):
 334         """
 335         For OPDS 1.1 We should handle a query for search terms
 336         and criteria provided either as opensearch or 'inline' query.
 337         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 338         atom:title. Inline query provides author, title, categories (treated as book tags),
 339         description (treated as content search terms).
 340
 341         if search terms are provided, we shall search for books
 342         according to Hint information (from author & contributror & title).
 343
 344         but if search terms are empty, we should do a different search
 345         (perhaps for is_book=True)
 346
 347         """
 348         JVM.attachCurrentThread()
 349
 350         query = request.GET.get('q', '')
 351
 352         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 353         if inline_criteria:
 354             def get_criteria(criteria, name, position):
 355                 e = filter(lambda el: el[0][0:len(name)] == name, criteria)
 356                 log.info("get_criteria: %s" % e)
 357                 if not e:
 358                     return None
 359                 c = e[0][position]
 360                 log.info("get_criteria: %s" % c)
 361                 if c[0] == '"' and c[-1] == '"':
 362                     c = c[1:-1]
 363                     c = c.replace('+', ' ')
 364                 return c
 365
 366             author = get_criteria(inline_criteria, 'author', 1)
 367             title = get_criteria(inline_criteria, 'title', 2)
 368             translator = None
 369             categories = get_criteria(inline_criteria, 'categories', 3)
 370             query = get_criteria(inline_criteria, 'description', 4)
 371         else:
 372             author = request.GET.get('author', '')
 373             title = request.GET.get('title', '')
 374             translator = request.GET.get('translator', '')
 375
 376             # Our client didn't handle the opds placeholders
 377             if author == '{atom:author}': author = ''
 378             if title == '{atom:title}': title = ''
 379             if translator == '{atom:contributor}': translator = ''
 380             categories = None
 381             fuzzy = False
 382
 383         srch = get_search()
 384         hint = srch.hint()
 385
 386         # Scenario 1: full search terms provided.
 387         # Use auxiliarry information to narrow it and make it better.
 388         if query:
 389             filters = []
 390
 391             if author:
 392                 log.info( "narrow to author %s" % author)
 393                 hint.tags(srch.search_tags(srch.make_phrase(srch.get_tokens(author, field='authors'), field='authors'),
 394                                             filt=srch.term_filter(Term('tag_category', 'author'))))
 395
 396             if translator:
 397                 log.info( "filter by translator %s" % translator)
 398                 filters.append(QueryWrapperFilter(
 399                     srch.make_phrase(srch.get_tokens(translator, field='translators'),
 400                                      field='translators')))
 401
 402             if categories:
 403                 filters.append(QueryWrapperFilter(
 404                     srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
 405                                      field='tag_name_pl')))
 406
 407             flt = srch.chain_filters(filters)
 408             if title:
 409                 log.info( "hint by book title %s" % title)
 410                 q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
 411                 hint.books(*srch.search_books(q, filt=flt))
 412
 413             toks = srch.get_tokens(query)
 414             log.info("tokens for query: %s" % toks)
 415
 416             results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
 417                 srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
 418                 srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
 419             results.sort(reverse=True)
 420             books = []
 421             for r in results:
 422                 try:
 423                     books.append(r.book)
 424                 except Book.DoesNotExist:
 425                     pass
 426             log.info("books: %s" % books)
 427             return books
 428         else:
 429             # Scenario 2: since we no longer have to figure out what the query term means to the user,
 430             # we can just use filters and not the Hint class.
 431             filters = []
 432
 433             fields = {
 434                 'author': author,
 435                 'translators': translator,
 436                 'title': title
 437                 }
 438
 439             for fld, q in fields.items():
 440                 if q:
 441                     filters.append(QueryWrapperFilter(
 442                         srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))
 443
 444             flt = srch.chain_filters(filters)
 445             books = srch.search_books(TermQuery(Term('is_book', 'true')), filt=flt)
 446             return books
 447
 448     def get_link(self, query):
 449         return "%s?q=%s" % (reverse('search'), query)
 450
 451     def items(self, books):
 452         try:
 453             return books
 454         except ValueError:
 455             # too short a query
 456             return []