apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import os.path
   6 from urlparse import urljoin
   7
   8 from django.contrib.syndication.views import Feed
   9 from django.core.urlresolvers import reverse
  10 from django.shortcuts import get_object_or_404
  11 from django.utils.feedgenerator import Atom1Feed
  12 from django.conf import settings
  13 from django.http import Http404
  14 from django.contrib.sites.models import Site
  15
  16 from basicauth import logged_in_or_basicauth, factory_decorator
  17 from catalogue.models import Book, Tag
  18
  19 from search import Search, SearchResult, JVM
  20 from lucene import Term, QueryWrapperFilter, TermQuery
  21
  22 import re
  23
  24 from stats.utils import piwik_track
  25
# Static description of the entries listed by the root OPDS catalogue.
# Each entry is a dict with the keys:
#   category    - tag category the entry stands for; ByCategoryFeed.get_object
#                 matches its URL argument against this value
#   link        - URL pattern name, resolved with reverse() in RootFeed.item_link
#   link_args   - positional arguments passed to reverse() along with `link`
#   title       - entry title shown in the feed (Polish, user-facing)
#   description - entry summary shown in the feed (Polish, user-facing)
_root_feeds = (
    # "My shelves" - the user's shelves, available after logging in.
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
    # "Authors" - works by author.
    {
        u"category": u"author",
        u"link": u"opds_by_category",
        u"link_args": [u"author"],
        u"title": u"Autorzy",
        u"description": u"Utwory wg autorów"
    },
    # "Kinds" - works by literary kind.
    {
        u"category": u"kind",
        u"link": u"opds_by_category",
        u"link_args": [u"kind"],
        u"title": u"Rodzaje",
        u"description": u"Utwory wg rodzajów"
    },
    # "Genres" - works by genre.
    {
        u"category": u"genre",
        u"link": u"opds_by_category",
        u"link_args": [u"genre"],
        u"title": u"Gatunki",
        u"description": u"Utwory wg gatunków"
    },
    # "Epochs" - works by epoch.
    {
        u"category": u"epoch",
        u"link": u"opds_by_category",
        u"link_args": [u"epoch"],
        u"title": u"Epoki",
        u"description": u"Utwory wg epok"
    },
)
  63
  64
  65 def full_url(url):
  66     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  67
  68
  69 class OPDSFeed(Atom1Feed):
  70     link_rel = u"subsection"
  71     link_type = u"application/atom+xml"
  72
  73     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  74     try:
  75         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  76     except:
  77         _book_parent_img_size = ''
  78
  79     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  80     try:
  81         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  82     except:
  83         _book_img_size = ''
  84
  85
  86     def add_root_elements(self, handler):
  87         super(OPDSFeed, self).add_root_elements(handler)
  88         handler.addQuickElement(u"link", None,
  89                                 {u"href": reverse("opds_authors"),
  90                                  u"rel": u"start",
  91                                  u"type": u"application/atom+xml"})
  92         handler.addQuickElement(u"link", None,
  93                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  94                                  u"rel": u"search",
  95                                  u"type": u"application/opensearchdescription+xml"})
  96
  97
  98     def add_item_elements(self, handler, item):
  99         """ modified from Atom1Feed.add_item_elements """
 100         handler.addQuickElement(u"title", item['title'])
 101
 102         # add a OPDS Navigation link if there's no enclosure
 103         if item['enclosure'] is None:
 104             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 105             # add a "green book" icon
 106             handler.addQuickElement(u"link", '',
 107                 {u"rel": u"http://opds-spec.org/thumbnail",
 108                  u"href": self._book_parent_img,
 109                  u"length": self._book_parent_img_size,
 110                  u"type": u"image/png"})
 111         if item['pubdate'] is not None:
 112             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 113
 114         # Author information.
 115         if item['author_name'] is not None:
 116             handler.startElement(u"author", {})
 117             handler.addQuickElement(u"name", item['author_name'])
 118             if item['author_email'] is not None:
 119                 handler.addQuickElement(u"email", item['author_email'])
 120             if item['author_link'] is not None:
 121                 handler.addQuickElement(u"uri", item['author_link'])
 122             handler.endElement(u"author")
 123
 124         # Unique ID.
 125         if item['unique_id'] is not None:
 126             unique_id = item['unique_id']
 127         else:
 128             unique_id = get_tag_uri(item['link'], item['pubdate'])
 129         handler.addQuickElement(u"id", unique_id)
 130
 131         # Summary.
 132         # OPDS needs type=text
 133         if item['description'] is not None:
 134             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 135
 136         # Enclosure as OPDS Acquisition Link
 137         if item['enclosure'] is not None:
 138             handler.addQuickElement(u"link", '',
 139                 {u"rel": u"http://opds-spec.org/acquisition",
 140                  u"href": item['enclosure'].url,
 141                  u"length": item['enclosure'].length,
 142                  u"type": item['enclosure'].mime_type})
 143             # add a "red book" icon
 144             handler.addQuickElement(u"link", '',
 145                 {u"rel": u"http://opds-spec.org/thumbnail",
 146                  u"href": self._book_img,
 147                  u"length": self._book_img_size,
 148                  u"type": u"image/png"})
 149
 150         # Categories.
 151         for cat in item['categories']:
 152             handler.addQuickElement(u"category", u"", {u"term": cat})
 153
 154         # Rights.
 155         if item['item_copyright'] is not None:
 156             handler.addQuickElement(u"rights", item['item_copyright'])
 157
 158
 159 class AcquisitionFeed(Feed):
 160     feed_type = OPDSFeed
 161     link = u'http://www.wolnelektury.pl/'
 162     item_enclosure_mime_type = "application/epub+zip"
 163     author_name = u"Wolne Lektury"
 164     author_link = u"http://www.wolnelektury.pl/"
 165
 166     def item_title(self, book):
 167         return book.title
 168
 169     def item_description(self):
 170         return u''
 171
 172     def item_link(self, book):
 173         return book.get_absolute_url()
 174
 175     def item_author_name(self, book):
 176         try:
 177             return book.tags.filter(category='author')[0].name
 178         except KeyError:
 179             return u''
 180
 181     def item_author_link(self, book):
 182         try:
 183             return book.tags.filter(category='author')[0].get_absolute_url()
 184         except KeyError:
 185             return u''
 186
 187     def item_enclosure_url(self, book):
 188         return full_url(book.epub_file.url)
 189
 190     def item_enclosure_length(self, book):
 191         return book.epub_file.size
 192
 193 @piwik_track
 194 class RootFeed(Feed):
 195     feed_type = OPDSFeed
 196     title = u'Wolne Lektury'
 197     link = u'http://www.wolnelektury.pl/'
 198     description = u"Spis utworów na stronie http://WolneLektury.pl"
 199     author_name = u"Wolne Lektury"
 200     author_link = u"http://www.wolnelektury.pl/"
 201
 202     def items(self):
 203         return _root_feeds
 204
 205     def item_title(self, item):
 206         return item['title']
 207
 208     def item_link(self, item):
 209         return reverse(item['link'], args=item['link_args'])
 210
 211     def item_description(self, item):
 212         return item['description']
 213
 214 @piwik_track
 215 class ByCategoryFeed(Feed):
 216     feed_type = OPDSFeed
 217     link = u'http://www.wolnelektury.pl/'
 218     description = u"Spis utworów na stronie http://WolneLektury.pl"
 219     author_name = u"Wolne Lektury"
 220     author_link = u"http://www.wolnelektury.pl/"
 221
 222     def get_object(self, request, category):
 223         feed = [feed for feed in _root_feeds if feed['category']==category]
 224         if feed:
 225             feed = feed[0]
 226         else:
 227             raise Http404
 228
 229         return feed
 230
 231     def title(self, feed):
 232         return feed['title']
 233
 234     def items(self, feed):
 235         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 236
 237     def item_title(self, item):
 238         return item.name
 239
 240     def item_link(self, item):
 241         return reverse("opds_by_tag", args=[item.category, item.slug])
 242
 243     def item_description(self):
 244         return u''
 245
 246 @piwik_track
 247 class ByTagFeed(AcquisitionFeed):
 248     def link(self, tag):
 249         return tag.get_absolute_url()
 250
 251     def title(self, tag):
 252         return tag.name
 253
 254     def description(self, tag):
 255         return u"Spis utworów na stronie http://WolneLektury.pl"
 256
 257     def get_object(self, request, category, slug):
 258         return get_object_or_404(Tag, category=category, slug=slug)
 259
 260     def items(self, tag):
 261         books = Book.tagged.with_any([tag])
 262         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
 263         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 264         if descendants_keys:
 265             books = books.exclude(pk__in=descendants_keys)
 266
 267         return books
 268
 269
 270 @factory_decorator(logged_in_or_basicauth())
 271 @piwik_track
 272 class UserFeed(Feed):
 273     feed_type = OPDSFeed
 274     link = u'http://www.wolnelektury.pl/'
 275     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 276     author_name = u"Wolne Lektury"
 277     author_link = u"http://www.wolnelektury.pl/"
 278
 279     def get_object(self, request):
 280         return request.user
 281
 282     def title(self, user):
 283         return u"Półki użytkownika %s" % user.username
 284
 285     def items(self, user):
 286         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 287
 288     def item_title(self, item):
 289         return item.name
 290
 291     def item_link(self, item):
 292         return reverse("opds_user_set", args=[item.slug])
 293
 294     def item_description(self):
 295         return u''
 296
# Python 2.5 has no class-decorator syntax; on that version the
# authentication wrapper has to be applied by hand instead, like this:
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
 299
 300
 301 @factory_decorator(logged_in_or_basicauth())
 302 @piwik_track
 303 class UserSetFeed(AcquisitionFeed):
 304     def link(self, tag):
 305         return tag.get_absolute_url()
 306
 307     def title(self, tag):
 308         return tag.name
 309
 310     def description(self, tag):
 311         return u"Spis utworów na stronie http://WolneLektury.pl"
 312
 313     def get_object(self, request, slug):
 314         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 315
 316     def items(self, tag):
 317         return Book.tagged.with_any([tag])
 318
# Python 2.5 has no class-decorator syntax; on that version the
# authentication wrapper has to be applied by hand instead, like this:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
 321
 322
 323 @piwik_track
 324 class SearchFeed(AcquisitionFeed):
 325     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 326     title = u"Wyniki wyszukiwania"
 327
 328     INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
 329
 330     def get_object(self, request):
 331         """
 332         For OPDS 1.1 We should handle a query for search terms
 333         and criteria provided either as opensearch or 'inline' query.
 334         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 335         atom:title. Inline query provides author, title, categories (treated as book tags),
 336         description (treated as content search terms).
 337
 338         if search terms are provided, we shall search for books
 339         according to Hint information (from author & contributror & title).
 340
 341         but if search terms are empty, we should do a different search
 342         (perhaps for is_book=True)
 343
 344         """
 345         JVM.attachCurrentThread()
 346
 347         query = request.GET.get('q', '')
 348
 349         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 350         if inline_criteria:
 351             def get_criteria(criteria, name, position):
 352                 e = filter(lambda el: el[0][0:len(name)] == name, criteria)
 353                 print e
 354                 if not e:
 355                     return None
 356                 c = e[0][position]
 357                 print c
 358                 if c[0] == '"' and c[-1] == '"':
 359                     c = c[1:-1]
 360                     c = c.replace('+', ' ')
 361                 return c
 362
 363             #import pdb; pdb.set_trace()
 364             author = get_criteria(inline_criteria, 'author', 1)
 365             title = get_criteria(inline_criteria, 'title', 2)
 366             translator = None
 367             categories = get_criteria(inline_criteria, 'categories', 3)
 368             query = get_criteria(inline_criteria, 'description', 4)
 369         else:
 370             author = request.GET.get('author', '')
 371             title = request.GET.get('title', '')
 372             translator = request.GET.get('translator', '')
 373             categories = None
 374             fuzzy = False
 375
 376
 377         srch = Search()
 378         hint = srch.hint()
 379
 380         # Scenario 1: full search terms provided.
 381         # Use auxiliarry information to narrow it and make it better.
 382         if query:
 383             filters = []
 384
 385             if author:
 386                 print "narrow to author %s" % author
 387                 hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))
 388
 389             if translator:
 390                 print "filter by translator %s" % translator
 391                 filters.append(QueryWrapperFilter(
 392                     srch.make_phrase(srch.get_tokens(translator, field='translators'),
 393                                      field='translators')))
 394
 395             if categories:
 396                 filters.append(QueryWrapperFilter(
 397                     srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
 398                                      field='tag_name_pl')))
 399
 400             flt = srch.chain_filters(filters)
 401             if title:
 402                 print "hint by book title %s" % title
 403                 q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
 404                 hint.books(*srch.search_books(q, filter=flt))
 405
 406             toks = srch.get_tokens(query)
 407             print "tokens: %s" % toks
 408             #            import pdb; pdb.set_trace()
 409             results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
 410                 srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
 411                 srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
 412             results.sort(reverse=True)
 413             return [r.book for r in results]
 414         else:
 415             # Scenario 2: since we no longer have to figure out what the query term means to the user,
 416             # we can just use filters and not the Hint class.
 417             filters = []
 418
 419             fields = {
 420                 'author': author,
 421                 'translators': translator,
 422                 'title': title
 423                 }
 424
 425             for fld, q in fields.items():
 426                 if q:
 427                     filters.append(QueryWrapperFilter(
 428                         srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))
 429
 430             flt = srch.chain_filters(filters)
 431             books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)
 432             return books
 433
 434     def get_link(self, query):
 435         return "%s?q=%s" % (reverse('search'), query)
 436
 437     def items(self, books):
 438         try:
 439             return books
 440         except ValueError:
 441             # too short a query
 442             return []