apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from base64 import b64encode
   6 import os.path
   7 from urlparse import urljoin
   8 from urllib2 import unquote
   9
  10 from django.contrib.syndication.views import Feed
  11 from django.core.urlresolvers import reverse
  12 from django.shortcuts import get_object_or_404
  13 from django.utils.feedgenerator import Atom1Feed
  14 from django.conf import settings
  15 from django.http import Http404
  16 from django.contrib.sites.models import Site
  17
  18 from basicauth import logged_in_or_basicauth, factory_decorator
  19 from catalogue.models import Book, Tag
  20
  21 from search import MultiSearch, SearchResult, JVM
  22 from lucene import Term, QueryWrapperFilter, TermQuery
  23
  24 import re
  25
  26 from stats.utils import piwik_track
  27
  28 _root_feeds = (
  29     {
  30         u"category": u"",
  31         u"link": u"opds_user",
  32         u"link_args": [],
  33         u"title": u"Moje półki",
  34         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  35     },
  36     {
  37         u"category": u"author",
  38         u"link": u"opds_by_category",
  39         u"link_args": [u"author"],
  40         u"title": u"Autorzy",
  41         u"description": u"Utwory wg autorów"
  42     },
  43     {
  44         u"category": u"kind",
  45         u"link": u"opds_by_category",
  46         u"link_args": [u"kind"],
  47         u"title": u"Rodzaje",
  48         u"description": u"Utwory wg rodzajów"
  49     },
  50     {
  51         u"category": u"genre",
  52         u"link": u"opds_by_category",
  53         u"link_args": [u"genre"],
  54         u"title": u"Gatunki",
  55         u"description": u"Utwory wg gatunków"
  56     },
  57     {
  58         u"category": u"epoch",
  59         u"link": u"opds_by_category",
  60         u"link_args": [u"epoch"],
  61         u"title": u"Epoki",
  62         u"description": u"Utwory wg epok"
  63     },
  64 )
  65
  66
  67 def full_url(url):
  68     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  69
  70
  71 class OPDSFeed(Atom1Feed):
  72     link_rel = u"subsection"
  73     link_type = u"application/atom+xml"
  74
  75     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  76     try:
  77         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  78     except:
  79         _book_parent_img_size = ''
  80
  81     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  82     try:
  83         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  84     except:
  85         _book_img_size = ''
  86
  87
  88     def add_root_elements(self, handler):
  89         super(OPDSFeed, self).add_root_elements(handler)
  90         handler.addQuickElement(u"link", None,
  91                                 {u"href": reverse("opds_authors"),
  92                                  u"rel": u"start",
  93                                  u"type": u"application/atom+xml"})
  94         handler.addQuickElement(u"link", None,
  95                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  96                                  u"rel": u"search",
  97                                  u"type": u"application/opensearchdescription+xml"})
  98
  99
 100     def add_item_elements(self, handler, item):
 101         """ modified from Atom1Feed.add_item_elements """
 102         handler.addQuickElement(u"title", item['title'])
 103
 104         # add a OPDS Navigation link if there's no enclosure
 105         if item['enclosure'] is None:
 106             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 107             # add a "green book" icon
 108             handler.addQuickElement(u"link", '',
 109                 {u"rel": u"http://opds-spec.org/thumbnail",
 110                  u"href": self._book_parent_img,
 111                  u"length": self._book_parent_img_size,
 112                  u"type": u"image/png"})
 113         if item['pubdate'] is not None:
 114             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 115
 116         # Author information.
 117         if item['author_name'] is not None:
 118             handler.startElement(u"author", {})
 119             handler.addQuickElement(u"name", item['author_name'])
 120             if item['author_email'] is not None:
 121                 handler.addQuickElement(u"email", item['author_email'])
 122             if item['author_link'] is not None:
 123                 handler.addQuickElement(u"uri", item['author_link'])
 124             handler.endElement(u"author")
 125
 126         # Unique ID.
 127         if item['unique_id'] is not None:
 128             unique_id = item['unique_id']
 129         else:
 130             unique_id = get_tag_uri(item['link'], item['pubdate'])
 131         handler.addQuickElement(u"id", unique_id)
 132
 133         # Summary.
 134         # OPDS needs type=text
 135         if item['description'] is not None:
 136             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 137
 138         # Enclosure as OPDS Acquisition Link
 139         if item['enclosure'] is not None:
 140             handler.addQuickElement(u"link", '',
 141                 {u"rel": u"http://opds-spec.org/acquisition",
 142                  u"href": item['enclosure'].url,
 143                  u"length": item['enclosure'].length,
 144                  u"type": item['enclosure'].mime_type})
 145             # add a "red book" icon
 146             handler.addQuickElement(u"link", '',
 147                 {u"rel": u"http://opds-spec.org/thumbnail",
 148                  u"href": self._book_img,
 149                  u"length": self._book_img_size,
 150                  u"type": u"image/png"})
 151
 152         # Categories.
 153         for cat in item['categories']:
 154             handler.addQuickElement(u"category", u"", {u"term": cat})
 155
 156         # Rights.
 157         if item['item_copyright'] is not None:
 158             handler.addQuickElement(u"rights", item['item_copyright'])
 159
 160
 161 class AcquisitionFeed(Feed):
 162     feed_type = OPDSFeed
 163     link = u'http://www.wolnelektury.pl/'
 164     item_enclosure_mime_type = "application/epub+zip"
 165     author_name = u"Wolne Lektury"
 166     author_link = u"http://www.wolnelektury.pl/"
 167
 168     def item_title(self, book):
 169         return book.title
 170
 171     def item_description(self):
 172         return u''
 173
 174     def item_link(self, book):
 175         return book.get_absolute_url()
 176
 177     def item_author_name(self, book):
 178         try:
 179             return book.tags.filter(category='author')[0].name
 180         except KeyError:
 181             return u''
 182
 183     def item_author_link(self, book):
 184         try:
 185             return book.tags.filter(category='author')[0].get_absolute_url()
 186         except KeyError:
 187             return u''
 188
 189     def item_enclosure_url(self, book):
 190         return full_url(book.root_ancestor.epub_file.url)
 191
 192     def item_enclosure_length(self, book):
 193         return book.root_ancestor.epub_file.size
 194
 195 @piwik_track
 196 class RootFeed(Feed):
 197     feed_type = OPDSFeed
 198     title = u'Wolne Lektury'
 199     link = u'http://www.wolnelektury.pl/'
 200     description = u"Spis utworów na stronie http://WolneLektury.pl"
 201     author_name = u"Wolne Lektury"
 202     author_link = u"http://www.wolnelektury.pl/"
 203
 204     def items(self):
 205         return _root_feeds
 206
 207     def item_title(self, item):
 208         return item['title']
 209
 210     def item_link(self, item):
 211         return reverse(item['link'], args=item['link_args'])
 212
 213     def item_description(self, item):
 214         return item['description']
 215
 216 @piwik_track
 217 class ByCategoryFeed(Feed):
 218     feed_type = OPDSFeed
 219     link = u'http://www.wolnelektury.pl/'
 220     description = u"Spis utworów na stronie http://WolneLektury.pl"
 221     author_name = u"Wolne Lektury"
 222     author_link = u"http://www.wolnelektury.pl/"
 223
 224     def get_object(self, request, category):
 225         feed = [feed for feed in _root_feeds if feed['category']==category]
 226         if feed:
 227             feed = feed[0]
 228         else:
 229             raise Http404
 230
 231         return feed
 232
 233     def title(self, feed):
 234         return feed['title']
 235
 236     def items(self, feed):
 237         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 238
 239     def item_title(self, item):
 240         return item.name
 241
 242     def item_link(self, item):
 243         return reverse("opds_by_tag", args=[item.category, item.slug])
 244
 245     def item_description(self):
 246         return u''
 247
 248 @piwik_track
 249 class ByTagFeed(AcquisitionFeed):
 250     def link(self, tag):
 251         return tag.get_absolute_url()
 252
 253     def title(self, tag):
 254         return tag.name
 255
 256     def description(self, tag):
 257         return u"Spis utworów na stronie http://WolneLektury.pl"
 258
 259     def get_object(self, request, category, slug):
 260         return get_object_or_404(Tag, category=category, slug=slug)
 261
 262     def items(self, tag):
 263         books = Book.tagged.with_any([tag])
 264         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
 265         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 266         if descendants_keys:
 267             books = books.exclude(pk__in=descendants_keys)
 268
 269         return books
 270
 271
 272 @factory_decorator(logged_in_or_basicauth())
 273 @piwik_track
 274 class UserFeed(Feed):
 275     feed_type = OPDSFeed
 276     link = u'http://www.wolnelektury.pl/'
 277     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 278     author_name = u"Wolne Lektury"
 279     author_link = u"http://www.wolnelektury.pl/"
 280
 281     def get_object(self, request):
 282         return request.user
 283
 284     def title(self, user):
 285         return u"Półki użytkownika %s" % user.username
 286
 287     def items(self, user):
 288         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 289
 290     def item_title(self, item):
 291         return item.name
 292
 293     def item_link(self, item):
 294         return reverse("opds_user_set", args=[item.slug])
 295
 296     def item_description(self):
 297         return u''
 298
 299 # no class decorators in python 2.5
 300 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
 301
 302
 303 @factory_decorator(logged_in_or_basicauth())
 304 @piwik_track
 305 class UserSetFeed(AcquisitionFeed):
 306     def link(self, tag):
 307         return tag.get_absolute_url()
 308
 309     def title(self, tag):
 310         return tag.name
 311
 312     def description(self, tag):
 313         return u"Spis utworów na stronie http://WolneLektury.pl"
 314
 315     def get_object(self, request, slug):
 316         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 317
 318     def items(self, tag):
 319         return Book.tagged.with_any([tag])
 320
 321 # no class decorators in python 2.5
 322 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
 323
 324
 325 @piwik_track
 326 class SearchFeed(AcquisitionFeed):
 327     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 328     title = u"Wyniki wyszukiwania"
 329
 330     INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
 331
 332     def get_object(self, request):
 333         """
 334         For OPDS 1.1 We should handle a query for search terms
 335         and criteria provided either as opensearch or 'inline' query.
 336         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 337         atom:title. Inline query provides author, title, categories (treated as book tags),
 338         description (treated as content search terms).
 339
 340         if search terms are provided, we shall search for books
 341         according to Hint information (from author & contributror & title).
 342
 343         but if search terms are empty, we should do a different search
 344         (perhaps for is_book=True)
 345
 346         """
 347         JVM.attachCurrentThread()
 348
 349         query = request.GET.get('q', '')
 350
 351         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 352         if inline_criteria:
 353             def get_criteria(criteria, name, position):
 354                 e = filter(lambda el: el[0][0:len(name)] == name, criteria)
 355                 print e
 356                 if not e:
 357                     return None
 358                 c = e[0][position]
 359                 print c
 360                 if c[0] == '"' and c[-1] == '"':
 361                     c = c[1:-1]
 362                     c = c.replace('+', ' ')
 363                 return c
 364
 365             #import pdb; pdb.set_trace()
 366             author = get_criteria(inline_criteria, 'author', 1)
 367             title = get_criteria(inline_criteria, 'title', 2)
 368             translator = None
 369             categories = get_criteria(inline_criteria, 'categories', 3)
 370             query = get_criteria(inline_criteria, 'description', 4)
 371         else:
 372             author = request.GET.get('author', '')
 373             title = request.GET.get('title', '')
 374             translator = request.GET.get('translator', '')
 375             categories = None
 376             fuzzy = False
 377
 378
 379         srch = MultiSearch()
 380         hint = srch.hint()
 381
 382         # Scenario 1: full search terms provided.
 383         # Use auxiliarry information to narrow it and make it better.
 384         if query:
 385             filters = []
 386
 387             if author:
 388                 print "narrow to author %s" % author
 389                 hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))
 390
 391             if translator:
 392                 print "filter by translator %s" % translator
 393                 filters.append(QueryWrapperFilter(
 394                     srch.make_phrase(srch.get_tokens(translator, field='translators'),
 395                                      field='translators')))
 396
 397             if categories:
 398                 filters.append(QueryWrapperFilter(
 399                     srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
 400                                      field='tag_name_pl')))
 401
 402             flt = srch.chain_filters(filters)
 403             if title:
 404                 print "hint by book title %s" % title
 405                 q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
 406                 hint.books(*srch.search_books(q, filter=flt))
 407
 408             toks = srch.get_tokens(query)
 409             print "tokens: %s" % toks
 410             #            import pdb; pdb.set_trace()
 411             results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
 412                 srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
 413                 srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
 414             results.sort(reverse=True)
 415             return [r.book for r in results]
 416         else:
 417             # Scenario 2: since we no longer have to figure out what the query term means to the user,
 418             # we can just use filters and not the Hint class.
 419             filters = []
 420
 421             fields = {
 422                 'author': author,
 423                 'translators': translator,
 424                 'title': title
 425                 }
 426
 427             for fld, q in fields.items():
 428                 if q:
 429                     filters.append(QueryWrapperFilter(
 430                         srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))
 431
 432             flt = srch.chain_filters(filters)
 433             books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)
 434             return books
 435
 436     def get_link(self, query):
 437         return "%s?q=%s" % (reverse('search'), query)
 438
 439     def items(self, books):
 440         try:
 441             return books
 442         except ValueError:
 443             # too short a query
 444             return []