apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from base64 import b64encode
   6 import os.path
   7 from urlparse import urljoin
   8 from urllib2 import unquote
   9
  10 from django.contrib.syndication.views import Feed
  11 from django.core.urlresolvers import reverse
  12 from django.shortcuts import get_object_or_404
  13 from django.utils.feedgenerator import Atom1Feed
  14 from django.conf import settings
  15 from django.http import Http404
  16 from django.contrib.sites.models import Site
  17
  18 from basicauth import logged_in_or_basicauth, factory_decorator
  19 from catalogue.models import Book, Tag
  20
  21 from search import MultiSearch, SearchResult, JVM
  22 from lucene import Term, QueryWrapperFilter, TermQuery
  23
  24 import re
  25
  26 from stats.utils import piwik_track
  27
  28 _root_feeds = (
  29     {
  30         u"category": u"",
  31         u"link": u"opds_user",
  32         u"link_args": [],
  33         u"title": u"Moje półki",
  34         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  35     },
  36     {
  37         u"category": u"author",
  38         u"link": u"opds_by_category",
  39         u"link_args": [u"author"],
  40         u"title": u"Autorzy",
  41         u"description": u"Utwory wg autorów"
  42     },
  43     {
  44         u"category": u"kind",
  45         u"link": u"opds_by_category",
  46         u"link_args": [u"kind"],
  47         u"title": u"Rodzaje",
  48         u"description": u"Utwory wg rodzajów"
  49     },
  50     {
  51         u"category": u"genre",
  52         u"link": u"opds_by_category",
  53         u"link_args": [u"genre"],
  54         u"title": u"Gatunki",
  55         u"description": u"Utwory wg gatunków"
  56     },
  57     {
  58         u"category": u"epoch",
  59         u"link": u"opds_by_category",
  60         u"link_args": [u"epoch"],
  61         u"title": u"Epoki",
  62         u"description": u"Utwory wg epok"
  63     },
  64 )
  65
  66
  67 def full_url(url):
  68     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  69
  70
  71 class OPDSFeed(Atom1Feed):
  72     link_rel = u"subsection"
  73     link_type = u"application/atom+xml"
  74
  75     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  76     try:
  77         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  78     except:
  79         _book_parent_img_size = ''
  80
  81     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  82     try:
  83         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  84     except:
  85         _book_img_size = ''
  86
  87
  88     def add_root_elements(self, handler):
  89         super(OPDSFeed, self).add_root_elements(handler)
  90         handler.addQuickElement(u"link", None,
  91                                 {u"href": reverse("opds_authors"),
  92                                  u"rel": u"start",
  93                                  u"type": u"application/atom+xml"})
  94         handler.addQuickElement(u"link", None,
  95                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  96                                  u"rel": u"search",
  97                                  u"type": u"application/opensearchdescription+xml"})
  98
  99
 100     def add_item_elements(self, handler, item):
 101         """ modified from Atom1Feed.add_item_elements """
 102         handler.addQuickElement(u"title", item['title'])
 103
 104         # add a OPDS Navigation link if there's no enclosure
 105         if item['enclosure'] is None:
 106             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 107             # add a "green book" icon
 108             handler.addQuickElement(u"link", '',
 109                 {u"rel": u"http://opds-spec.org/thumbnail",
 110                  u"href": self._book_parent_img,
 111                  u"length": self._book_parent_img_size,
 112                  u"type": u"image/png"})
 113         if item['pubdate'] is not None:
 114             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 115
 116         # Author information.
 117         if item['author_name'] is not None:
 118             handler.startElement(u"author", {})
 119             handler.addQuickElement(u"name", item['author_name'])
 120             if item['author_email'] is not None:
 121                 handler.addQuickElement(u"email", item['author_email'])
 122             if item['author_link'] is not None:
 123                 handler.addQuickElement(u"uri", item['author_link'])
 124             handler.endElement(u"author")
 125
 126         # Unique ID.
 127         if item['unique_id'] is not None:
 128             unique_id = item['unique_id']
 129         else:
 130             unique_id = get_tag_uri(item['link'], item['pubdate'])
 131         handler.addQuickElement(u"id", unique_id)
 132
 133         # Summary.
 134         # OPDS needs type=text
 135         if item['description'] is not None:
 136             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 137
 138         # Enclosure as OPDS Acquisition Link
 139         if item['enclosure'] is not None:
 140             handler.addQuickElement(u"link", '',
 141                 {u"rel": u"http://opds-spec.org/acquisition",
 142                  u"href": item['enclosure'].url,
 143                  u"length": item['enclosure'].length,
 144                  u"type": item['enclosure'].mime_type})
 145             # add a "red book" icon
 146             handler.addQuickElement(u"link", '',
 147                 {u"rel": u"http://opds-spec.org/thumbnail",
 148                  u"href": self._book_img,
 149                  u"length": self._book_img_size,
 150                  u"type": u"image/png"})
 151
 152         # Categories.
 153         for cat in item['categories']:
 154             handler.addQuickElement(u"category", u"", {u"term": cat})
 155
 156         # Rights.
 157         if item['item_copyright'] is not None:
 158             handler.addQuickElement(u"rights", item['item_copyright'])
 159
 160
 161 class AcquisitionFeed(Feed):
 162     feed_type = OPDSFeed
 163     link = u'http://www.wolnelektury.pl/'
 164     item_enclosure_mime_type = "application/epub+zip"
 165     author_name = u"Wolne Lektury"
 166     author_link = u"http://www.wolnelektury.pl/"
 167
 168     def item_title(self, book):
 169         return book.title
 170
 171     def item_description(self):
 172         return u''
 173
 174     def item_link(self, book):
 175         return book.get_absolute_url()
 176
 177     def item_author_name(self, book):
 178         try:
 179             return book.tags.filter(category='author')[0].name
 180         except KeyError:
 181             return u''
 182
 183     def item_author_link(self, book):
 184         try:
 185             return book.tags.filter(category='author')[0].get_absolute_url()
 186         except KeyError:
 187             return u''
 188
 189     def item_enclosure_url(self, book):
 190         return full_url(book.root_ancestor.epub_file.url)
 191
 192     def item_enclosure_length(self, book):
 193         return book.root_ancestor.epub_file.size
 194
 195 @piwik_track
 196 class RootFeed(Feed):
 197     feed_type = OPDSFeed
 198     title = u'Wolne Lektury'
 199     link = u'http://www.wolnelektury.pl/'
 200     description = u"Spis utworów na stronie http://WolneLektury.pl"
 201     author_name = u"Wolne Lektury"
 202     author_link = u"http://www.wolnelektury.pl/"
 203
 204     def items(self):
 205         return _root_feeds
 206
 207     def item_title(self, item):
 208         return item['title']
 209
 210     def item_link(self, item):
 211         return reverse(item['link'], args=item['link_args'])
 212
 213     def item_description(self, item):
 214         return item['description']
 215
 216 @piwik_track
 217 class ByCategoryFeed(Feed):
 218     feed_type = OPDSFeed
 219     link = u'http://www.wolnelektury.pl/'
 220     description = u"Spis utworów na stronie http://WolneLektury.pl"
 221     author_name = u"Wolne Lektury"
 222     author_link = u"http://www.wolnelektury.pl/"
 223
 224     def get_object(self, request, category):
 225         feed = [feed for feed in _root_feeds if feed['category']==category]
 226         if feed:
 227             feed = feed[0]
 228         else:
 229             raise Http404
 230
 231         return feed
 232
 233     def title(self, feed):
 234         return feed['title']
 235
 236     def items(self, feed):
 237         return (tag for tag in Tag.objects.filter(category=feed['category']) if tag.get_count() > 0)
 238
 239     def item_title(self, item):
 240         return item.name
 241
 242     def item_link(self, item):
 243         return reverse("opds_by_tag", args=[item.category, item.slug])
 244
 245     def item_description(self):
 246         return u''
 247
 248 @piwik_track
 249 class ByTagFeed(AcquisitionFeed):
 250     def link(self, tag):
 251         return tag.get_absolute_url()
 252
 253     def title(self, tag):
 254         return tag.name
 255
 256     def description(self, tag):
 257         return u"Spis utworów na stronie http://WolneLektury.pl"
 258
 259     def get_object(self, request, category, slug):
 260         return get_object_or_404(Tag, category=category, slug=slug)
 261
 262     def items(self, tag):
 263         books = Book.tagged.with_any([tag])
 264         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
 265         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 266         if descendants_keys:
 267             books = books.exclude(pk__in=descendants_keys)
 268
 269         return books
 270
 271
 272 @factory_decorator(logged_in_or_basicauth())
 273 @piwik_track
 274 class UserFeed(Feed):
 275     feed_type = OPDSFeed
 276     link = u'http://www.wolnelektury.pl/'
 277     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 278     author_name = u"Wolne Lektury"
 279     author_link = u"http://www.wolnelektury.pl/"
 280
 281     def get_object(self, request):
 282         return request.user
 283
 284     def title(self, user):
 285         return u"Półki użytkownika %s" % user.username
 286
 287     def items(self, user):
 288         return (tag for tag in Tag.objects.filter(category='set', user=user) if tag.get_count() > 0)
 289
 290     def item_title(self, item):
 291         return item.name
 292
 293     def item_link(self, item):
 294         return reverse("opds_user_set", args=[item.slug])
 295
 296     def item_description(self):
 297         return u''
 298
# Python 2.5 has no class-decorator syntax; there the decorator above
# would have to be applied explicitly instead:
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
 301
 302
 303 @factory_decorator(logged_in_or_basicauth())
 304 @piwik_track
 305 class UserSetFeed(AcquisitionFeed):
 306     def link(self, tag):
 307         return tag.get_absolute_url()
 308
 309     def title(self, tag):
 310         return tag.name
 311
 312     def description(self, tag):
 313         return u"Spis utworów na stronie http://WolneLektury.pl"
 314
 315     def get_object(self, request, slug):
 316         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 317
 318     def items(self, tag):
 319         return Book.tagged.with_any([tag])
 320
# Python 2.5 has no class-decorator syntax; there the decorator above
# would have to be applied explicitly instead:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
 323
 324
 325 @piwik_track
 326 class SearchFeed(AcquisitionFeed):
 327     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 328     title = u"Wyniki wyszukiwania"
 329
 330     INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
 331
 332     def get_object(self, request):
 333         """
 334         For OPDS 1.1 We should handle a query for search terms
 335         and atom:author, atom:contributor, atom:title
 336         if search terms are provided, we shall search for books
 337         according to Hint information (from author & contributror & title).
 338
 339         but if search terms are empty, we should do a different search
 340         (perhaps for is_book=True)
 341
 342         """
 343         JVM.attachCurrentThread()
 344
 345         query = request.GET.get('q', '')
 346
 347         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 348         if inline_criteria:
 349             def get_criteria(criteria, name, position):
 350                 e = filter(lambda el: el[0][0:len(name)] == name, criteria)
 351                 print e
 352                 if not e:
 353                     return None
 354                 c = e[0][position]
 355                 print c
 356                 if c[0] == '"' and c[-1] == '"':
 357                     c = c[1:-1]
 358                     c = c.replace('+', ' ')
 359                 return c
 360
 361             #import pdb; pdb.set_trace()
 362             author = get_criteria(inline_criteria, 'author', 1)
 363             title = get_criteria(inline_criteria, 'title', 2)
 364             translator = None
 365             categories = get_criteria(inline_criteria, 'categories', 3)
 366             query = get_criteria(inline_criteria, 'description', 4)
 367         else:
 368             author = request.GET.get('author', '')
 369             title = request.GET.get('title', '')
 370             translator = request.GET.get('translator', '')
 371             categories = None
 372             fuzzy = False
 373
 374
 375         srch = MultiSearch()
 376         hint = srch.hint()
 377
 378         # Scenario 1: full search terms provided.
 379         # Use auxiliarry information to narrow it and make it better.
 380         if query:
 381             filters = []
 382
 383             if author:
 384                 print "narrow to author %s" % author
 385                 hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))
 386
 387             if translator:
 388                 print "filter by translator %s" % translator
 389                 filters.append(QueryWrapperFilter(
 390                     srch.make_phrase(srch.get_tokens(translator, field='translators'),
 391                                      field='translators')))
 392
 393             if categories:
 394                 filters.append(QueryWrapperFilter(
 395                     srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
 396                                      field='tag_name_pl')))
 397
 398             flt = srch.chain_filters(filters)
 399             if title:
 400                 print "hint by book title %s" % title
 401                 q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
 402                 hint.books(*srch.search_books(q, filter=flt))
 403
 404             toks = srch.get_tokens(query)
 405             print "tokens: %s" % toks
 406             #            import pdb; pdb.set_trace()
 407             results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
 408                 srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
 409                 srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
 410             results.sort(reverse=True)
 411             return [r.book for r in results]
 412         else:
 413             # Scenario 2: since we no longer have to figure out what the query term means to the user,
 414             # we can just use filters and not the Hint class.
 415             filters = []
 416
 417             fields = {
 418                 'author': author,
 419                 'translators': translator,
 420                 'title': title
 421                 }
 422
 423             for fld, q in fields.items():
 424                 if q:
 425                     filters.append(QueryWrapperFilter(
 426                         srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))
 427
 428             flt = srch.chain_filters(filters)
 429             books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)
 430             return books
 431
 432     def get_link(self, query):
 433         return "%s?q=%s" % (reverse('search'), query)
 434
 435     def items(self, books):
 436         try:
 437             return books
 438         except ValueError:
 439             # too short a query
 440             return []