apps/opds/views.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
import os.path
from urlparse import urljoin

from django.contrib.syndication.views import Feed
from django.core.urlresolvers import reverse
from django.shortcuts import get_object_or_404
from django.utils.feedgenerator import Atom1Feed, get_tag_uri, rfc3339_date
from django.conf import settings
from django.http import Http404
from django.contrib.sites.models import Site

from basicauth import logged_in_or_basicauth, factory_decorator
from catalogue.models import Book, Tag

from search import Search, SearchResult, JVM
from lucene import Term, QueryWrapperFilter, TermQuery

import re

from stats.utils import piwik_track
  25
  26 _root_feeds = (
  27     {
  28         u"category": u"",
  29         u"link": u"opds_user",
  30         u"link_args": [],
  31         u"title": u"Moje półki",
  32         u"description": u"Półki użytkownika dostępne po zalogowaniu"
  33     },
  34     {
  35         u"category": u"author",
  36         u"link": u"opds_by_category",
  37         u"link_args": [u"author"],
  38         u"title": u"Autorzy",
  39         u"description": u"Utwory wg autorów"
  40     },
  41     {
  42         u"category": u"kind",
  43         u"link": u"opds_by_category",
  44         u"link_args": [u"kind"],
  45         u"title": u"Rodzaje",
  46         u"description": u"Utwory wg rodzajów"
  47     },
  48     {
  49         u"category": u"genre",
  50         u"link": u"opds_by_category",
  51         u"link_args": [u"genre"],
  52         u"title": u"Gatunki",
  53         u"description": u"Utwory wg gatunków"
  54     },
  55     {
  56         u"category": u"epoch",
  57         u"link": u"opds_by_category",
  58         u"link_args": [u"epoch"],
  59         u"title": u"Epoki",
  60         u"description": u"Utwory wg epok"
  61     },
  62 )
  63
  64
  65 def full_url(url):
  66     return urljoin("http://%s" % Site.objects.get_current().domain, url)
  67
  68
  69 class OPDSFeed(Atom1Feed):
  70     link_rel = u"subsection"
  71     link_type = u"application/atom+xml"
  72
  73     _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
  74     try:
  75         _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
  76     except:
  77         _book_parent_img_size = ''
  78
  79     _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
  80     try:
  81         _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
  82     except:
  83         _book_img_size = ''
  84
  85
  86     def add_root_elements(self, handler):
  87         super(OPDSFeed, self).add_root_elements(handler)
  88         handler.addQuickElement(u"link", None,
  89                                 {u"href": reverse("opds_authors"),
  90                                  u"rel": u"start",
  91                                  u"type": u"application/atom+xml"})
  92         handler.addQuickElement(u"link", None,
  93                                 {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
  94                                  u"rel": u"search",
  95                                  u"type": u"application/opensearchdescription+xml"})
  96
  97
  98     def add_item_elements(self, handler, item):
  99         """ modified from Atom1Feed.add_item_elements """
 100         handler.addQuickElement(u"title", item['title'])
 101
 102         # add a OPDS Navigation link if there's no enclosure
 103         if item['enclosure'] is None:
 104             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
 105             # add a "green book" icon
 106             handler.addQuickElement(u"link", '',
 107                 {u"rel": u"http://opds-spec.org/thumbnail",
 108                  u"href": self._book_parent_img,
 109                  u"length": self._book_parent_img_size,
 110                  u"type": u"image/png"})
 111         if item['pubdate'] is not None:
 112             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
 113
 114         # Author information.
 115         if item['author_name'] is not None:
 116             handler.startElement(u"author", {})
 117             handler.addQuickElement(u"name", item['author_name'])
 118             if item['author_email'] is not None:
 119                 handler.addQuickElement(u"email", item['author_email'])
 120             if item['author_link'] is not None:
 121                 handler.addQuickElement(u"uri", item['author_link'])
 122             handler.endElement(u"author")
 123
 124         # Unique ID.
 125         if item['unique_id'] is not None:
 126             unique_id = item['unique_id']
 127         else:
 128             unique_id = get_tag_uri(item['link'], item['pubdate'])
 129         handler.addQuickElement(u"id", unique_id)
 130
 131         # Summary.
 132         # OPDS needs type=text
 133         if item['description'] is not None:
 134             handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
 135
 136         # Enclosure as OPDS Acquisition Link
 137         if item['enclosure'] is not None:
 138             handler.addQuickElement(u"link", '',
 139                 {u"rel": u"http://opds-spec.org/acquisition",
 140                  u"href": item['enclosure'].url,
 141                  u"length": item['enclosure'].length,
 142                  u"type": item['enclosure'].mime_type})
 143             # add a "red book" icon
 144             handler.addQuickElement(u"link", '',
 145                 {u"rel": u"http://opds-spec.org/thumbnail",
 146                  u"href": self._book_img,
 147                  u"length": self._book_img_size,
 148                  u"type": u"image/png"})
 149
 150         # Categories.
 151         for cat in item['categories']:
 152             handler.addQuickElement(u"category", u"", {u"term": cat})
 153
 154         # Rights.
 155         if item['item_copyright'] is not None:
 156             handler.addQuickElement(u"rights", item['item_copyright'])
 157
 158
 159 class AcquisitionFeed(Feed):
 160     feed_type = OPDSFeed
 161     link = u'http://www.wolnelektury.pl/'
 162     item_enclosure_mime_type = "application/epub+zip"
 163     author_name = u"Wolne Lektury"
 164     author_link = u"http://www.wolnelektury.pl/"
 165
 166     def item_title(self, book):
 167         return book.title
 168
 169     def item_description(self):
 170         return u''
 171
 172     def item_link(self, book):
 173         return book.get_absolute_url()
 174
 175     def item_author_name(self, book):
 176         try:
 177             return book.tags.filter(category='author')[0].name
 178         except KeyError:
 179             return u''
 180
 181     def item_author_link(self, book):
 182         try:
 183             return book.tags.filter(category='author')[0].get_absolute_url()
 184         except KeyError:
 185             return u''
 186
 187     def item_enclosure_url(self, book):
 188         return full_url(book.epub_file.url) if book.epub_file else None
 189
 190     def item_enclosure_length(self, book):
 191         return book.epub_file.size if book.epub_file else None
 192
 193 @piwik_track
 194 class RootFeed(Feed):
 195     feed_type = OPDSFeed
 196     title = u'Wolne Lektury'
 197     link = u'http://wolnelektury.pl/'
 198     description = u"Spis utworów na stronie http://WolneLektury.pl"
 199     author_name = u"Wolne Lektury"
 200     author_link = u"http://wolnelektury.pl/"
 201
 202     def items(self):
 203         return _root_feeds
 204
 205     def item_title(self, item):
 206         return item['title']
 207
 208     def item_link(self, item):
 209         return reverse(item['link'], args=item['link_args'])
 210
 211     def item_description(self, item):
 212         return item['description']
 213
 214 @piwik_track
 215 class ByCategoryFeed(Feed):
 216     feed_type = OPDSFeed
 217     link = u'http://wolnelektury.pl/'
 218     description = u"Spis utworów na stronie http://WolneLektury.pl"
 219     author_name = u"Wolne Lektury"
 220     author_link = u"http://wolnelektury.pl/"
 221
 222     def get_object(self, request, category):
 223         feed = [feed for feed in _root_feeds if feed['category']==category]
 224         if feed:
 225             feed = feed[0]
 226         else:
 227             raise Http404
 228
 229         return feed
 230
 231     def title(self, feed):
 232         return feed['title']
 233
 234     def items(self, feed):
 235         return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
 236
 237     def item_title(self, item):
 238         return item.name
 239
 240     def item_link(self, item):
 241         return reverse("opds_by_tag", args=[item.category, item.slug])
 242
 243     def item_description(self):
 244         return u''
 245
 246 @piwik_track
 247 class ByTagFeed(AcquisitionFeed):
 248     def link(self, tag):
 249         return tag.get_absolute_url()
 250
 251     def title(self, tag):
 252         return tag.name
 253
 254     def description(self, tag):
 255         return u"Spis utworów na stronie http://WolneLektury.pl"
 256
 257     def get_object(self, request, category, slug):
 258         return get_object_or_404(Tag, category=category, slug=slug)
 259
 260     def items(self, tag):
 261         books = Book.tagged.with_any([tag])
 262         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
 263         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 264         if descendants_keys:
 265             books = books.exclude(pk__in=descendants_keys)
 266
 267         return books
 268
 269
 270 @factory_decorator(logged_in_or_basicauth())
 271 @piwik_track
 272 class UserFeed(Feed):
 273     feed_type = OPDSFeed
 274     link = u'http://www.wolnelektury.pl/'
 275     description = u"Półki użytkownika na stronie http://WolneLektury.pl"
 276     author_name = u"Wolne Lektury"
 277     author_link = u"http://wolnelektury.pl/"
 278
 279     def get_object(self, request):
 280         return request.user
 281
 282     def title(self, user):
 283         return u"Półki użytkownika %s" % user.username
 284
 285     def items(self, user):
 286         return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
 287
 288     def item_title(self, item):
 289         return item.name
 290
 291     def item_link(self, item):
 292         return reverse("opds_user_set", args=[item.slug])
 293
 294     def item_description(self):
 295         return u''
 296
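# Class decorators require Python 2.6+.  To run on Python 2.5, remove the
# decorator lines above the class and apply them by hand instead:
#UserFeed = factory_decorator(logged_in_or_basicauth())(piwik_track(UserFeed))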
 299
 300
 301 @factory_decorator(logged_in_or_basicauth())
 302 @piwik_track
 303 class UserSetFeed(AcquisitionFeed):
 304     def link(self, tag):
 305         return tag.get_absolute_url()
 306
 307     def title(self, tag):
 308         return tag.name
 309
 310     def description(self, tag):
 311         return u"Spis utworów na stronie http://WolneLektury.pl"
 312
 313     def get_object(self, request, slug):
 314         return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
 315
 316     def items(self, tag):
 317         return Book.tagged.with_any([tag])
 318
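# Class decorators require Python 2.6+.  To run on Python 2.5, remove the
# decorator lines above the class and apply them by hand instead:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(piwik_track(UserSetFeed))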
 321
 322
 323 @piwik_track
 324 class SearchFeed(AcquisitionFeed):
 325     description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
 326     title = u"Wyniki wyszukiwania"
 327
 328     INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
 329
 330     def get_object(self, request):
 331         """
 332         For OPDS 1.1 We should handle a query for search terms
 333         and criteria provided either as opensearch or 'inline' query.
 334         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
 335         atom:title. Inline query provides author, title, categories (treated as book tags),
 336         description (treated as content search terms).
 337
 338         if search terms are provided, we shall search for books
 339         according to Hint information (from author & contributror & title).
 340
 341         but if search terms are empty, we should do a different search
 342         (perhaps for is_book=True)
 343
 344         """
 345         JVM.attachCurrentThread()
 346
 347         query = request.GET.get('q', '')
 348
 349         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
 350         if inline_criteria:
 351             def get_criteria(criteria, name, position):
 352                 e = filter(lambda el: el[0][0:len(name)] == name, criteria)
 353                 print e
 354                 if not e:
 355                     return None
 356                 c = e[0][position]
 357                 print c
 358                 if c[0] == '"' and c[-1] == '"':
 359                     c = c[1:-1]
 360                     c = c.replace('+', ' ')
 361                 return c
 362
 363             #import pdb; pdb.set_trace()
 364             author = get_criteria(inline_criteria, 'author', 1)
 365             title = get_criteria(inline_criteria, 'title', 2)
 366             translator = None
 367             categories = get_criteria(inline_criteria, 'categories', 3)
 368             query = get_criteria(inline_criteria, 'description', 4)
 369         else:
 370             author = request.GET.get('author', '')
 371             title = request.GET.get('title', '')
 372             translator = request.GET.get('translator', '')
 373
 374             # Our client didn't handle the opds placeholders
 375             if author == '{atom:author}': author = ''
 376             if title == '{atom:title}': title = ''
 377             if translator == '{atom:contributor}': translator = ''
 378             categories = None
 379             fuzzy = False
 380
 381
 382         srch = Search()
 383         hint = srch.hint()
 384
 385         # Scenario 1: full search terms provided.
 386         # Use auxiliarry information to narrow it and make it better.
 387         if query:
 388             filters = []
 389
 390             if author:
 391                 print "narrow to author %s" % author
 392                 hint.tags(srch.search_tags(author, filt=srch.term_filter(Term('tag_category', 'author'))))
 393
 394             if translator:
 395                 print "filter by translator %s" % translator
 396                 filters.append(QueryWrapperFilter(
 397                     srch.make_phrase(srch.get_tokens(translator, field='translators'),
 398                                      field='translators')))
 399
 400             if categories:
 401                 filters.append(QueryWrapperFilter(
 402                     srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
 403                                      field='tag_name_pl')))
 404
 405             flt = srch.chain_filters(filters)
 406             if title:
 407                 print "hint by book title %s" % title
 408                 q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
 409                 hint.books(*srch.search_books(q, filt=flt))
 410
 411             toks = srch.get_tokens(query)
 412             print "tokens: %s" % toks
 413             #            import pdb; pdb.set_trace()
 414             results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
 415                 srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
 416                 srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
 417             results.sort(reverse=True)
 418             return [r.book for r in results]
 419         else:
 420             # Scenario 2: since we no longer have to figure out what the query term means to the user,
 421             # we can just use filters and not the Hint class.
 422             filters = []
 423
 424             fields = {
 425                 'author': author,
 426                 'translators': translator,
 427                 'title': title
 428                 }
 429
 430             for fld, q in fields.items():
 431                 if q:
 432                     filters.append(QueryWrapperFilter(
 433                         srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))
 434
 435             flt = srch.chain_filters(filters)
 436             books = srch.search_books(TermQuery(Term('is_book', 'true')), filt=flt)
 437             return books
 438
 439     def get_link(self, query):
 440         return "%s?q=%s" % (reverse('search'), query)
 441
 442     def items(self, books):
 443         try:
 444             return books
 445         except ValueError:
 446             # too short a query
 447             return []