09aaa547797ef4269c6d41b50884ff6cae4fd1d5
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15
16 from basicauth import logged_in_or_basicauth, factory_decorator
17 from catalogue.models import Book, Tag
18
19 from search import Search, SearchResult, JVM
20 from lucene import Term, QueryWrapperFilter, TermQuery
21
22 import re
23
24 from stats.utils import piwik_track
25
26 _root_feeds = (
27     {
28         u"category": u"",
29         u"link": u"opds_user",
30         u"link_args": [],
31         u"title": u"Moje półki",
32         u"description": u"Półki użytkownika dostępne po zalogowaniu"
33     },
34     {
35         u"category": u"author",
36         u"link": u"opds_by_category",
37         u"link_args": [u"author"],
38         u"title": u"Autorzy",
39         u"description": u"Utwory wg autorów"
40     },
41     {
42         u"category": u"kind",
43         u"link": u"opds_by_category",
44         u"link_args": [u"kind"],
45         u"title": u"Rodzaje",
46         u"description": u"Utwory wg rodzajów"
47     },
48     {
49         u"category": u"genre",
50         u"link": u"opds_by_category",
51         u"link_args": [u"genre"],
52         u"title": u"Gatunki",
53         u"description": u"Utwory wg gatunków"
54     },
55     {
56         u"category": u"epoch",
57         u"link": u"opds_by_category",
58         u"link_args": [u"epoch"],
59         u"title": u"Epoki",
60         u"description": u"Utwory wg epok"
61     },
62 )
63
64
def full_url(url):
    """Return ``url`` resolved against ``http://<domain of the current Site>``."""
    site_root = "http://%s" % Site.objects.get_current().domain
    return urljoin(site_root, url)
67
68
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes OPDS link relations.

    Items without an enclosure are written as navigation entries
    (``rel="subsection"``); items with an enclosure are written as
    acquisition entries (``rel="http://opds-spec.org/acquisition"``).
    Every entry also gets a thumbnail link to a static icon.

    Note that the icon URLs and sizes below are computed once, when the
    class body is executed (``full_url`` is called at that point).
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Icon attached to navigation entries (no enclosure).
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except Exception:
        # Best effort: a missing icon or unset STATIC_ROOT must not prevent
        # the module from loading; the length is simply left empty.
        _book_parent_img_size = ''

    # Icon attached to acquisition entries (with enclosure).
    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except Exception:
        # Same best-effort fallback as above.
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus OPDS start/search links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write one feed entry; modified from Atom1Feed.add_item_elements.

        ``handler`` is the XML generator being written to and ``item`` is
        the item dictionary; the keys read here are 'title', 'link',
        'enclosure', 'pubdate', 'author_name', 'author_email',
        'author_link', 'unique_id', 'description', 'categories' and
        'item_copyright'.
        """
        # These helpers live next to Atom1Feed but were never imported by
        # this module; without them the pubdate / unique-id branches below
        # raised NameError.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
157
158
class AcquisitionFeed(Feed):
    """Base feed whose items are books offered as EPUB enclosures.

    Subclasses provide ``items`` (and usually ``get_object``, ``title``,
    ``link`` and ``description``); this class supplies the per-book item
    attributes.
    """
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book's title."""
        return book.title

    def item_description(self):
        """Book entries carry an empty description."""
        return u''

    def item_link(self, book):
        """Return the book's own URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].name
        except IndexError:
            # Indexing an empty result raises IndexError (not KeyError):
            # the book has no author tag.
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except IndexError:
            # Same case as in item_author_name: no author tag on the book.
            return u''

    def item_enclosure_url(self, book):
        """Return the absolute URL of the EPUB file, or None without one."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file, or None without one."""
        return book.epub_file.size if book.epub_file else None
192
@piwik_track
class RootFeed(Feed):
    """Top-level OPDS navigation feed built from the ``_root_feeds`` table."""
    feed_type = OPDSFeed
    title = u'Wolne Lektury'
    link = u'http://www.wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def items(self):
        """Return the static root feed descriptors."""
        return _root_feeds

    def item_title(self, item):
        """Return the descriptor's display title."""
        return item['title']

    def item_link(self, item):
        """Reverse the descriptor's URL name with its positional arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)

    def item_description(self, item):
        """Return the descriptor's description text."""
        return item['description']
213
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the tags of one category (authors, kinds, ...)."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the ``_root_feeds`` descriptor for ``category``.

        Raises Http404 when no descriptor has that category.
        """
        for descriptor in _root_feeds:
            if descriptor['category'] == category:
                # The first matching descriptor wins.
                return descriptor
        raise Http404

    def title(self, feed):
        """Return the descriptor's display title."""
        return feed['title']

    def items(self, feed):
        """Return the category's tags, leaving out tags with no books."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag acquisition feed."""
        tag_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=tag_args)

    def item_description(self):
        """Tag entries carry an empty description."""
        return u''
245
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books carrying a given tag."""

    def link(self, tag):
        """Return the tag's own URL."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, category, slug):
        """Fetch the tag by category and slug, or raise a 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def items(self, tag):
        """Return books tagged with ``tag``, minus those tagged as their parts.

        Books that carry the 'book' tag of another matched book are dropped
        (presumably these are child volumes of that book).
        """
        tagged = Book.tagged.with_any([tag])
        own_slugs = [entry.book_tag_slug() for entry in tagged]
        book_tags = Tag.objects.filter(category='book', slug__in=own_slugs)
        child_pks = [child.pk for child in Book.tagged.with_any(book_tags)]
        if not child_pks:
            return tagged

        return tagged.exclude(pk__in=child_pks)
268
269
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the shelves ('set' tags) of the requesting user."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def get_object(self, request):
        """The feed object is the user making the request."""
        return request.user

    def title(self, user):
        """Return the feed title containing the user's name."""
        return u"Półki użytkownika %s" % user.username

    def items(self, user):
        """Return the user's 'set' tags, leaving out tags with no books."""
        shelves = Tag.objects.filter(category='set', user=user)
        return shelves.exclude(book_count=0)

    def item_title(self, item):
        """Return the shelf's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the acquisition feed for this shelf."""
        shelf_args = [item.slug]
        return reverse("opds_user_set", args=shelf_args)

    def item_description(self):
        """Shelf entries carry an empty description."""
        return u''
296
# Python 2.5 has no class-decorator syntax; the explicit equivalent there is:
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
299
300
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books on one shelf of the requesting user."""

    def link(self, tag):
        """Return the shelf tag's own URL."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Return the shelf tag's name."""
        return tag.name

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, slug):
        """Fetch the requesting user's 'set' tag with this slug, or raise a 404."""
        owner = request.user
        return get_object_or_404(Tag, category='set', slug=slug, user=owner)

    def items(self, tag):
        """Return the books carrying the shelf tag."""
        shelf = [tag]
        return Book.tagged.with_any(shelf)
318
# Python 2.5 has no class-decorator syntax; the explicit equivalent there is:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
321
322
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the results of a catalogue search."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Matches inline "field:value" criteria embedded in the q parameter.
    # With findall() every match is a 5-tuple: (whole criterion, author,
    # title, categories, description); groups that did not match are ''.
    INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")

    def get_object(self, request):
        """
        Run the search described by the request and return the found books.

        For OPDS 1.1 We should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        if search terms are provided, we shall search for books
        according to Hint information (from author & contributor & title).

        but if search terms are empty, we should do a different search
        (perhaps for is_book=True)

        Request parameters read: 'q', and - when 'q' holds no inline
        criteria - 'author', 'title' and 'translator'.
        """
        JVM.attachCurrentThread()

        query = request.GET.get('q', '')
        # Fuzzy matching is never turned on. It has to be defined before the
        # branches below: it used to be set only in the OpenSearch branch, so
        # an inline query with a description term raised NameError.
        fuzzy = False

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            def get_criteria(criteria, name, position):
                """Return the value of the first ``name:`` criterion, or None.

                ``position`` is the index of that field's group in the
                findall() tuples. A value wrapped in double quotes is
                unquoted and has '+' replaced by spaces.
                """
                matching = [el for el in criteria if el[0][0:len(name)] == name]
                if not matching:
                    return None
                c = matching[0][position]
                if c[0] == '"' and c[-1] == '"':
                    c = c[1:-1]
                    c = c.replace('+', ' ')
                return c

            author = get_criteria(inline_criteria, 'author', 1)
            title = get_criteria(inline_criteria, 'title', 2)
            translator = None
            categories = get_criteria(inline_criteria, 'categories', 3)
            # The description criterion replaces the raw query as the
            # full-text search terms (None when it is absent).
            query = get_criteria(inline_criteria, 'description', 4)
        else:
            author = request.GET.get('author', '')
            title = request.GET.get('title', '')
            translator = request.GET.get('translator', '')
            categories = None

        srch = Search()
        hint = srch.hint()

        # Scenario 1: full search terms provided.
        # Use auxiliary information to narrow it and make it better.
        if query:
            filters = []

            if author:
                # narrow to author
                hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))

            if translator:
                # filter by translator
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(translator, field='translators'),
                                     field='translators')))

            if categories:
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
                                     field='tag_name_pl')))

            flt = srch.chain_filters(filters)
            if title:
                # hint by book title
                q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
                hint.books(*srch.search_books(q, filter=flt))

            toks = srch.get_tokens(query)
            results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
                srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
                srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
            results.sort(reverse=True)
            return [r.book for r in results]
        else:
            # Scenario 2: since we no longer have to figure out what the query term means to the user,
            # we can just use filters and not the Hint class.
            filters = []

            fields = {
                'author': author,
                'translators': translator,
                'title': title
                }

            for fld, q in fields.items():
                if q:
                    filters.append(QueryWrapperFilter(
                        srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))

            flt = srch.chain_filters(filters)
            books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)
            return books

    def get_link(self, query):
        """Return the URL of the 'search' view with ``query`` as its q parameter."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object unchanged.

        The former ``try/except ValueError`` around this return could never
        trigger (returning a local name does not raise), so it was removed.
        """
        return books