some basic docs
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15
16 from basicauth import logged_in_or_basicauth, factory_decorator
17 from catalogue.models import Book, Tag
18
19 from search import Search, SearchResult, JVM
20 from lucene import Term, QueryWrapperFilter, TermQuery
21
22 import re
23
24 from stats.utils import piwik_track
25
# Entries of the top-level OPDS navigation feed.  Each row below is expanded
# into a dict with the keys: category (tag category, empty for the user
# shelves entry), link (URL pattern name), link_args (arguments for reversing
# that pattern), title and description.
_root_feeds = tuple(
    {
        u"category": category,
        u"link": link,
        u"link_args": link_args,
        u"title": title,
        u"description": description,
    }
    for category, link, link_args, title, description in (
        (u"", u"opds_user", [],
         u"Moje półki", u"Półki użytkownika dostępne po zalogowaniu"),
        (u"author", u"opds_by_category", [u"author"],
         u"Autorzy", u"Utwory wg autorów"),
        (u"kind", u"opds_by_category", [u"kind"],
         u"Rodzaje", u"Utwory wg rodzajów"),
        (u"genre", u"opds_by_category", [u"genre"],
         u"Gatunki", u"Utwory wg gatunków"),
        (u"epoch", u"opds_by_category", [u"epoch"],
         u"Epoki", u"Utwory wg epok"),
    )
)
63
64
def full_url(url):
    """Resolve ``url`` against ``http://<domain of the current Site>``."""
    current_site = Site.objects.get_current()
    base = "http://%s" % current_site.domain
    return urljoin(base, url)
67
68
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes OPDS-flavoured entries.

    Items without an enclosure are written as navigation ("subsection")
    links; items with an enclosure are written as acquisition links.  Both
    kinds get a thumbnail link pointing at a static PNG icon.
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Icon for navigation entries.  Reading the file size is best-effort: on
    # any failure the "length" attribute is simply left empty.
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except Exception:
        _book_parent_img_size = ''

    # Icon for acquisition entries, with the same best-effort size lookup.
    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except Exception:
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus the OPDS links.

        Adds a ``rel="start"`` link to the ``opds_authors`` URL and a
        ``rel="search"`` link to the static OpenSearch description file.
        """
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write a single feed entry (modified from Atom1Feed.add_item_elements).

        ``handler`` is the XML generator being written to; ``item`` is the
        item dict, from which the keys title, link, enclosure, pubdate,
        author_name, author_email, author_link, unique_id, description,
        categories and item_copyright are read.
        """
        # These helpers were used below without ever being imported, which
        # raised NameError for any item with a pubdate or without a unique id.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: use the explicit one, otherwise derive a tag URI from the
        # item link and publication date.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
157
158
class AcquisitionFeed(Feed):
    """Base class for OPDS feeds whose items are books.

    Each book's EPUB file, when present, is exposed as the item enclosure.
    """
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book title."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the book's absolute URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].name
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError; the original
            # KeyError-only handler never caught the "no author" case.
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except (IndexError, KeyError):
            # Same fallback as item_author_name for books without an author.
            return u''

    def item_enclosure_url(self, book):
        """Return the full URL of the EPUB file, or None without one."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file, or None without one."""
        return book.epub_file.size if book.epub_file else None
192
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed; its items are the ``_root_feeds`` entries."""
    feed_type = OPDSFeed

    # Feed-level metadata.
    title = u'Wolne Lektury'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://wolnelektury.pl/'
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root feed entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's title."""
        return item['title']

    def item_description(self, item):
        """Return the entry's description."""
        return item['description']

    def item_link(self, item):
        """Reverse the entry's URL pattern name with its stored arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)
213
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the tags of one category."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category``.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title of the selected root feed entry."""
        return feed['title']

    def items(self, feed):
        """Return the tags of the feed's category that have a non-zero book_count."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
245
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books carrying a given tag."""

    def get_object(self, request, category, slug):
        """Return the tag identified by category and slug, or raise 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def title(self, tag):
        """Return the tag name."""
        return tag.name

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with ``tag``, minus nested ones.

        Books that are themselves tagged with the 'book' tag of any matched
        book are excluded (presumably these are child books of a matched
        parent -- confirm against the catalogue models).
        """
        tagged = Book.tagged.with_any([tag])
        own_slugs = [book.book_tag_slug() for book in tagged.iterator()]
        book_tags = Tag.objects.filter(category='book', slug__in=own_slugs)
        nested_pks = [nested.pk for nested in Book.tagged.with_any(book_tags)]
        if not nested_pks:
            return tagged
        return tagged.exclude(pk__in=nested_pks)
268
269
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the 'set' tags that belong to the request's user."""
    feed_type = OPDSFeed

    # Feed-level metadata.
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    link = u'http://www.wolnelektury.pl/'
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """Use the user attached to the request as the feed object."""
        return request.user

    def title(self, user):
        """Return the feed title containing the user's username."""
        return u"Półki użytkownika %s" % user.username

    def items(self, user):
        """Return the user's 'set' tags that have a non-zero book_count."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for this user set."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
296
297 # no class decorators in python 2.5
298 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
299
300
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books in one 'set' tag of the request's user."""

    def get_object(self, request, slug):
        """Return the user's 'set' tag with the given slug, or raise 404."""
        owner = request.user
        return get_object_or_404(Tag, category='set', slug=slug, user=owner)

    def title(self, tag):
        """Return the tag name."""
        return tag.name

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with ``tag``."""
        wanted_tags = [tag]
        return Book.tagged.with_any(wanted_tags)
318
319 # no class decorators in python 2.5
320 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
321
322
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the results of a catalogue search."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Matches "field:value" criteria embedded in the q parameter.  findall()
    # yields tuples of (whole match, author, title, categories, description).
    INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")

    def get_object(self, request):
        """
        Run the search described by the request and return the found books.

        For OPDS 1.1 we should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        If search terms are provided, we search for books
        according to Hint information (from author & contributor & title).

        If search terms are empty, we do a different search
        (for is_book=True), narrowed by filters only.
        """
        JVM.attachCurrentThread()

        query = request.GET.get('q', '')

        # Previously set only in the non-inline branch, so an inline query
        # carrying search terms failed on the unbound name in scenario 1.
        fuzzy = False

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            def get_criteria(criteria, name, position):
                """Return the value of the first ``name:`` criterion, or None."""
                matching = [el for el in criteria if el[0].startswith(name)]
                if not matching:
                    return None
                value = matching[0][position]
                # A value wrapped in double quotes uses '+' in place of spaces.
                if value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                    value = value.replace('+', ' ')
                return value

            author = get_criteria(inline_criteria, 'author', 1)
            title = get_criteria(inline_criteria, 'title', 2)
            translator = None
            categories = get_criteria(inline_criteria, 'categories', 3)
            query = get_criteria(inline_criteria, 'description', 4)
        else:
            author = request.GET.get('author', '')
            title = request.GET.get('title', '')
            translator = request.GET.get('translator', '')

            # Our client didn't handle the opds placeholders
            if author == '{atom:author}':
                author = ''
            if title == '{atom:title}':
                title = ''
            if translator == '{atom:contributor}':
                translator = ''
            categories = None

        srch = Search()
        hint = srch.hint()

        # Scenario 1: full search terms provided.
        # Use auxiliary information to narrow it and make it better.
        if query:
            filters = []

            if author:
                # narrow the hint to tags matching the author
                hint.tags(srch.search_tags(author, filt=srch.term_filter(Term('tag_category', 'author'))))

            if translator:
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(translator, field='translators'),
                                     field='translators')))

            if categories:
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
                                     field='tag_name_pl')))

            flt = srch.chain_filters(filters)
            if title:
                # hint by the books whose title matches
                q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
                hint.books(*srch.search_books(q, filt=flt))

            toks = srch.get_tokens(query)
            results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
                srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
                srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
            results.sort(reverse=True)
            return [r.book for r in results]
        else:
            # Scenario 2: since we no longer have to figure out what the query term means to the user,
            # we can just use filters and not the Hint class.
            filters = []

            fields = {
                'author': author,
                'translators': translator,
                'title': title
                }

            for fld, q in fields.items():
                if q:
                    filters.append(QueryWrapperFilter(
                        srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))

            flt = srch.chain_filters(filters)
            books = srch.search_books(TermQuery(Term('is_book', 'true')), filt=flt)
            return books

    def get_link(self, query):
        """Return the URL of the 'search' view with ``query`` as its q parameter."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books produced by get_object unchanged."""
        # The former try/except ValueError around this return could never
        # trigger: returning a name does not raise.
        return books
446             # too short a query
447             return []