index with solr works.
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15
16 from basicauth import logged_in_or_basicauth, factory_decorator
17 from catalogue.models import Book, Tag
18
19 from search.views import get_search, SearchResult, JVM
20 from lucene import Term, QueryWrapperFilter, TermQuery
21
22 import logging
23 import re
24
25 log = logging.getLogger('opds')
26
27 from stats.utils import piwik_track
28
# Entries of the root OPDS catalog.  Each entry is a dict with:
#   category    - tag category listed by the sub-feed (empty for user shelves),
#   link        - URL pattern name passed to reverse(),
#   link_args   - positional arguments for that URL pattern,
#   title       - entry title shown in the catalog,
#   description - entry summary shown in the catalog.
_root_feeds = (
    # The user's own shelves; this sub-feed is served by a different view.
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu",
    },
) + tuple(
    # One "browse by category" entry per tag category.
    {
        u"category": category,
        u"link": u"opds_by_category",
        u"link_args": [category],
        u"title": title,
        u"description": description,
    }
    for category, title, description in (
        (u"author", u"Autorzy", u"Utwory wg autorów"),
        (u"kind", u"Rodzaje", u"Utwory wg rodzajów"),
        (u"genre", u"Gatunki", u"Utwory wg gatunków"),
        (u"epoch", u"Epoki", u"Utwory wg epok"),
    )
)
66
67
def full_url(url):
    """Return `url` joined onto ``http://<domain of the current Site>``.

    The joining is done by urljoin(), so an `url` that is already absolute
    is handled according to urljoin()'s rules.
    """
    current_domain = Site.objects.get_current().domain
    site_root = "http://%s" % current_domain
    return urljoin(site_root, url)
70
71
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes OPDS catalog entries.

    Entries without an enclosure are written as navigation entries
    (rel="subsection" link plus the "parent book" thumbnail); entries with
    an enclosure are written as acquisition entries (rel=acquisition link
    plus the "book" thumbnail).
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # NOTE: everything below runs once, when the class body is executed.
    # full_url() queries the current Site at that point.
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except (OSError, TypeError, AttributeError):
        # Best effort: the file may be missing or STATIC_ROOT may be unset;
        # fall back to an empty "length" attribute.
        _book_parent_img_size = ''

    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except (OSError, TypeError, AttributeError):
        # Same best-effort fallback as for the parent-book image.
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus OPDS start/search links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write a single feed entry; modified from Atom1Feed.add_item_elements.

        `handler` is the XML generator the elements are written to and
        `item` is the item dictionary; the keys read here are 'title',
        'link', 'enclosure', 'pubdate', 'author_name', 'author_email',
        'author_link', 'unique_id', 'description', 'categories' and
        'item_copyright'.
        """
        # These helpers live next to Atom1Feed but were never imported by
        # this module; import them here so the method does not raise
        # NameError when they are needed.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        has_enclosure = item['enclosure'] is not None

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if not has_enclosure:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if has_enclosure:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
160
161
class AcquisitionFeed(Feed):
    """Base class for feeds whose items are books offered as EPUB enclosures."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book's title."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the book's own URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].name
        except IndexError:
            # Indexing an empty queryset raises IndexError (not KeyError).
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except IndexError:
            # Indexing an empty queryset raises IndexError (not KeyError).
            return u''

    def item_enclosure_url(self, book):
        """Return the absolute URL of the EPUB file, or None without one."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file, or None without one."""
        return book.epub_file.size if book.epub_file else None
195
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed: one entry per element of `_root_feeds`."""
    feed_type = OPDSFeed

    # Feed-level metadata.
    title = u'Wolne Lektury'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://wolnelektury.pl/'
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root catalog entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's 'title' value."""
        return item['title']

    def item_description(self, item):
        """Return the entry's 'description' value."""
        return item['description']

    def item_link(self, item):
        """Resolve the entry's URL pattern name with its stored arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)
216
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first `_root_feeds` entry for `category`.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title stored in the root-feed entry."""
        return feed['title']

    def items(self, feed):
        """Return tags of the entry's category, skipping those with book_count 0."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
248
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books carrying one tag."""

    def get_object(self, request, category, slug):
        """Return the Tag with this category and slug, or raise a 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's own URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return books with `tag`, minus books tagged with those books' own tags.

        NOTE(review): the excluded books are presumably descendants (child
        parts) of the matched books -- confirm against the Book model.
        """
        tagged_books = Book.tagged.with_any([tag])

        # Collect the 'book'-category tags that correspond to the matched books.
        book_tag_slugs = [book.book_tag_slug() for book in tagged_books.iterator()]
        book_tags = Tag.objects.filter(category='book', slug__in=book_tag_slugs)

        # Primary keys of every book carrying any of those tags.
        excluded_pks = [book.pk for book in Book.tagged.with_any(book_tags)]
        if not excluded_pks:
            return tagged_books
        return tagged_books.exclude(pk__in=excluded_pks)
271
272
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the requesting user's non-empty shelves."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """Use the user attached to the request as the feed object."""
        return request.user

    def title(self, user):
        """Return the feed title, which includes the user's username."""
        username = user.username
        return u"Półki użytkownika %s" % username

    def items(self, user):
        """Return the user's 'set' tags, skipping those with book_count 0."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for this shelf."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
299
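# Explicit fallback kept for Python 2.5, which has no class-decorator syntax
# (the class above is already decorated, so this line stays disabled):
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)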
302
303
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books on one of the requesting user's shelves."""

    def get_object(self, request, slug):
        """Return the user's 'set' tag with this slug, or raise a 404."""
        owner = request.user
        return get_object_or_404(Tag, category='set', slug=slug, user=owner)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's own URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books carrying `tag`."""
        wanted_tags = [tag]
        return Book.tagged.with_any(wanted_tags)
321
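# Explicit fallback kept for Python 2.5, which has no class-decorator syntax
# (the class above is already decorated, so this line stays disabled):
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)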
324
325
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books found for a search request."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Recognises "field:value" terms of an inline query.  re.findall() yields
    # one tuple per term: (whole term, author, title, categories, description).
    INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")

    @staticmethod
    def _inline_criterion(criteria, name, position):
        """Return the value of the first inline term starting with `name`.

        `criteria` is the list of tuples produced by INLINE_QUERY_RE and
        `position` is the index of the wanted group within such a tuple.
        Returns None when no term matches.  A value wrapped in double
        quotes has the quotes removed and '+' replaced by spaces.
        """
        # A list (not filter()) so that the emptiness test below is reliable.
        matching = [el for el in criteria if el[0].startswith(name)]
        log.info("get_criteria: %s", matching)
        if not matching:
            return None
        value = matching[0][position]
        log.info("get_criteria: %s", value)
        if value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
            value = value.replace('+', ' ')
        return value

    def get_object(self, request):
        """
        For OPDS 1.1 We should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        if search terms are provided, we shall search for books
        according to Hint information (from author & contributor & title).

        but if search terms are empty, we should do a different search
        (perhaps for is_book=True)

        Returns the books found; this value is later passed to items().
        """
        JVM.attachCurrentThread()

        query = request.GET.get('q', '')

        # Set for both query styles; it used to be assigned only in the
        # OpenSearch branch, which made inline queries with a description
        # term fail with UnboundLocalError.
        fuzzy = False

        inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
        if inline_criteria:
            author = self._inline_criterion(inline_criteria, 'author', 1)
            title = self._inline_criterion(inline_criteria, 'title', 2)
            translator = None
            categories = self._inline_criterion(inline_criteria, 'categories', 3)
            # Only the description term is used as the full-text query.
            query = self._inline_criterion(inline_criteria, 'description', 4)
        else:
            author = request.GET.get('author', '')
            title = request.GET.get('title', '')
            translator = request.GET.get('translator', '')

            # Our client didn't handle the opds placeholders
            if author == '{atom:author}':
                author = ''
            if title == '{atom:title}':
                title = ''
            if translator == '{atom:contributor}':
                translator = ''
            categories = None

        srch = get_search()
        hint = srch.hint()

        # Scenario 1: full search terms provided.
        # Use auxiliary information to narrow it and make it better.
        if query:
            filters = []

            if author:
                log.info("narrow to author %s", author)
                hint.tags(srch.search_tags(srch.make_phrase(srch.get_tokens(author, field='authors'), field='authors'),
                                            filt=srch.term_filter(Term('tag_category', 'author'))))

            if translator:
                log.info("filter by translator %s", translator)
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(translator, field='translators'),
                                     field='translators')))

            if categories:
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
                                     field='tag_name_pl')))

            flt = srch.chain_filters(filters)
            if title:
                log.info("hint by book title %s", title)
                q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
                hint.books(*srch.search_books(q, filt=flt))

            toks = srch.get_tokens(query)
            log.info("tokens for query: %s", toks)

            results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
                srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
                srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
            results.sort(reverse=True)
            books = []
            for r in results:
                try:
                    books.append(r.book)
                except Book.DoesNotExist:
                    # Skip results whose book cannot be loaded.
                    pass
            log.info("books: %s", books)
            return books
        else:
            # Scenario 2: since we no longer have to figure out what the query term means to the user,
            # we can just use filters and not the Hint class.
            filters = []

            # NOTE(review): scenario 1 searches the 'authors' field while
            # this mapping uses 'author' -- confirm which name the index uses.
            fields = {
                'author': author,
                'translators': translator,
                'title': title
                }

            for fld, q in fields.items():
                if q:
                    filters.append(QueryWrapperFilter(
                        srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))

            flt = srch.chain_filters(filters)
            books = srch.search_books(TermQuery(Term('is_book', 'true')), filt=flt)
            return books

    def get_link(self, query):
        """Return the URL of the 'search' view with `query` as its q parameter."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object() unchanged.

        The former ``try/except ValueError`` around the bare ``return`` was
        dead code (returning a value cannot raise) and has been removed.
        """
        return books