book search - new design - almost ready
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from base64 import b64encode
6 import os.path
7 from urlparse import urljoin
8 from urllib2 import unquote
9
10 from django.contrib.syndication.views import Feed
11 from django.core.urlresolvers import reverse
12 from django.shortcuts import get_object_or_404
13 from django.utils.feedgenerator import Atom1Feed
14 from django.conf import settings
15 from django.http import Http404
16 from django.contrib.sites.models import Site
17
18 from basicauth import logged_in_or_basicauth, factory_decorator
19 from catalogue.models import Book, Tag
20
21 from search import Search, SearchResult, JVM
22 from lucene import Term, QueryWrapperFilter, TermQuery
23
24 import re
25
26 from stats.utils import piwik_track
27
28 _root_feeds = (
29     {
30         u"category": u"",
31         u"link": u"opds_user",
32         u"link_args": [],
33         u"title": u"Moje półki",
34         u"description": u"Półki użytkownika dostępne po zalogowaniu"
35     },
36     {
37         u"category": u"author",
38         u"link": u"opds_by_category",
39         u"link_args": [u"author"],
40         u"title": u"Autorzy",
41         u"description": u"Utwory wg autorów"
42     },
43     {
44         u"category": u"kind",
45         u"link": u"opds_by_category",
46         u"link_args": [u"kind"],
47         u"title": u"Rodzaje",
48         u"description": u"Utwory wg rodzajów"
49     },
50     {
51         u"category": u"genre",
52         u"link": u"opds_by_category",
53         u"link_args": [u"genre"],
54         u"title": u"Gatunki",
55         u"description": u"Utwory wg gatunków"
56     },
57     {
58         u"category": u"epoch",
59         u"link": u"opds_by_category",
60         u"link_args": [u"epoch"],
61         u"title": u"Epoki",
62         u"description": u"Utwory wg epok"
63     },
64 )
65
66
def full_url(url):
    """Return `url` resolved against "http://<domain of the current Site>"."""
    site_root = "http://%s" % Site.objects.get_current().domain
    return urljoin(site_root, url)
69
70
def _static_file_size(relative_path):
    """Return the size in bytes of a file under STATIC_ROOT as a unicode string.

    Best effort: an empty string is returned when the size cannot be
    determined (file missing, STATIC_ROOT not configured, ...), so that
    a missing icon never prevents the module from being imported.
    """
    try:
        return unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, relative_path)))
    except Exception:
        # Deliberately broad, but unlike a bare ``except:`` this lets
        # SystemExit and KeyboardInterrupt propagate.
        return ''


class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes entries as OPDS catalogue entries.

    Items without an enclosure are written as navigation entries (a
    "subsection" link plus a thumbnail); items with an enclosure are
    written as acquisition entries (an acquisition link plus a thumbnail).
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Thumbnail URLs and sizes are computed once, when the class body runs.
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    _book_parent_img_size = _static_file_size("img/book-parent.png")

    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    _book_img_size = _static_file_size("img/book.png")

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus the "start" and "search" links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write one feed entry; modified from Atom1Feed.add_item_elements.

        `handler` is the XML generator the entry is written to and `item`
        is the dictionary describing the entry ('title', 'link',
        'enclosure', 'pubdate', 'author_*', 'unique_id', 'description',
        'categories' and 'item_copyright' are read).
        """
        # These helpers were used below without ever being imported, which
        # raised NameError for items with a pubdate or without a unique id.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # add a "red book" icon
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
159
160
class AcquisitionFeed(Feed):
    """Base class for feeds whose items are books offered as EPUB enclosures.

    Subclasses supply `items` (and usually `get_object`, `title`, `link`);
    this class maps each book to its title, link, author and enclosure.
    """
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book's title."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the book's own URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if it has none."""
        try:
            return book.tags.filter(category='author')[0].name
        except (IndexError, KeyError):
            # Indexing an empty result raises IndexError, which the previous
            # KeyError-only handler let through; KeyError is kept so nothing
            # that used to be tolerated starts failing.
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if it has none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except (IndexError, KeyError):
            # See item_author_name: an empty result raises IndexError.
            return u''

    def item_enclosure_url(self, book):
        """Return the absolute URL of the EPUB file of the book's root ancestor."""
        return full_url(book.root_ancestor.epub_file.url)

    def item_enclosure_length(self, book):
        """Return the size of the EPUB file of the book's root ancestor."""
        return book.root_ancestor.epub_file.size
194
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed: one entry per element of `_root_feeds`."""
    feed_type = OPDSFeed

    title = u'Wolne Lektury'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://www.wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root catalogue entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's display title."""
        return item['title']

    def item_description(self, item):
        """Return the entry's description text."""
        return item['description']

    def item_link(self, item):
        """Reverse the entry's URL pattern name with its positional arguments."""
        view_name = item['link']
        view_args = item['link_args']
        return reverse(view_name, args=view_args)
215
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed

    description = u"Spis utworów na stronie http://WolneLektury.pl"
    link = u'http://www.wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the `_root_feeds` entry for `category`; raise Http404 if there is none."""
        for candidate in _root_feeds:
            if candidate['category'] == category:
                # The first matching entry wins.
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title of the selected root entry."""
        return feed['title']

    def items(self, feed):
        """Return the tags of the entry's category, skipping those with book_count == 0."""
        tags_in_category = Tag.objects.filter(category=feed['category'])
        return tags_in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
247
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books carrying a given tag."""

    def get_object(self, request, category, slug):
        """Return the Tag identified by `category` and `slug`, or raise a 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def link(self, tag):
        """Return the tag's own URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed catalogue description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with `tag`, minus those tagged with another matched book's 'book' tag.

        NOTE(review): the excluded books are presumably descendants
        (parts) of the matched books -- confirm against the Book model.
        """
        books = Book.tagged.with_any([tag])

        # 'book'-category tags whose slugs come from the matched books.
        book_slugs = [book.book_tag_slug() for book in books]
        l_tags = Tag.objects.filter(category='book', slug__in=book_slugs)

        # Primary keys of every book carrying any of those tags.
        descendants_keys = [tagged.pk for tagged in Book.tagged.with_any(l_tags)]
        if not descendants_keys:
            return books
        return books.exclude(pk__in=descendants_keys)
270
271
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the requesting user's non-empty 'set' tags (shelves)."""
    feed_type = OPDSFeed

    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    link = u'http://www.wolnelektury.pl/'

    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def get_object(self, request):
        """Return the user attached to the request."""
        return request.user

    def title(self, user):
        """Return the feed title, which includes the user's username."""
        return u"Półki użytkownika %s" % user.username

    def items(self, user):
        """Return the user's 'set' tags, skipping those with book_count == 0."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the shelf's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for this shelf."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
298
# Python 2.5 has no class-decorator syntax; there, apply the decorator
# manually instead of using the @-line above the class:
#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
301
302
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books on one of the requesting user's shelves."""

    def get_object(self, request, slug):
        """Return the requesting user's 'set' tag with the given slug, or raise a 404."""
        lookup = {'category': 'set', 'slug': slug, 'user': request.user}
        return get_object_or_404(Tag, **lookup)

    def title(self, tag):
        """Return the shelf's name."""
        return tag.name

    def link(self, tag):
        """Return the shelf's own URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Return the fixed catalogue description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Return the books tagged with the shelf's tag."""
        shelf_tags = [tag]
        return Book.tagged.with_any(shelf_tags)
# Python 2.5 has no class-decorator syntax; there, apply the decorator
# manually instead of using the @-line above the class:
#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
323
324
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching a search request."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Recognises "field:value" criteria embedded in the `q` parameter.
    # Group 1 is the whole criterion, groups 2-5 are the values of
    # author, title, categories and description respectively.
    INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")

    def get_object(self, request):
        """
        Run the search described by the request and return the matching books.

        For OPDS 1.1 we should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        If search terms are provided, we shall search for books
        according to Hint information (from author & contributor & title).

        But if search terms are empty, we should do a different search
        (for is_book=True, narrowed by the given fields).
        """
        JVM.attachCurrentThread()

        query = request.GET.get('q', '')

        # Must be set for both query styles: previously it was assigned only
        # in the non-inline branch, so an inline query with a description
        # term raised NameError when the search was executed.
        fuzzy = False

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            def get_criteria(criteria, name, position):
                """Return the value of the first criterion named `name`, or None.

                `position` is the index of that field's value group in each
                match tuple. A value wrapped in double quotes has the quotes
                removed and '+' turned into spaces.
                """
                for match in criteria:
                    if not match[0].startswith(name):
                        continue
                    value = match[position]
                    if value.startswith('"') and value.endswith('"'):
                        value = value[1:-1].replace('+', ' ')
                    return value
                return None

            author = get_criteria(inline_criteria, 'author', 1)
            title = get_criteria(inline_criteria, 'title', 2)
            translator = None
            categories = get_criteria(inline_criteria, 'categories', 3)
            # With inline criteria only the description part is searched as text.
            query = get_criteria(inline_criteria, 'description', 4)
        else:
            author = request.GET.get('author', '')
            title = request.GET.get('title', '')
            translator = request.GET.get('translator', '')
            categories = None

        srch = Search()
        hint = srch.hint()

        # Scenario 1: full search terms provided.
        # Use auxiliary information to narrow it and make it better.
        if query:
            filters = []

            if author:
                hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))

            if translator:
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(translator, field='translators'),
                                     field='translators')))

            if categories:
                filters.append(QueryWrapperFilter(
                    srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
                                     field='tag_name_pl')))

            flt = srch.chain_filters(filters)
            if title:
                q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
                hint.books(*srch.search_books(q, filter=flt))

            toks = srch.get_tokens(query)
            results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
                srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
                srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
            results.sort(reverse=True)
            return [r.book for r in results]

        # Scenario 2: since we no longer have to figure out what the query term means to the user,
        # we can just use filters and not the Hint class.
        fields = (
            ('author', author),
            ('translators', translator),
            ('title', title),
        )
        filters = [
            QueryWrapperFilter(srch.make_phrase(srch.get_tokens(value, field=fld), field=fld))
            for fld, value in fields
            if value
        ]

        flt = srch.chain_filters(filters)
        return srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)

    def get_link(self, query):
        """Return the URL of the 'search' view with `query` appended as the q parameter."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object unchanged."""
        return books