Filter some search characters which make Solr too unhappy.
[wolnelektury.git] / apps / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15
16 from basicauth import logged_in_or_basicauth, factory_decorator
17 from catalogue.models import Book, Tag
18
19 from search.views import Search, SearchResult
20 from lucene import Term, QueryWrapperFilter, TermQuery
21 import operator
22 import logging
23 import re
24
25 log = logging.getLogger('opds')
26
27 from stats.utils import piwik_track
28
29 _root_feeds = (
30     {
31         u"category": u"",
32         u"link": u"opds_user",
33         u"link_args": [],
34         u"title": u"Moje półki",
35         u"description": u"Półki użytkownika dostępne po zalogowaniu"
36     },
37     {
38         u"category": u"author",
39         u"link": u"opds_by_category",
40         u"link_args": [u"author"],
41         u"title": u"Autorzy",
42         u"description": u"Utwory wg autorów"
43     },
44     {
45         u"category": u"kind",
46         u"link": u"opds_by_category",
47         u"link_args": [u"kind"],
48         u"title": u"Rodzaje",
49         u"description": u"Utwory wg rodzajów"
50     },
51     {
52         u"category": u"genre",
53         u"link": u"opds_by_category",
54         u"link_args": [u"genre"],
55         u"title": u"Gatunki",
56         u"description": u"Utwory wg gatunków"
57     },
58     {
59         u"category": u"epoch",
60         u"link": u"opds_by_category",
61         u"link_args": [u"epoch"],
62         u"title": u"Epoki",
63         u"description": u"Utwory wg epok"
64     },
65 )
66
67
def full_url(url):
    """Return `url` joined onto ``http://<domain of the current Site>``."""
    site_root = "http://%s" % Site.objects.get_current().domain
    return urljoin(site_root, url)
70
71
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes OPDS-flavoured entries.

    Entries without an enclosure are written as navigation links
    (``rel="subsection"``); entries with an enclosure are written as
    acquisition links.  Both kinds get a thumbnail link to a static icon.
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # The icon URLs and sizes below are computed once, while the class body
    # executes.  If a size cannot be determined, an empty string is used for
    # the "length" attribute instead of failing.
    _book_parent_img = full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png"))
    try:
        _book_parent_img_size = unicode(os.path.getsize(
            os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except Exception:
        _book_parent_img_size = ''

    _book_img = full_url(os.path.join(settings.STATIC_URL, "img/book.png"))
    try:
        _book_img_size = unicode(os.path.getsize(
            os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except Exception:
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus the start and search links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write a single entry; modified from Atom1Feed.add_item_elements.

        `handler` is the XML generator being written to and `item` is the
        dictionary of entry attributes read below ('title', 'link',
        'enclosure', 'pubdate', author fields, 'unique_id', 'description',
        'categories', 'item_copyright').
        """
        # These helpers are used below but were never imported by this
        # module, so the 'pubdate' and missing-'unique_id' paths raised
        # NameError.  Imported locally to keep the fix self-contained.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # Add an OPDS navigation link if there's no enclosure.
        if item['enclosure'] is None:
            handler.addQuickElement(u"link", u"",
                {u"href": item['link'],
                 u"rel": u"subsection",
                 u"type": u"application/atom+xml"})
            # Add a "green book" icon.
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_parent_img,
                 u"length": self._book_parent_img_size,
                 u"type": u"image/png"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: use the supplied one, otherwise derive a tag URI from
        # the entry's link and publication date.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link.
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/acquisition",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})
            # Add a "red book" icon.
            handler.addQuickElement(u"link", '',
                {u"rel": u"http://opds-spec.org/thumbnail",
                 u"href": self._book_img,
                 u"length": self._book_img_size,
                 u"type": u"image/png"})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
160
161
class AcquisitionFeed(Feed):
    """Base class for feeds whose items are books offered as EPUB enclosures.

    Subclasses supply the feed object and the list of books; this class
    maps each book to its title, link, author and enclosure.
    """
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Return the book's title."""
        return book.title

    def item_description(self):
        """Return an empty description for every item."""
        return u''

    def item_link(self, book):
        """Return the book's absolute URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Return the name of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].name
        except IndexError:
            # Indexing an empty result raises IndexError (not KeyError),
            # which is what happens for a book without an author tag.
            return u''

    def item_author_link(self, book):
        """Return the URL of the book's first author tag, or u'' if none."""
        try:
            return book.tags.filter(category='author')[0].get_absolute_url()
        except IndexError:
            # See item_author_name: no author tag means an empty result.
            return u''

    def item_enclosure_url(self, book):
        """Return the full URL of the book's EPUB file, or None without one."""
        return full_url(book.epub_file.url) if book.epub_file else None

    def item_enclosure_length(self, book):
        """Return the size of the book's EPUB file, or None without one."""
        return book.epub_file.size if book.epub_file else None
195
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed; its items are the `_root_feeds` entries."""
    feed_type = OPDSFeed
    title = u'Wolne Lektury'
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Return the static tuple of root feed entries."""
        return _root_feeds

    def item_title(self, item):
        """Return the entry's title."""
        return item['title']

    def item_link(self, item):
        """Resolve the entry's URL pattern name with its arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)

    def item_description(self, item):
        """Return the entry's description."""
        return item['description']
216
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first `_root_feeds` entry for `category`.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Return the title of the selected root feed entry."""
        return feed['title']

    def items(self, feed):
        """Return tags of the entry's category, skipping those with no books."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the per-tag feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
248
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books carrying a given tag."""

    def link(self, tag):
        """Return the tag's absolute URL."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Return the tag's name."""
        return tag.name

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, category, slug):
        """Return the Tag with this category and slug, or respond with 404."""
        return get_object_or_404(Tag, category=category, slug=slug)

    def items(self, tag):
        """Return books tagged with `tag`, minus those tagged with another match.

        A book is dropped when it carries the 'book' tag of any book in the
        initial result.
        """
        tagged = Book.tagged.with_any([tag])
        own_slugs = [book.book_tag_slug() for book in tagged.iterator()]
        book_tags = Tag.objects.filter(category='book', slug__in=own_slugs)
        nested_ids = [nested.pk for nested in Book.tagged.with_any(book_tags)]
        if not nested_ids:
            return tagged
        return tagged.exclude(pk__in=nested_ids)
271
272
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the requesting user's non-empty 'set' tags."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """Use the requesting user as the feed object."""
        return request.user

    def title(self, user):
        """Return the feed title, which includes the user's username."""
        return u"Półki użytkownika %s" % user.username

    def items(self, user):
        """Return the user's 'set' tags, skipping those with no books."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(book_count=0)

    def item_title(self, item):
        """Return the tag's name."""
        return item.name

    def item_link(self, item):
        """Return the URL of the feed for this set."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Return an empty description for every item."""
        return u''
299
300 # no class decorators in python 2.5
301 #UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed)
302
303
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books in one of the requesting user's sets."""

    def link(self, tag):
        """Return the set tag's absolute URL."""
        return tag.get_absolute_url()

    def title(self, tag):
        """Return the set tag's name."""
        return tag.name

    def description(self, tag):
        """Return the fixed feed description."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def get_object(self, request, slug):
        """Return the requesting user's 'set' tag with this slug, or 404."""
        owner = request.user
        return get_object_or_404(Tag, category='set', slug=slug, user=owner)

    def items(self, tag):
        """Return the books carrying the set tag."""
        wanted = [tag]
        return Book.tagged.with_any(wanted)
321
322 # no class decorators in python 2.5
323 #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
324
325
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching a search request.

    Criteria come either from separate GET parameters or from an 'inline'
    query, where ``name:value`` / ``name:"quoted value"`` pairs are embedded
    in the ``q`` parameter.
    """
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # A criterion value: either a double-quoted string (first group) or a
    # run of non-space characters (second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For each criterion, the positions of its (quoted, unquoted) groups in
    # a tuple returned by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # Criterion name -> index field name.  Criteria not listed here
    # ('title', 'text') use a field of the same name.
    PARAMS_TO_FIELDS = {
        'author': 'authors',
        'translator': 'translators',
        'categories': 'tag_name_pl',
        'description': 'text',
        }

    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def get_object(self, request):
        """Run the search described by `request` and return the matching books.

        For OPDS 1.1 we should handle a query for search terms and criteria
        provided either as OpenSearch parameters or as an 'inline' query.
        OpenSearch defines the fields atom:author, atom:contributor (treated
        as translator) and atom:title.  An inline query provides author,
        title, categories (treated as book tags) and description (treated
        as content search terms).

        If search terms are provided, we search for books according to the
        hint information (from author, contributor and title); if they are
        empty, only the criteria filters are applied.

        Returns a list of Book objects sorted by descending search score.
        """
        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            remains = self.INLINE_QUERY_RE.sub('', query)
            # Collapse the gaps left by the removed criteria and strip the
            # result, so that a query made only of criteria leaves an empty
            # string rather than a lone space that would be searched for.
            remains = re.sub(r'[ \t]+', ' ', remains).strip()

            def get_criteria(criteria, name):
                """Return the first non-empty value found for `name`, or None."""
                for c in criteria:
                    for p in self.MATCHES[name]:
                        if c[p]:
                            if p % 2 == 0:
                                # Quoted form: '+' stands for a space.
                                return c[p].replace('+', ' ')
                            return c[p]
                return None

            criteria = dict(
                (cn, get_criteria(inline_criteria, cn))
                for cn in ['author', 'translator', 'title', 'categories',
                           'description', 'text'])
            query = remains
            # empty query and text set case?
            log.debug("Inline query = [%s], criteria: %s", query, criteria)
        else:
            def remove_dump_data(val):
                """Some clients don't get opds placeholders and just send them."""
                if self.ATOM_PLACEHOLDER.match(val):
                    return ''
                return val

            criteria = dict((cn, remove_dump_data(request.GET.get(cn, '')))
                            for cn in self.MATCHES.keys())
            # query is set above.
            log.debug("Inline query = [%s], criteria: %s", query, criteria)

        srch = Search()

        # Always restrict to hits with a book id, then add one filter per
        # criterion that actually has a value.
        book_hit_filter = srch.index.Q(book_id__any=True)
        filters = [book_hit_filter] + [srch.index.Q(
            **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
            ) for cn in self.MATCHES.keys() if cn in criteria
            if criteria[cn]]

        if query:
            # Free-text terms may match any of the searchable fields.
            q = srch.index.query(
                reduce(operator.or_,
                       [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query})
                        for cn in self.MATCHES.keys()],
                       srch.index.Q()))
        else:
            q = srch.index.query(srch.index.Q())

        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
        results = q.execute()

        book_scores = dict((r['book_id'], r['score']) for r in results)
        books = list(Book.objects.filter(id__in=set(book_scores)))
        books.sort(reverse=True, key=lambda book: book_scores[book.id])
        return books

    def get_link(self, query):
        """Return the search page URL with `query` appended as ``q``."""
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object() unchanged."""
        # The former ``except ValueError`` around this return could never
        # trigger, so it was removed.
        return books