cleanup solr schema
[wolnelektury.git] / src / opds / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import os.path
6 from urlparse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.core.urlresolvers import reverse
10 from django.shortcuts import get_object_or_404
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15 from django.utils.functional import lazy
16
17 from basicauth import logged_in_or_basicauth, factory_decorator
18 from catalogue.models import Book, Tag
19
20 from search.views import Search
21 import operator
22 import logging
23 import re
24
25 from stats.utils import piwik_track
26
# Module-level logger used by the OPDS views.
log = logging.getLogger('opds')

# Static description of the entries listed by RootFeed.  Every entry holds:
#   category    - value compared against the URL argument in
#                 ByCategoryFeed.get_object and used to filter Tag objects
#                 (empty for the user shelves entry),
#   link        - URL pattern name handed to reverse(),
#   link_args   - positional arguments for that reverse() call,
#   title       - entry title,
#   description - entry summary.
_root_feeds = (
    {
        u"category": u"",
        u"link": u"opds_user",
        u"link_args": [],
        u"title": u"Moje półki",
        u"description": u"Półki użytkownika dostępne po zalogowaniu"
    },
    {
        u"category": u"author",
        u"link": u"opds_by_category",
        u"link_args": [u"author"],
        u"title": u"Autorzy",
        u"description": u"Utwory wg autorów"
    },
    {
        u"category": u"kind",
        u"link": u"opds_by_category",
        u"link_args": [u"kind"],
        u"title": u"Rodzaje",
        u"description": u"Utwory wg rodzajów"
    },
    {
        u"category": u"genre",
        u"link": u"opds_by_category",
        u"link_args": [u"genre"],
        u"title": u"Gatunki",
        u"description": u"Utwory wg gatunków"
    },
    {
        u"category": u"epoch",
        u"link": u"opds_by_category",
        u"link_args": [u"epoch"],
        u"title": u"Epoki",
        u"description": u"Utwory wg epok"
    },
)


# Domain of the current Site, wrapped in Django's lazy() so the Site lookup
# happens when the value is actually used rather than while this module is
# being imported.
current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
69
70
def full_url(url):
    """Return ``url`` resolved against ``http://<current site domain>``."""
    site_root = "http://%s" % current_domain
    return urljoin(site_root, url)
73
74
class OPDSFeed(Atom1Feed):
    """Atom 1.0 feed generator that writes OPDS catalogue entries.

    Items without an enclosure are written as navigation entries (a
    ``rel="subsection"`` link); items with an enclosure are written as
    acquisition entries (a ``rel="http://opds-spec.org/acquisition"`` link).
    Both kinds of entry also get a thumbnail link.
    """
    link_rel = u"subsection"
    link_type = u"application/atom+xml"

    # Thumbnail for navigation entries.  The URL is lazy because full_url()
    # needs the current Site; the size is read from STATIC_ROOT once, when the
    # class body is executed.
    _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
    try:
        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except (IOError, OSError):
        # os.path.getsize() reports a missing/unreadable file as OSError,
        # which on Python 2 is not an IOError; catch both so that a missing
        # icon does not break importing this module.
        _book_parent_img_size = ''

    # Thumbnail for acquisition entries; same scheme as above.
    _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
    try:
        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except (IOError, OSError):
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus the OPDS start and search links."""
        super(OPDSFeed, self).add_root_elements(handler)
        handler.addQuickElement(u"link", None,
                                {u"href": reverse("opds_authors"),
                                 u"rel": u"start",
                                 u"type": u"application/atom+xml"})
        handler.addQuickElement(u"link", None,
                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 u"rel": u"search",
                                 u"type": u"application/opensearchdescription+xml"})

    def _add_thumbnail(self, handler, href, length):
        """Write an OPDS thumbnail link pointing at the PNG image ``href``."""
        handler.addQuickElement(
            u"link", '',
            {
                u"rel": u"http://opds-spec.org/thumbnail",
                u"href": href,
                u"length": length,
                u"type": u"image/png",
            })

    def add_item_elements(self, handler, item):
        """Write a single feed entry (modified from Atom1Feed.add_item_elements).

        ``handler`` is the XML generator of the feed being written and
        ``item`` is the item dictionary; the keys read here are title, link,
        enclosure, pubdate, author_name, author_email, author_link,
        unique_id, description, categories and item_copyright.
        """
        # These helpers were referenced without ever being imported, so any
        # item with a pubdate or without a unique id raised NameError.
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement(u"title", item['title'])

        # add a OPDS Navigation link if there's no enclosure
        if item['enclosure'] is None:
            handler.addQuickElement(
                u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
            # add a "green book" icon
            self._add_thumbnail(handler, self._book_parent_img, self._book_parent_img_size)
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID; fall back to a tag URI built from the link and pubdate.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})

        # Enclosure as OPDS Acquisition Link
        if item['enclosure'] is not None:
            handler.addQuickElement(
                u"link", '',
                {
                    u"rel": u"http://opds-spec.org/acquisition",
                    u"href": item['enclosure'].url,
                    u"length": item['enclosure'].length,
                    u"type": item['enclosure'].mime_type,
                })
            # add a "red book" icon
            self._add_thumbnail(handler, self._book_img, self._book_img_size)

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
173
174
class AcquisitionFeed(Feed):
    """Base feed whose items are books offered as EPUB enclosures."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    item_enclosure_mime_type = "application/epub+zip"
    author_name = u"Wolne Lektury"
    author_link = u"http://www.wolnelektury.pl/"

    def item_title(self, book):
        """Use the book title as the entry title."""
        title = book.title
        return title

    def item_description(self):
        """Entries carry an empty description."""
        return u''

    def item_link(self, book):
        """Link the entry to the book's own page."""
        target = book.get_absolute_url()
        return target

    def item_author_name(self, book):
        """Name of the book's first author, or an empty string."""
        try:
            first_author = book.authors().first()
            return first_author.name
        except AttributeError:
            # e.g. first() gave None because the book has no authors
            return u''

    def item_author_link(self, book):
        """URL of the book's first author, or an empty string."""
        try:
            first_author = book.authors().first()
            return first_author.get_absolute_url()
        except AttributeError:
            # e.g. first() gave None because the book has no authors
            return u''

    def item_enclosure_url(self, book):
        """Absolute URL of the EPUB file, or None when the book has none."""
        if not book.epub_file:
            return None
        return full_url(book.epub_file.url)

    def item_enclosure_length(self, book):
        """Size of the EPUB file, or None when the book has none."""
        if not book.epub_file:
            return None
        return book.epub_file.size
208
209
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed listing the entries of ``_root_feeds``."""
    feed_type = OPDSFeed
    title = u'Wolne Lektury'
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def items(self):
        """Every statically configured root entry."""
        return _root_feeds

    def item_title(self, item):
        """Title stored in the entry description."""
        entry_title = item['title']
        return entry_title

    def item_link(self, item):
        """Resolve the entry's URL pattern name with its arguments."""
        url_name = item['link']
        url_args = item['link_args']
        return reverse(url_name, args=url_args)

    def item_description(self, item):
        """Summary stored in the entry description."""
        entry_summary = item['description']
        return entry_summary
230
231
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""
    feed_type = OPDSFeed
    link = u'http://wolnelektury.pl/'
    description = u"Spis utworów na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category`` or raise Http404."""
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Feed title taken from the matched root entry."""
        feed_title = feed['title']
        return feed_title

    def items(self, feed):
        """Tags of the feed's category, leaving out tags with no items."""
        in_category = Tag.objects.filter(category=feed['category'])
        return in_category.exclude(items=None)

    def item_title(self, item):
        """Use the tag name as the entry title."""
        tag_name = item.name
        return tag_name

    def item_link(self, item):
        """Link the entry to the feed of books carrying this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Entries carry an empty description."""
        return u''
263
264
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed of the books returned by ``Book.tagged_top_level`` for one tag."""

    def get_object(self, request, category, slug):
        """Fetch the tag identified by ``category`` and ``slug`` (404 if absent)."""
        tag = get_object_or_404(Tag, category=category, slug=slug)
        return tag

    def link(self, tag):
        """Feed link: the tag's own page."""
        target = tag.get_absolute_url()
        return target

    def title(self, tag):
        """Feed title: the tag name."""
        tag_name = tag.name
        return tag_name

    def description(self, tag):
        """Feed description (the same text for every tag)."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Books obtained from ``Book.tagged_top_level`` for this tag."""
        wanted_tags = [tag]
        return Book.tagged_top_level(wanted_tags)
281
282
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the non-empty shelves ('set' tags) of ``request.user``."""
    feed_type = OPDSFeed
    link = u'http://www.wolnelektury.pl/'
    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
    author_name = u"Wolne Lektury"
    author_link = u"http://wolnelektury.pl/"

    def get_object(self, request):
        """The feed object is the user attached to the request."""
        current_user = request.user
        return current_user

    def title(self, user):
        """Feed title mentioning the user's name."""
        username = user.username
        return u"Półki użytkownika %s" % username

    def items(self, user):
        """The user's 'set' tags, leaving out tags with no items."""
        shelves = Tag.objects.filter(category='set', user=user)
        return shelves.exclude(items=None)

    def item_title(self, item):
        """Use the shelf name as the entry title."""
        shelf_name = item.name
        return shelf_name

    def item_link(self, item):
        """Link the entry to the feed of books on this shelf."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Entries carry an empty description."""
        return u''
309
310
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed of the books on one shelf ('set' tag) of ``request.user``."""

    def get_object(self, request, slug):
        """Fetch the requesting user's 'set' tag with this slug (404 if absent)."""
        shelf = get_object_or_404(Tag, category='set', slug=slug, user=request.user)
        return shelf

    def link(self, tag):
        """Feed link: the tag's own page."""
        target = tag.get_absolute_url()
        return target

    def title(self, tag):
        """Feed title: the tag name."""
        shelf_name = tag.name
        return shelf_name

    def description(self, tag):
        """Feed description (the same text for every shelf)."""
        return u"Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Books returned by ``Book.tagged.with_any`` for this tag."""
        wanted_tags = [tag]
        return Book.tagged.with_any(wanted_tags)
328
329
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching an OPDS search request."""
    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = u"Wyniki wyszukiwania"

    # Value of one inline criterion: a double-quoted phrase (first group) or
    # a run of non-space characters (second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For every criterion: positions, inside one INLINE_QUERY_RE.findall()
    # tuple, of its (quoted, unquoted) capture groups.
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # Criterion name -> search index field.  'title' and 'text' are not
    # listed because lookups fall back to the criterion name itself.
    PARAMS_TO_FIELDS = {
        'author': 'authors',
        'translator': 'translators',
        'categories': 'tag_name_pl',
        'description': 'text',
        }

    # An unexpanded template placeholder such as "{atom:author}".
    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def _inline_criterion(self, matches, name):
        """Return the first non-empty value captured for ``name``, or None.

        ``matches`` is the list of tuples produced by
        ``INLINE_QUERY_RE.findall()``.
        """
        for match in matches:
            for pos in self.MATCHES[name]:
                if match[pos]:
                    if pos % 2 == 0:
                        # Even positions are the quoted groups; there '+'
                        # is turned into a space (presumably a client-side
                        # encoding of spaces - confirm).
                        return match[pos].replace('+', ' ')
                    return match[pos]
        return None

    def _strip_placeholder(self, val):
        """Some clients don't get opds placeholders and just send them."""
        if self.ATOM_PLACEHOLDER.match(val):
            return ''
        return val

    def get_object(self, request):
        """Run the search described by ``request`` and return matching books.

        For OPDS 1.1 we should handle a query for search terms and criteria
        provided either as OpenSearch or 'inline' query.  OpenSearch defines
        fields: atom:author, atom:contributor (treated as translator),
        atom:title.  Inline query provides author, title, categories (treated
        as book tags), description (treated as content search terms).

        If search terms are provided, we shall search for books according to
        hint information (from author & contributor & title).

        But if search terms are empty, we should do a different search
        (perhaps for is_book=True).

        Returns a list of Book objects ordered by descending search score.
        """
        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            criteria = dict(
                (cn, self._inline_criterion(inline_criteria, cn))
                for cn in ['author', 'translator', 'title', 'categories',
                           'description', 'text'])
            # What is left after cutting the criteria out is the plain
            # query; runs of blanks left behind are collapsed.
            remains = self.INLINE_QUERY_RE.sub('', query)
            query = re.sub(r'[ \t]+', ' ', remains)
            # empty query and text set case?
            log.debug("Inline query = [%s], criteria: %s", query, criteria)
        else:
            # Criteria arrive as separate GET parameters; the query itself
            # was already read above.
            criteria = dict(
                (cn, self._strip_placeholder(request.GET.get(cn, '')))
                for cn in self.MATCHES.keys())
            log.debug("OpenSearch query = [%s], criteria: %s", query, criteria)

        srch = Search()

        # NOTE(review): book_id__any=True presumably keeps only hits that
        # carry a book_id - confirm against the search index API.
        book_hit_filter = srch.index.Q(book_id__any=True)
        filters = [book_hit_filter] + [
            srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]})
            for cn in self.MATCHES.keys() if criteria.get(cn)]

        if query:
            # The plain query may match any of the searchable fields.
            q = srch.index.query(
                reduce(
                    operator.or_,
                    [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query}) for cn in self.MATCHES.keys()],
                    srch.index.Q()))
        else:
            q = srch.index.query(srch.index.Q())

        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
        results = q.execute()

        book_scores = dict((r['book_id'], r['score']) for r in results)
        books = Book.objects.filter(id__in=set(book_scores))
        return sorted(books, key=lambda book: book_scores[book.id], reverse=True)

    def get_link(self, query):
        """Return the URL of the 'search' view for ``query``.

        NOTE(review): ``query`` is inserted without URL-quoting.
        """
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the books found by get_object() unchanged.

        The former ``except ValueError`` ("too short a query") wrapped only
        ``return books``, which cannot raise, so it has been dropped.
        """
        return books