Remove legacy search.
[wolnelektury.git] / src / opds / views.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from functools import reduce
5 import os.path
6 from urllib.parse import urljoin
7
8 from django.contrib.syndication.views import Feed
9 from django.shortcuts import get_object_or_404
10 from django.urls import reverse
11 from django.utils.feedgenerator import Atom1Feed
12 from django.conf import settings
13 from django.http import Http404
14 from django.contrib.sites.models import Site
15 from django.utils.functional import lazy
16
17 from basicauth import logged_in_or_basicauth, factory_decorator
18 from catalogue.models import Book, Tag
19 from search.utils import UnaccentSearchQuery, UnaccentSearchVector
20
21 import operator
22 import logging
23 import re
24
25 from stats.utils import piwik_track
26
# Module-level logger used by the OPDS views; registered under the name 'opds'.
log = logging.getLogger('opds')
28
# Static table of the entries served by RootFeed.  Keys of every entry:
#   "category"    - tag category; ByCategoryFeed.get_object() matches it
#                   against its ``category`` argument and ByCategoryFeed.items()
#                   filters Tag objects by it (empty for the user shelves entry).
#   "link"        - URL pattern name handed to reverse() in RootFeed.item_link().
#   "link_args"   - positional arguments for that reverse() call.
#   "title"       - entry title returned by RootFeed.item_title().
#   "description" - entry summary returned by RootFeed.item_description().
_root_feeds = (
    # User shelves ("Moje półki" = "My shelves").
    {
        "category": "",
        "link": "opds_user",
        "link_args": [],
        "title": "Moje półki",
        "description": "Półki użytkownika dostępne po zalogowaniu"
    },
    # Browse by author.
    {
        "category": "author",
        "link": "opds_by_category",
        "link_args": ["author"],
        "title": "Autorzy",
        "description": "Utwory wg autorów"
    },
    # Browse by kind.
    {
        "category": "kind",
        "link": "opds_by_category",
        "link_args": ["kind"],
        "title": "Rodzaje",
        "description": "Utwory wg rodzajów"
    },
    # Browse by genre.
    {
        "category": "genre",
        "link": "opds_by_category",
        "link_args": ["genre"],
        "title": "Gatunki",
        "description": "Utwory wg gatunków"
    },
    # Browse by epoch.
    {
        "category": "epoch",
        "link": "opds_by_category",
        "link_args": ["epoch"],
        "title": "Epoki",
        "description": "Utwory wg epok"
    },
)
66
67
# Domain of the current Site, wrapped in Django's lazy() so that the
# Site lookup is not performed at import time but only when the value is
# actually used as a string (e.g. interpolated in full_url()).
current_domain = lazy(lambda: Site.objects.get_current().domain, str)()
69
70
def full_url(url):
    """Resolve ``url`` against ``http://<current site domain>`` and return it."""
    site_root = "http://%s" % current_domain
    return urljoin(site_root, url)
73
74
class OPDSFeed(Atom1Feed):
    """Atom feed generator that writes OPDS catalogue entries.

    Entries without enclosures are written as navigation entries
    (a ``subsection`` link plus the "green book" thumbnail); every
    enclosure of an entry is written as an OPDS acquisition link followed
    by the "red book" thumbnail.
    """
    link_rel = "subsection"
    link_type = "application/atom+xml"

    # Thumbnail attached to navigation entries.  The URL is built lazily;
    # the file size is read once, when the class body is executed, and is
    # left empty if the file cannot be stat'ed.
    _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
    try:
        _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
    except OSError:
        _book_parent_img_size = ''

    # Thumbnail attached to acquisition links; same scheme as above.
    _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
    try:
        _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
    except OSError:
        _book_img_size = ''

    def add_root_elements(self, handler):
        """Write the standard Atom root elements plus OPDS start/search links.

        :param handler: XML generator the feed is written to.
        """
        super().add_root_elements(handler)
        handler.addQuickElement("link", None,
                                {"href": reverse("opds_authors"),
                                 "rel": "start",
                                 "type": "application/atom+xml"})
        handler.addQuickElement("link", None,
                                {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
                                 "rel": "search",
                                 "type": "application/opensearchdescription+xml"})

    def add_item_elements(self, handler, item):
        """Write a single entry; modified from Atom1Feed.add_item_elements.

        :param handler: XML generator the feed is written to.
        :param item: dict describing the entry, as stored by the feed's
            ``add_item()``.
        """
        # Imported here so the method does not depend on names missing from
        # the module namespace (they used to be referenced without import).
        from django.utils.feedgenerator import get_tag_uri, rfc3339_date

        handler.addQuickElement("title", item['title'])

        # Add an OPDS Navigation link if there's no enclosure.  The feed
        # framework hands over an empty sequence (not None) for entries
        # without enclosures, so test for emptiness rather than for None.
        if not item.get('enclosures'):
            handler.addQuickElement(
                "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
            # add a "green book" icon
            handler.addQuickElement(
                "link", '',
                {
                    "rel": "http://opds-spec.org/thumbnail",
                    "href": self._book_parent_img,
                    "length": self._book_parent_img_size,
                    "type": "image/png",
                })
        if item['pubdate'] is not None:
            # rfc3339_date() already returns text; no decoding needed.
            handler.addQuickElement("updated", rfc3339_date(item['pubdate']))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement("email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement("uri", item['author_link'])
            handler.endElement("author")

        # Unique ID: use the one supplied, otherwise derive a tag URI from
        # the entry link and publication date.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement("id", unique_id)

        # Summary.
        # OPDS needs type=text
        if item['description'] is not None:
            handler.addQuickElement("summary", item['description'], {"type": "text"})

        # Enclosure as OPDS Acquisition Link
        for enc in item.get('enclosures', []):
            handler.addQuickElement(
                "link", '',
                {
                    "rel": "http://opds-spec.org/acquisition",
                    "href": enc.url,
                    "length": enc.length,
                    "type": enc.mime_type,
                })
            # add a "red book" icon
            handler.addQuickElement(
                "link", '',
                {
                    "rel": "http://opds-spec.org/thumbnail",
                    "href": self._book_img,
                    "length": self._book_img_size,
                    "type": "image/png",
                })

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement("category", "", {"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement("rights", item['item_copyright'])
173
174
class AcquisitionFeed(Feed):
    """Common base for feeds whose items are books offered as EPUB files."""

    feed_type = OPDSFeed
    link = 'http://www.wolnelektury.pl/'
    author_name = "Wolne Lektury"
    author_link = "http://www.wolnelektury.pl/"
    item_enclosure_mime_type = "application/epub+zip"

    def item_title(self, book):
        """Entry title: the title of the book."""
        return book.title

    def item_description(self):
        """Entries carry an empty description."""
        return ''

    def item_link(self, book):
        """Entry link: the book's own URL."""
        return book.get_absolute_url()

    def item_author_name(self, book):
        """Name of the book's first author, or '' on AttributeError."""
        try:
            first_author = book.authors().first()
            name = first_author.name
        except AttributeError:
            name = ''
        return name

    def item_author_link(self, book):
        """URL of the book's first author, or '' on AttributeError."""
        try:
            first_author = book.authors().first()
            url = first_author.get_absolute_url()
        except AttributeError:
            url = ''
        return url

    def item_enclosure_url(self, book):
        """Absolute URL of the EPUB file, or None if the book has none."""
        if book.epub_file:
            return full_url(book.epub_url())
        return None

    def item_enclosure_length(self, book):
        """Size of the EPUB file, or None if the book has none."""
        if book.epub_file:
            return book.epub_file.size
        return None
208
209
@piwik_track
class RootFeed(Feed):
    """Top-level navigation feed; its items are the ``_root_feeds`` entries."""

    feed_type = OPDSFeed
    title = 'Wolne Lektury'
    description = "Spis utworów na stronie http://WolneLektury.pl"
    link = 'http://wolnelektury.pl/'
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def items(self):
        """Return the static table of root entries."""
        return _root_feeds

    def item_title(self, item):
        """Title stored in the entry."""
        return item['title']

    def item_link(self, item):
        """Reverse the entry's URL pattern name with its stored arguments."""
        view_name = item['link']
        view_args = item['link_args']
        return reverse(view_name, args=view_args)

    def item_description(self, item):
        """Description stored in the entry."""
        return item['description']
230
231
@piwik_track
class ByCategoryFeed(Feed):
    """Navigation feed listing the non-empty tags of one category."""

    feed_type = OPDSFeed
    description = "Spis utworów na stronie http://WolneLektury.pl"
    link = 'http://wolnelektury.pl/'
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def get_object(self, request, category):
        """Return the first ``_root_feeds`` entry for ``category``.

        Raises Http404 when no entry has that category.
        """
        for candidate in _root_feeds:
            if candidate['category'] == category:
                return candidate
        raise Http404

    def title(self, feed):
        """Feed title: the title of the selected root entry."""
        return feed['title']

    def items(self, feed):
        """Tags of the entry's category, excluding tags without items."""
        tags_in_category = Tag.objects.filter(category=feed['category'])
        return tags_in_category.exclude(items=None)

    def item_title(self, item):
        """Entry title: the tag name."""
        return item.name

    def item_link(self, item):
        """Link to the per-tag feed for this tag."""
        url_args = [item.category, item.slug]
        return reverse("opds_by_tag", args=url_args)

    def item_description(self):
        """Entries carry an empty description."""
        return ''
262         return ''
263
264
@piwik_track
class ByTagFeed(AcquisitionFeed):
    """Acquisition feed with the top-level books carrying a given tag."""

    def get_object(self, request, category, slug):
        """Fetch the tag identified by category and slug, or raise 404."""
        lookup = {'category': category, 'slug': slug}
        return get_object_or_404(Tag, **lookup)

    def title(self, tag):
        """Feed title: the tag name."""
        return tag.name

    def link(self, tag):
        """Feed link: the tag's own URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Fixed feed description."""
        return "Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Books returned by ``Book.tagged_top_level`` for this single tag."""
        wanted_tags = [tag]
        return Book.tagged_top_level(wanted_tags)
281
282
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserFeed(Feed):
    """Navigation feed listing the non-empty 'set' tags of the request user."""

    feed_type = OPDSFeed
    description = "Półki użytkownika na stronie http://WolneLektury.pl"
    link = 'http://www.wolnelektury.pl/'
    author_name = "Wolne Lektury"
    author_link = "http://wolnelektury.pl/"

    def get_object(self, request):
        """The feed object is the user making the request."""
        return request.user

    def title(self, user):
        """Feed title including the user's username."""
        username = user.username
        return "Półki użytkownika %s" % username

    def items(self, user):
        """The user's 'set' tags, excluding tags without items."""
        user_sets = Tag.objects.filter(category='set', user=user)
        return user_sets.exclude(items=None)

    def item_title(self, item):
        """Entry title: the tag name."""
        return item.name

    def item_link(self, item):
        """Link to the feed of this set."""
        url_args = [item.slug]
        return reverse("opds_user_set", args=url_args)

    def item_description(self):
        """Entries carry an empty description."""
        return ''
309
310
@factory_decorator(logged_in_or_basicauth())
@piwik_track
class UserSetFeed(AcquisitionFeed):
    """Acquisition feed with the books of one 'set' tag of the request user."""

    def get_object(self, request, slug):
        """Fetch the requesting user's 'set' tag with this slug, or raise 404."""
        lookup = {'category': 'set', 'slug': slug, 'user': request.user}
        return get_object_or_404(Tag, **lookup)

    def title(self, tag):
        """Feed title: the tag name."""
        return tag.name

    def link(self, tag):
        """Feed link: the tag's own URL."""
        return tag.get_absolute_url()

    def description(self, tag):
        """Fixed feed description."""
        return "Spis utworów na stronie http://WolneLektury.pl"

    def items(self, tag):
        """Books returned by ``Book.tagged.with_any`` for this single tag."""
        wanted_tags = [tag]
        return Book.tagged.with_any(wanted_tags)
328
329
@piwik_track
class SearchFeed(AcquisitionFeed):
    """Acquisition feed with the books matching an OPDS search request."""

    description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
    title = "Wyniki wyszukiwania"

    # A criterion value: either double-quoted (captured by the first group)
    # or a bare run of non-space characters (captured by the second group).
    QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
    INLINE_QUERY_RE = re.compile(
        r"author:" + QUOTE_OR_NOT +
        "|translator:" + QUOTE_OR_NOT +
        "|title:" + QUOTE_OR_NOT +
        "|categories:" + QUOTE_OR_NOT +
        "|description:" + QUOTE_OR_NOT +
        "|text:" + QUOTE_OR_NOT
        )
    # For every criterion: positions of its (quoted, bare) groups within the
    # tuples produced by INLINE_QUERY_RE.findall().
    MATCHES = {
        'author': (0, 1),
        'translator': (2, 3),
        'title': (4, 5),
        'categories': (6, 7),
        'description': (8, 9),
        'text': (10, 11),
        }

    # An unexpanded OpenSearch template parameter, e.g. "{atom:author}".
    ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")

    def get_object(self, request):
        """
        Build the queryset of books matching the search request.

        For OPDS 1.1 we should handle a query for search terms
        and criteria provided either as opensearch or 'inline' query.
        OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
        atom:title. Inline query provides author, title, categories (treated as book tags),
        description (treated as content search terms).

        If search terms are provided, we shall search for books
        according to hint information (from author & contributor & title).

        But if search terms are empty, we should do a different search
        (perhaps for is_book=True).

        Only the free-text query and the author, categories and title
        criteria currently narrow the result; translator, description and
        text are parsed but not applied.

        :param request: the HTTP request; ``q`` holds the query and, for
            non-inline queries, each criterion is read from the GET
            parameter of the same name.
        :return: queryset of matching books.
        """

        query = request.GET.get('q', '')

        inline_criteria = self.INLINE_QUERY_RE.findall(query)
        if inline_criteria:
            # Whatever is left after cutting out the "name:value" pairs is
            # the free-text part of the query.
            remains = self.INLINE_QUERY_RE.sub('', query)
            query = re.sub(r'[ \t]+', ' ', remains)

            def get_criteria(name):
                """Return the first non-empty value given for ``name``."""
                quoted, bare = self.MATCHES[name]
                for groups in inline_criteria:
                    if groups[quoted]:
                        return groups[quoted].replace('+', ' ')
                    if groups[bare]:
                        return groups[bare]
                return None

            criteria = {name: get_criteria(name) for name in self.MATCHES}
            # empty query and text set case?
        else:
            def remove_dump_data(val):
                """Some clients don't get opds placeholders and just send them."""
                if self.ATOM_PLACEHOLDER.match(val):
                    return ''
                return val

            criteria = {
                name: remove_dump_data(request.GET.get(name, ''))
                for name in self.MATCHES
            }

        # A query consisting only of whitespace (e.g. what remains between
        # two inline criteria) must count as "no free-text query"; otherwise
        # it would be used as an empty full-text search.
        query = query.strip()
        log.debug("Inline query = [%s], criteria: %s", query, criteria)

        books = Book.objects.filter(findable=True).annotate(
            search_vector=UnaccentSearchVector('title')
        )
        if query:
            squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
            books = books.filter(search_vector=squery)
        if criteria['author']:
            authors = Tag.objects.filter(category='author').annotate(
                search_vector=UnaccentSearchVector('name_pl')
            ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
            books = books.filter(tag_relations__tag__in=authors)
        if criteria['categories']:
            tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
                search_vector=UnaccentSearchVector('name_pl')
            ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
            books = books.filter(tag_relations__tag__in=tags)
        if criteria['translator']:
            # TODO
            pass
        if criteria['title']:
            books = books.filter(
                search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
            )

        # Drop every book that has an ancestor among the matches.
        books = books.exclude(ancestor__in=books)

        # NOTE(review): ascending order puts the lowest popularity count
        # first -- confirm this is the intended ordering.
        books = books.order_by('popularity__count')
        return books

    def get_link(self, query):
        """Return the URL of the 'search' view with ``query`` as ``q``.

        NOTE(review): ``query`` is interpolated without URL-encoding.
        """
        return "%s?q=%s" % (reverse('search'), query)

    def items(self, books):
        """Return the queryset built by get_object() as the feed items."""
        return books