Fixed #724: nicer search results
[wolnelektury.git] / apps / catalogue / views.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import tempfile
6 import zipfile
7 import sys
8 import pprint
9 import traceback
10 import re
11 import itertools
12 from operator import itemgetter
13
14 from django.conf import settings
15 from django.template import RequestContext
16 from django.shortcuts import render_to_response, get_object_or_404
17 from django.http import HttpResponse, HttpResponseRedirect, Http404
18 from django.core.urlresolvers import reverse
19 from django.db.models import Q
20 from django.contrib.auth.decorators import login_required, user_passes_test
21 from django.utils.datastructures import SortedDict
22 from django.views.decorators.http import require_POST
23 from django.contrib import auth
24 from django.contrib.auth.forms import UserCreationForm, AuthenticationForm
25 from django.utils import simplejson
26 from django.utils.functional import Promise
27 from django.utils.encoding import force_unicode
28 from django.utils.http import urlquote_plus
29 from django.views.decorators import cache
30 from django.utils.translation import ugettext as _
31 from django.views.generic.list_detail import object_list
32
33 from catalogue import models
34 from catalogue import forms
35 from catalogue.utils import split_tags
36 from newtagging import views as newtagging_views
37
38
# View decorator that lets through only users whose `is_staff` attribute is true.
staff_required = user_passes_test(lambda user: user.is_staff)
40
41
class LazyEncoder(simplejson.JSONEncoder):
    """JSON encoder that knows how to serialize lazy (Promise) objects.

    Promise instances (e.g. lazily translated strings) are forced to
    unicode. Any other object the encoder cannot handle is passed to the
    base class, which raises TypeError.
    """
    def default(self, obj):
        if isinstance(obj, Promise):
            return force_unicode(obj)
        # Returning `obj` unchanged would only make the encoder ask for a
        # replacement of the very same object again; let the base class
        # report the unsupported type instead.
        return super(LazyEncoder, self).default(obj)
47
48
def main_page(request):
    """Render the catalogue main page.

    The template context is built with locals(), so every local name
    in this function is visible to the template.
    """
    form = forms.SearchForm()

    if request.user.is_authenticated():
        # shelf data is only prepared for logged-in users
        new_set_form = forms.NewSetForm()
        shelves = models.Tag.objects.filter(user=request.user, category='set')

    tags = models.Tag.objects.exclude(category__in=('set', 'book'))
    for tag in tags:
        # annotate each tag with the value returned by its get_count()
        tag.count = tag.get_count()
    categories = split_tags(tags)
    fragment_tags = categories.get('theme', [])

    return render_to_response(
        'catalogue/main_page.html',
        locals(),
        context_instance=RequestContext(request))
63
64
def book_list(request):
    """Render the list of all books, grouped by the first character of the title.

    The template context is built with locals().
    """
    form = forms.SearchForm()
    books = models.Book.objects.all()

    # SortedDict keeps the groups in the order in which they first appear
    books_by_first_letter = SortedDict()
    for book in books:
        if book.title[0] not in books_by_first_letter:
            books_by_first_letter[book.title[0]] = []
        books_by_first_letter[book.title[0]].append(book)

    return render_to_response(
        'catalogue/book_list.html',
        locals(),
        context_instance=RequestContext(request))
75
76
def differentiate_tags(request, tags, ambiguous_slugs):
    """Render a page letting the user pick a meaning for an ambiguous slug.

    `tags` are the tags already resolved, `ambiguous_slugs` the slugs
    still to be resolved; the first of them is the one being
    disambiguated. One option is offered for every non-book tag
    carrying that slug.
    """
    remaining_slugs = ambiguous_slugs[1:]
    resolved_part = '/'.join(tag.url_chunk for tag in tags)
    unresolved_part = '/'.join(remaining_slugs)

    candidates = models.Tag.objects.exclude(category='book').filter(slug=ambiguous_slugs[0])
    options = [
        {
            # strip('/') drops the separators left by empty leading/trailing parts
            'url_args': '/'.join((resolved_part, candidate.url_chunk, unresolved_part)).strip('/'),
            'tags': [candidate],
        }
        for candidate in candidates
    ]

    context = {'tags': tags, 'options': options, 'unparsed': remaining_slugs}
    return render_to_response('catalogue/differentiate_tags.html', context,
                context_instance=RequestContext(request))
89
90
def tagged_object_list(request, tags=''):
    """List the objects (books or fragments) matching a combination of tags.

    `tags` is the tag part of the URL; it is parsed with
    Tag.get_tag_list(). Responds with 404 when a tag does not exist, when
    there are more tags than settings.MAX_TAG_LIST (if that setting is
    defined) or when any of the tags is a 'book' tag. Ambiguous slugs are
    handed over to differentiate_tags().

    When a 'theme' tag is present the listed objects are fragments,
    otherwise they are books. The result is rendered by the generic
    object_list view together with the related tags split into categories.
    """
    try:
        tags = models.Tag.get_tag_list(tags)
    except models.Tag.DoesNotExist:
        raise Http404
    except models.Tag.MultipleObjectsReturned, e:
        return differentiate_tags(request, e.tags, e.ambiguous_slugs)

    # MAX_TAG_LIST is optional: a missing setting raises AttributeError,
    # which simply disables the limit.
    try:
        if len(tags) > settings.MAX_TAG_LIST:
            raise Http404
    except AttributeError:
        pass

    # 'book' tags are not allowed in the tag list
    if len([tag for tag in tags if tag.category == 'book']):
        raise Http404

    # these hold lists (truthy when non-empty), not plain booleans
    theme_is_set = [tag for tag in tags if tag.category == 'theme']
    shelf_is_set = [tag for tag in tags if tag.category == 'set']
    only_shelf = shelf_is_set and len(tags) == 1
    only_my_shelf = only_shelf and request.user.is_authenticated() and request.user == tags[0].user

    objects = only_author = pd_counter = None
    categories = {}

    if theme_is_set:
        # a theme was requested: list fragments
        shelf_tags = [tag for tag in tags if tag.category == 'set']
        fragment_tags = [tag for tag in tags if tag.category != 'set']
        fragments = models.Fragment.tagged.with_all(fragment_tags)

        if shelf_tags:
            # presumably narrows the fragments down to those coming from
            # books on the given shelves -- verify against newtagging's with_any()
            books = models.Book.tagged.with_all(shelf_tags).order_by()
            l_tags = models.Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
            fragments = models.Fragment.tagged.with_any(l_tags, fragments)

        # newtagging goes crazy if we just try:
        #related_tags = models.Tag.objects.usage_for_queryset(fragments, counts=True,
        #                    extra={'where': ["catalogue_tag.category != 'book'"]})
        fragment_keys = [fragment.pk for fragment in fragments]
        if fragment_keys:
            related_tags = models.Fragment.tags.usage(counts=True,
                                filters={'pk__in': fragment_keys},
                                extra={'where': ["catalogue_tag.category != 'book'"]})
            # do not offer the tags that are already selected
            related_tags = (tag for tag in related_tags if tag not in fragment_tags)
            categories = split_tags(related_tags)

            # `objects` stays None when no fragment matched
            objects = fragments
    else:
        # get relevant books and their tags
        objects = models.Book.tagged.with_all(tags).order_by()
        if not shelf_is_set:
            # eliminate descendants
            l_tags = models.Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
            descendants_keys = [book.pk for book in models.Book.tagged.with_any(l_tags)]
            if descendants_keys:
                objects = objects.exclude(pk__in=descendants_keys)

        # get related tags from `tag_counter` and `theme_counter`
        related_counts = {}
        tags_pks = [tag.pk for tag in tags]
        for book in objects:
            for tag_pk, value in itertools.chain(book.tag_counter.iteritems(), book.theme_counter.iteritems()):
                # skip the tags that are already selected
                if tag_pk in tags_pks:
                    continue
                related_counts[tag_pk] = related_counts.get(tag_pk, 0) + value
        related_tags = models.Tag.objects.filter(pk__in=related_counts.keys())
        related_tags = [tag for tag in related_tags if tag not in tags]
        for tag in related_tags:
            tag.count = related_counts[tag.pk]

        categories = split_tags(related_tags)
        del related_tags

    if not objects:
        # nothing to list: compute the extra flags for the template and
        # pass an empty queryset on to object_list
        only_author = len(tags) == 1 and tags[0].category == 'author'
        pd_counter = only_author and tags[0].goes_to_pd()
        objects = models.Book.objects.none()

    return object_list(
        request,
        objects,
        template_name='catalogue/tagged_object_list.html',
        extra_context={
            'categories': categories,
            'only_shelf': only_shelf,
            'only_author': only_author,
            'pd_counter': pd_counter,
            'only_my_shelf': only_my_shelf,
            'formats_form': forms.DownloadFormatsForm(),

            'tags': tags,
        }
    )
184
185
def book_fragments(request, book_slug, theme_slug):
    """Render the fragments of one book tagged with one theme.

    Responds with 404 when the book, its 'l-<slug>' book tag or the theme
    tag cannot be found. The template context is built with locals().
    """
    form = forms.SearchForm()

    book = get_object_or_404(models.Book, slug=book_slug)
    book_tag = get_object_or_404(models.Tag, category='book', slug='l-' + book_slug)
    theme = get_object_or_404(models.Tag, category='theme', slug=theme_slug)
    fragments = models.Fragment.tagged.with_all([book_tag, theme])

    return render_to_response(
        'catalogue/book_fragments.html',
        locals(),
        context_instance=RequestContext(request))
195
196
def book_detail(request, slug):
    """Render the detail page of a book.

    Falls back to book_stub_detail() when no Book has the given slug.
    The template context is built with locals().
    """
    try:
        book = models.Book.objects.get(slug=slug)
    except models.Book.DoesNotExist:
        return book_stub_detail(request, slug)

    book_tag = book.book_tag()
    # all tags of the book except shelves
    tags = list(book.tags.exclude(category='set'))
    categories = split_tags(tags)
    book_children = book.children.all().order_by('parent_number')

    # annotate the theme tags with the counts stored on the book
    theme_counter = book.theme_counter
    book_themes = models.Tag.objects.filter(pk__in=theme_counter.keys())
    for tag in book_themes:
        tag.count = theme_counter[tag.pk]

    extra_info = book.get_extra_info_value()
    form = forms.SearchForm()

    return render_to_response(
        'catalogue/book_detail.html',
        locals(),
        context_instance=RequestContext(request))
218
219
def book_stub_detail(request, slug):
    """Render the detail page of a BookStub, or 404 when none has the slug.

    The template context is built with locals().
    """
    form = forms.SearchForm()
    book = get_object_or_404(models.BookStub, slug=slug)
    pd_counter = book.pd

    return render_to_response(
        'catalogue/book_stub_detail.html',
        locals(),
        context_instance=RequestContext(request))
227
228
def book_text(request, slug):
    """Render the text of a book together with its themes.

    `book_themes` ends up as a list of (theme, fragments) pairs ordered by
    the theme's sort_key. The template context is built with locals().
    """
    book = get_object_or_404(models.Book, slug=slug)

    # group the fragments of the book by their theme tags
    book_themes = {}
    for fragment in book.fragments.all():
        for theme in fragment.tags.filter(category='theme'):
            if theme not in book_themes:
                book_themes[theme] = []
            book_themes[theme].append(fragment)

    book_themes = sorted(book_themes.items(), key=lambda pair: pair[0].sort_key)
    return render_to_response(
        'catalogue/book_text.html',
        locals(),
        context_instance=RequestContext(request))
240
241
242 # ==========
243 # = Search =
244 # ==========
245
246 def _no_diacritics_regexp(query):
247     """ returns a regexp for searching for a query without diacritics
248
249     should be locale-aware """
250     names = {
251         u'a':u'aąĄ', u'c':u'cćĆ', u'e':u'eęĘ', u'l': u'lłŁ', u'n':u'nńŃ', u'o':u'oóÓ', u's':u'sśŚ', u'z':u'zźżŹŻ',
252         u'ą':u'ąĄ', u'ć':u'ćĆ', u'ę':u'ęĘ', u'ł': u'łŁ', u'ń':u'ńŃ', u'ó':u'óÓ', u'ś':u'śŚ', u'ź':u'źŹ', u'ż':u'żŻ'
253         }
254     def repl(m):
255         l = m.group()
256         return u"(%s)" % '|'.join(names[l])
257     return re.sub(u'[%s]' % (u''.join(names.keys())), repl, query)
258
def unicode_re_escape(query):
    """ Unicode-friendly version of re.escape

    Puts a backslash in front of every character that is not a word
    character under the UNICODE flag, so letters with diacritics stay
    unescaped.
    """
    # Raw literals keep the regex escapes away from Python's own string
    # escape processing ('\W' is not a valid string escape).
    return re.sub(r'(?u)(\W)', r'\\\1', query)
262
def _word_starts_with(name, prefix):
    """builds a Q object selecting models whose field `name` contains
    a word starting with `prefix`

    Word characters are alphanumerics and the underscore, like in JS.

    Works for MySQL, PostgreSQL, Oracle.
    For SQLite, _sqlite* version is substituted for this.
    """
    pattern = _no_diacritics_regexp(unicode_re_escape(prefix))
    lookup = '%s__iregex' % name
    # can't use [[:<:]] (word start),
    # but we want both `xy` and `(xy` to catch `(xyz)`
    return Q(**{lookup: u"(^|[^[:alnum:]_])%s" % pattern})
280
281
282 def _sqlite_word_starts_with(name, prefix):
283     """ version of _word_starts_with for SQLite
284
285     SQLite in Django uses Python re module
286     """
287     kwargs = {}
288     prefix = _no_diacritics_regexp(unicode_re_escape(prefix))
289     kwargs['%s__iregex' % name] = ur"(^|(?<=[^\wąćęłńóśźżĄĆĘŁŃÓŚŹŻ]))%s" % prefix
290     return Q(**kwargs)
291
292
# On SQLite replace the generic implementation with the SQLite-specific one.
if settings.DATABASE_ENGINE == 'sqlite3':
    _word_starts_with = _sqlite_word_starts_with
295
296
def _tags_starting_with(prefix, user=None):
    """Find books, tags and book stubs having a word starting with `prefix`.

    Books and book stubs are matched on their title, tags on their name.
    'book' tags are never returned; shelves ('set' tags) are returned
    only when they belong to the given authenticated user.

    Returns a list: books first, then tags, then book stubs.
    """
    prefix = prefix.lower()

    stub_matches = models.BookStub.objects.filter(_word_starts_with('title', prefix))
    books = models.Book.objects.filter(_word_starts_with('title', prefix))
    book_stubs = [stub for stub in stub_matches if stub not in books]

    not_a_book = ~Q(category='book')
    if user and user.is_authenticated():
        allowed = not_a_book & (~Q(category='set') | Q(user=user))
    else:
        allowed = not_a_book & ~Q(category='set')
    tags = models.Tag.objects.filter(_word_starts_with('name', prefix)).filter(allowed)

    return list(books) + list(tags) + book_stubs
308
309
def _get_result_link(match, tag_list):
    """Return the URL a search result should lead to.

    Books and book stubs link to their own page; a tag links to the
    tagged object list for `tag_list` extended with that tag.
    """
    if isinstance(match, (models.Book, models.BookStub)):
        return match.get_absolute_url()

    url_chunks = [tag.url_chunk for tag in tag_list + [match]]
    return reverse('catalogue.views.tagged_object_list',
        kwargs={'tags': '/'.join(url_chunks)}
    )
317
def _get_result_type(match):
    """Return 'book' for books and book stubs, the tag category otherwise."""
    if isinstance(match, (models.Book, models.BookStub)):
        return 'book'
    return match.category
324
325
326
def find_best_matches(query, user=None):
    """ Finds a Book, Tag or BookStub best matching a query.

    Returns a tuple with:
      - zero elements when nothing is found,
      - one element when a best result is found,
      - more than one element on multiple exact matches

    An exact match is a result whose lowercased name equals the
    lowercased query.

    Raises a ValueError on too short a query.
    """

    query = query.lower()
    if len(query) < 2:
        raise ValueError("query must have at least two characters")

    candidates = tuple(_tags_starting_with(query, user))
    exact = tuple([found for found in candidates if found.name.lower() == query])
    if not exact:
        # no exact match: settle for the first candidate, if any
        return candidates[:1]
    return exact
348
349
def search(request):
    """Search for a book, tag or book stub matching the `q` GET parameter.

    The optional `tags` GET parameter carries the tags selected so far;
    when it cannot be parsed it is ignored.

    A single result redirects straight to that result; several results
    are listed, and separate templates are rendered when nothing is found
    or the query is too short.
    """
    tags = request.GET.get('tags', '')
    prefix = request.GET.get('q', '')

    try:
        tag_list = models.Tag.get_tag_list(tags)
    except Exception:
        # Best effort: an unparsable tag list must not break the search.
        # Unlike a bare `except:`, this lets SystemExit and
        # KeyboardInterrupt through.
        tag_list = []

    try:
        result = find_best_matches(prefix, request.user)
    except ValueError:
        # find_best_matches() raises ValueError on too short a query
        return render_to_response('catalogue/search_too_short.html', {'tags':tag_list, 'prefix':prefix},
            context_instance=RequestContext(request))

    if len(result) == 1:
        return HttpResponseRedirect(_get_result_link(result[0], tag_list))
    elif len(result) > 1:
        # A list rather than a generator: the template can iterate it more
        # than once and test it for emptiness.
        results = [(x, _get_result_link(x, tag_list), _get_result_type(x)) for x in result]
        return render_to_response('catalogue/search_multiple_hits.html',
            {'tags':tag_list, 'prefix':prefix, 'results':results},
            context_instance=RequestContext(request))
    else:
        return render_to_response('catalogue/search_no_hits.html', {'tags':tag_list, 'prefix':prefix},
            context_instance=RequestContext(request))
374
375
def tags_starting_with(request):
    """Return the names matching the `q` GET parameter, one per line.

    Every name is preceded by a newline and duplicates are dropped,
    keeping the first occurrence. An empty response is returned when the
    prefix has fewer than two characters.
    """
    prefix = request.GET.get('q', '')
    # Prefix must have at least 2 characters
    if len(prefix) < 2:
        return HttpResponse('')

    seen_names = set()
    lines = []
    for match in _tags_starting_with(prefix, request.user):
        name = match.name
        if name in seen_names:
            continue
        seen_names.add(name)
        lines.append("\n" + name)
    return HttpResponse("".join(lines))
388
389 # ====================
390 # = Shelf management =
391 # ====================
@login_required
@cache.never_cache
def user_shelves(request):
    """Render the list of shelves ('set' tags) owned by the current user.

    The template context is built with locals().
    """
    new_set_form = forms.NewSetForm()
    shelves = models.Tag.objects.filter(user=request.user, category='set')
    return render_to_response(
        'catalogue/user_shelves.html',
        locals(),
        context_instance=RequestContext(request))
399
@cache.never_cache
def book_sets(request, slug):
    """Show or change the current user's shelves containing a book.

    Anonymous users only get a message asking them to log in. A valid POST
    replaces the user's shelves on the book with the ones chosen in the
    form and answers with a confirmation (AJAX) or a redirect to '/'.
    Otherwise the shelf selection form is rendered; the template context
    is built with locals().
    """
    book = get_object_or_404(models.Book, slug=slug)

    # Check the login before building any query filtered by request.user:
    # an anonymous user is not a valid value for such a filter.
    if not request.user.is_authenticated():
        return HttpResponse(_('<p>To maintain your shelves you need to be logged in.</p>'))

    user_sets = models.Tag.objects.filter(category='set', user=request.user)
    book_sets = book.tags.filter(category='set', user=request.user)

    if request.method == 'POST':
        form = forms.ObjectSetsForm(book, request.user, request.POST)
        if form.is_valid():
            old_shelves = list(book.tags.filter(category='set'))
            new_shelves = [models.Tag.objects.get(pk=set_id) for set_id in form.cleaned_data['set_ids']]

            # Clear book_count on every shelf that loses or gains the book
            # (presumably a cached counter recomputed on demand -- verify
            # against the Tag model).
            for shelf in [shelf for shelf in old_shelves if shelf not in new_shelves]:
                shelf.book_count = None
                shelf.save()

            for shelf in [shelf for shelf in new_shelves if shelf not in old_shelves]:
                shelf.book_count = None
                shelf.save()

            # keep every tag that is not one of this user's shelves
            book.tags = new_shelves + list(book.tags.filter(~Q(category='set') | ~Q(user=request.user)))
            if request.is_ajax():
                return HttpResponse(_('<p>Shelves were sucessfully saved.</p>'))
            else:
                return HttpResponseRedirect('/')
    else:
        form = forms.ObjectSetsForm(book, request.user)
        new_set_form = forms.NewSetForm()

    return render_to_response('catalogue/book_sets.html', locals(),
        context_instance=RequestContext(request))
434
435
@login_required
@require_POST
@cache.never_cache
def remove_from_shelf(request, shelf, book):
    """Remove a book from one of the current user's shelves.

    `shelf` and `book` are slugs; 404 is returned when the book or the
    user's shelf does not exist. The response is a plain message saying
    whether the book was removed or was not on the shelf.
    """
    book = get_object_or_404(models.Book, slug=book)
    shelf = get_object_or_404(models.Tag, slug=shelf, category='set', user=request.user)

    if shelf not in book.tags:
        return HttpResponse(_('This book is not on the shelf'))

    models.Tag.objects.remove_tag(book, shelf)

    # clear the stored count and save the shelf
    shelf.book_count = None
    shelf.save()

    return HttpResponse(_('Book was successfully removed from the shelf'))
452
453
def collect_books(books):
    """
    Returns all real books in collection.

    A book that has children is replaced, recursively, by its children;
    a book without children is returned itself. The result is a flat list
    in traversal order.
    """
    result = []
    for book in books:
        # Evaluate the children once and reuse them for both the emptiness
        # check and the recursion.
        children = list(book.children.all())
        if children:
            result.extend(collect_books(children))
        else:
            result.append(book)
    return result
465
466
@cache.never_cache
def download_shelf(request, slug):
    """
    Send a ZIP archive with the files of all books on a shelf.

    The formats to include are taken from DownloadFormatsForm bound to
    the query string; when none is selected (or the form is invalid) all
    known formats are included. Files are stored as '<book slug>.<format>'.

    The archive is built in a temporary file, but its whole content is
    read into the response body before it is returned.
    """
    all_formats = ('pdf', 'epub', 'odt', 'txt', 'mp3', 'ogg')
    shelf = get_object_or_404(models.Tag, slug=slug, category='set')

    formats = []
    form = forms.DownloadFormatsForm(request.GET)
    if form.is_valid():
        formats = form.cleaned_data['formats']
    if len(formats) == 0:
        formats = list(all_formats)

    # try/finally makes sure the temporary file is released even when
    # building the archive fails
    temp = tempfile.TemporaryFile()
    try:
        # Create a ZIP archive
        archive = zipfile.ZipFile(temp, 'w')
        try:
            for book in collect_books(models.Book.tagged.with_all(shelf)):
                for extension in all_formats:
                    if extension not in formats:
                        continue
                    # the file fields are named pdf_file, epub_file, ...
                    book_file = getattr(book, '%s_file' % extension)
                    if book_file:
                        archive.write(book_file.path, str('%s.%s' % (book.slug, extension)))
        finally:
            archive.close()

        # NOTE(review): both content_type and mimetype are passed; which one
        # wins depends on the Django version -- confirm and drop one.
        response = HttpResponse(content_type='application/zip', mimetype='application/x-zip-compressed')
        response['Content-Disposition'] = 'attachment; filename=%s.zip' % shelf.sort_key
        # after close() the file position is the size of the archive
        response['Content-Length'] = temp.tell()

        temp.seek(0)
        response.write(temp.read())
    finally:
        temp.close()
    return response
515
516
@cache.never_cache
def shelf_book_formats(request, shelf):
    """
    Returns a JSON object telling, for every format, whether at least
    one book on the shelf has a file in that format.
    """
    shelf = get_object_or_404(models.Tag, slug=shelf, category='set')

    formats = {'pdf': False, 'epub': False, 'odt': False, 'txt': False, 'mp3': False, 'ogg': False}

    for book in collect_books(models.Book.tagged.with_all(shelf)):
        for extension in formats:
            # the file fields are named pdf_file, epub_file, ...
            if getattr(book, '%s_file' % extension):
                formats[extension] = True

    return HttpResponse(LazyEncoder().encode(formats))
541
542
@login_required
@require_POST
@cache.never_cache
def new_set(request):
    """Create a new shelf for the current user from the posted NewSetForm.

    An AJAX request creating a shelf gets an HTML confirmation; every
    other case (non-AJAX request or invalid form) redirects to '/'.
    """
    # imported locally so the block does not depend on the module imports
    from django.utils.html import escape

    new_set_form = forms.NewSetForm(request.POST)
    if new_set_form.is_valid():
        new_set = new_set_form.save(request.user)

        if request.is_ajax():
            # The shelf comes from user input and is put into HTML:
            # escape its text representation.
            return HttpResponse(_('<p>Shelf <strong>%s</strong> was successfully created</p>') % escape(new_set))

    return HttpResponseRedirect('/')
557
558
@login_required
@require_POST
@cache.never_cache
def delete_shelf(request, slug):
    """Delete one of the current user's shelves.

    Responds with 404 when the user has no shelf with the given slug.
    AJAX requests get an HTML confirmation, other requests are redirected
    to '/'.
    """
    # imported locally so the block does not depend on the module imports
    from django.utils.html import escape

    user_set = get_object_or_404(models.Tag, slug=slug, category='set', user=request.user)
    user_set.delete()

    if request.is_ajax():
        # The shelf name comes from user input and is put into HTML: escape it.
        return HttpResponse(_('<p>Shelf <strong>%s</strong> was successfully removed</p>') % escape(user_set.name))
    else:
        return HttpResponseRedirect('/')
570
571
572 # ==================
573 # = Authentication =
574 # ==================
@require_POST
@cache.never_cache
def login(request):
    """Log the user in from a posted AuthenticationForm (prefix 'login').

    Responds with a JSON object holding a `success` flag and the form
    `errors` (empty on success).
    """
    form = AuthenticationForm(data=request.POST, prefix='login')
    if not form.is_valid():
        response_data = {'success': False, 'errors': form.errors}
    else:
        auth.login(request, form.get_user())
        response_data = {'success': True, 'errors': {}}

    encoder = LazyEncoder(ensure_ascii=False)
    return HttpResponse(encoder.encode(response_data))
585
586
@require_POST
@cache.never_cache
def register(request):
    """Create an account from a posted UserCreationForm (prefix
    'registration') and log the new user in.

    Responds with a JSON object holding a `success` flag and the form
    `errors` (empty on success).
    """
    registration_form = UserCreationForm(request.POST, prefix='registration')
    if not registration_form.is_valid():
        response_data = {'success': False, 'errors': registration_form.errors}
    else:
        registration_form.save()
        # authenticate() returns the user in the form auth.login() needs
        credentials = registration_form.cleaned_data
        user = auth.authenticate(
            username=credentials['username'],
            password=credentials['password1']
        )
        auth.login(request, user)
        response_data = {'success': True, 'errors': {}}

    encoder = LazyEncoder(ensure_ascii=False)
    return HttpResponse(encoder.encode(response_data))
602
603
@cache.never_cache
def logout_then_redirect(request):
    """Log the user out and redirect to the `next` GET parameter
    (default '/').

    The target is URL-quoted, leaving '/', '?' and '=' intact.
    """
    auth.logout(request)
    target = request.GET.get('next', '/')
    # '/' is not quoted, so a target starting with '//' would come out as
    # a protocol-relative URL pointing at another host; fall back to the
    # main page instead.
    if target.startswith('//'):
        target = '/'
    return HttpResponseRedirect(urlquote_plus(target, safe='/?='))
608
609
610
611 # =========
612 # = Admin =
613 # =========
@login_required
@staff_required
def import_book(request):
    """Import a book from an uploaded file (staff only).

    The BookImportForm is bound to the request's POST data and files.
    Responds with a plain message: success, the form errors, or the
    exception and traceback when saving the form fails.
    """
    book_import_form = forms.BookImportForm(request.POST, request.FILES)
    if book_import_form.is_valid():
        try:
            book_import_form.save()
        except Exception:
            # Report any import failure to the staff user; unlike a bare
            # `except:`, this lets SystemExit and KeyboardInterrupt through.
            info = sys.exc_info()
            exception = pprint.pformat(info[1])
            tb = '\n'.join(traceback.format_tb(info[2]))
            return HttpResponse(_("An error occurred: %(exception)s\n\n%(tb)s") % {'exception':exception, 'tb':tb}, mimetype='text/plain')
        return HttpResponse(_("Book imported successfully"))
    else:
        return HttpResponse(_("Error importing file: %r") % book_import_form.errors)
630
631
632
def clock(request):
    """ Provides server time for jquery.countdown,
    in a format suitable for Date.parse()

    The time is the server's local time as returned by datetime.now().
    """
    from datetime import datetime
    server_time = datetime.now()
    return HttpResponse(server_time.strftime('%Y/%m/%d %H:%M:%S'))