X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/73ce961f14509aabfa26536f847afd28111029c6..2d0838ea349cef1976bb1face31f918a56bb915f:/apps/opds/views.py diff --git a/apps/opds/views.py b/apps/opds/views.py index dc094bba7..297c2120a 100644 --- a/apps/opds/views.py +++ b/apps/opds/views.py @@ -2,10 +2,8 @@ # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from base64 import b64encode import os.path from urlparse import urljoin -from urllib2 import unquote from django.contrib.syndication.views import Feed from django.core.urlresolvers import reverse @@ -18,11 +16,14 @@ from django.contrib.sites.models import Site from basicauth import logged_in_or_basicauth, factory_decorator from catalogue.models import Book, Tag -from search import Search, SearchResult, JVM +from search.views import Search, SearchResult from lucene import Term, QueryWrapperFilter, TermQuery +import logging import re +log = logging.getLogger('opds') + from stats.utils import piwik_track _root_feeds = ( @@ -91,7 +92,7 @@ class OPDSFeed(Atom1Feed): {u"href": reverse("opds_authors"), u"rel": u"start", u"type": u"application/atom+xml"}) - handler.addQuickElement(u"link", None, + handler.addQuickElement(u"link", None, {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")), u"rel": u"search", u"type": u"application/opensearchdescription+xml"}) @@ -187,19 +188,19 @@ class AcquisitionFeed(Feed): return u'' def item_enclosure_url(self, book): - return full_url(book.root_ancestor.epub_file.url) + return full_url(book.epub_file.url) if book.epub_file else None def item_enclosure_length(self, book): - return book.root_ancestor.epub_file.size + return book.epub_file.size if book.epub_file else None @piwik_track class RootFeed(Feed): feed_type = OPDSFeed title = u'Wolne Lektury' - link = u'http://www.wolnelektury.pl/' + link = u'http://wolnelektury.pl/' description = u"Spis utworów na stronie http://WolneLektury.pl" author_name = u"Wolne Lektury" - author_link = u"http://www.wolnelektury.pl/" + author_link = u"http://wolnelektury.pl/" def items(self): return _root_feeds @@ -216,10 +217,10 @@ class RootFeed(Feed): @piwik_track class ByCategoryFeed(Feed): feed_type = OPDSFeed - link = u'http://www.wolnelektury.pl/' + link = u'http://wolnelektury.pl/' description = u"Spis utworów na stronie http://WolneLektury.pl" author_name = u"Wolne Lektury" - author_link = u"http://www.wolnelektury.pl/" + author_link = u"http://wolnelektury.pl/" def get_object(self, request, category): feed = [feed for feed in _root_feeds if feed['category']==category] @@ -261,7 +262,7 @@ class ByTagFeed(AcquisitionFeed): def items(self, tag): books = Book.tagged.with_any([tag]) - l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books]) + l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()]) descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)] if descendants_keys: books = books.exclude(pk__in=descendants_keys) @@ -276,7 +277,7 @@ class UserFeed(Feed): link = u'http://www.wolnelektury.pl/' description = u"Półki użytkownika na stronie http://WolneLektury.pl" author_name = u"Wolne Lektury" - author_link = u"http://www.wolnelektury.pl/" + author_link = u"http://wolnelektury.pl/" def get_object(self, request): return request.user @@ -328,7 +329,7 @@ class SearchFeed(AcquisitionFeed): title = u"Wyniki wyszukiwania" INLINE_QUERY_RE = re.compile(r"(author:(?P[^ ]+)|title:(?P[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))") - + def get_object(self, request): """ For OPDS 1.1 We should handle a query for search terms @@ -336,7 +337,7 @@ class SearchFeed(AcquisitionFeed): OpenSearch defines fields: atom:author, atom:contributor (treated as translator), atom:title. Inline query provides author, title, categories (treated as book tags), description (treated as content search terms). - + if search terms are provided, we shall search for books according to Hint information (from author & contributror & title). @@ -344,7 +345,6 @@ class SearchFeed(AcquisitionFeed): (perhaps for is_book=True) """ - JVM.attachCurrentThread() query = request.GET.get('q', '') @@ -352,17 +352,16 @@ class SearchFeed(AcquisitionFeed): if inline_criteria: def get_criteria(criteria, name, position): e = filter(lambda el: el[0][0:len(name)] == name, criteria) - print e + log.info("get_criteria: %s" % e) if not e: return None c = e[0][position] - print c + log.info("get_criteria: %s" % c) if c[0] == '"' and c[-1] == '"': c = c[1:-1] c = c.replace('+', ' ') return c - #import pdb; pdb.set_trace() author = get_criteria(inline_criteria, 'author', 1) title = get_criteria(inline_criteria, 'title', 2) translator = None @@ -372,10 +371,14 @@ class SearchFeed(AcquisitionFeed): author = request.GET.get('author', '') title = request.GET.get('title', '') translator = request.GET.get('translator', '') + + # Our client didn't handle the opds placeholders + if author == '{atom:author}': author = '' + if title == '{atom:title}': title = '' + if translator == '{atom:contributor}': translator = '' categories = None fuzzy = False - srch = Search() hint = srch.hint() @@ -385,11 +388,12 @@ class SearchFeed(AcquisitionFeed): filters = [] if author: - print "narrow to author %s" % author - hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author')))) + log.info("narrow to author %s" % author) + hint.tags(srch.search_tags(srch.make_phrase(srch.get_tokens(author, field='authors'), field='authors'), + filt=srch.term_filter(Term('tag_category', 'author')))) if translator: - print "filter by translator %s" % translator + log.info("filter by translator %s" % translator) filters.append(QueryWrapperFilter( srch.make_phrase(srch.get_tokens(translator, field='translators'), field='translators'))) @@ -401,18 +405,25 @@ class SearchFeed(AcquisitionFeed): flt = srch.chain_filters(filters) if title: - print "hint by book title %s" % title + log.info("hint by book title %s" % title) q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title') - hint.books(*srch.search_books(q, filter=flt)) + hint.books(*srch.search_books(q, filt=flt)) toks = srch.get_tokens(query) - print "tokens: %s" % toks - # import pdb; pdb.set_trace() + log.info("tokens for query: %s" % toks) + results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint), srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint), srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint)) results.sort(reverse=True) - return [r.book for r in results] + books = [] + for r in results: + try: + books.append(r.book) + except Book.DoesNotExist: + pass + log.info("books: %s" % books) + return books else: # Scenario 2: since we no longer have to figure out what the query term means to the user, # we can just use filters and not the Hint class. @@ -430,7 +441,7 @@ class SearchFeed(AcquisitionFeed): srch.make_phrase(srch.get_tokens(q, field=fld), field=fld))) flt = srch.chain_filters(filters) - books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt) + books = srch.search_books(TermQuery(Term('is_book', 'true')), filt=flt) return books def get_link(self, query):