X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/cf9abb6f419c407ad53036d24c36b50105adeeff..e7ef95e5867bba96cb445fef0d8932ab5ed6c938:/apps/opds/views.py diff --git a/apps/opds/views.py b/apps/opds/views.py index a51b5b187..c907fe198 100644 --- a/apps/opds/views.py +++ b/apps/opds/views.py @@ -4,6 +4,8 @@ # from base64 import b64encode import os.path +from urlparse import urljoin +from urllib2 import unquote from django.contrib.syndication.views import Feed from django.core.urlresolvers import reverse @@ -15,8 +17,13 @@ from django.contrib.sites.models import Site from basicauth import logged_in_or_basicauth, factory_decorator from catalogue.models import Book, Tag -from catalogue.views import books_starting_with +from search import MultiSearch, SearchResult, JVM +from lucene import Term, QueryWrapperFilter, TermQuery + +import re + +from stats.utils import piwik_track _root_feeds = ( { @@ -58,7 +65,7 @@ _root_feeds = ( def full_url(url): - return "http://%s%s" % (Site.objects.get_current().domain, url) + return urljoin("http://%s" % Site.objects.get_current().domain, url) class OPDSFeed(Atom1Feed): @@ -185,7 +192,7 @@ class AcquisitionFeed(Feed): def item_enclosure_length(self, book): return book.root_ancestor.epub_file.size - +@piwik_track class RootFeed(Feed): feed_type = OPDSFeed title = u'Wolne Lektury' @@ -206,7 +213,7 @@ class RootFeed(Feed): def item_description(self, item): return item['description'] - +@piwik_track class ByCategoryFeed(Feed): feed_type = OPDSFeed link = u'http://www.wolnelektury.pl/' @@ -227,7 +234,7 @@ class ByCategoryFeed(Feed): return feed['title'] def items(self, feed): - return (tag for tag in Tag.objects.filter(category=feed['category']) if tag.get_count() > 0) + return Tag.objects.filter(category=feed['category']).exclude(book_count=0) def item_title(self, item): return item.name @@ -238,7 +245,7 @@ class ByCategoryFeed(Feed): def item_description(self): return u'' - +@piwik_track class ByTagFeed(AcquisitionFeed): def 
link(self, tag): return tag.get_absolute_url() @@ -263,6 +270,7 @@ class ByTagFeed(AcquisitionFeed): @factory_decorator(logged_in_or_basicauth()) +@piwik_track class UserFeed(Feed): feed_type = OPDSFeed link = u'http://www.wolnelektury.pl/' @@ -277,7 +285,7 @@ class UserFeed(Feed): return u"Półki użytkownika %s" % user.username def items(self, user): - return (tag for tag in Tag.objects.filter(category='set', user=user) if tag.get_count() > 0) + return Tag.objects.filter(category='set', user=user).exclude(book_count=0) def item_title(self, item): return item.name @@ -288,8 +296,12 @@ class UserFeed(Feed): def item_description(self): return u'' +# no class decorators in python 2.5 +#UserFeed = factory_decorator(logged_in_or_basicauth())(UserFeed) + @factory_decorator(logged_in_or_basicauth()) +@piwik_track class UserSetFeed(AcquisitionFeed): def link(self, tag): return tag.get_absolute_url() @@ -306,20 +318,127 @@ class UserSetFeed(AcquisitionFeed): def items(self, tag): return Book.tagged.with_any([tag]) +# no class decorators in python 2.5 +#UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed) + +@piwik_track class SearchFeed(AcquisitionFeed): description = u"Wyniki wyszukiwania na stronie WolneLektury.pl" title = u"Wyniki wyszukiwania" + + INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))") def get_object(self, request): - return request.GET.get('q', '') + """ + For OPDS 1.1 We should handle a query for search terms + and criteria provided either as opensearch or 'inline' query. + OpenSearch defines fields: atom:author, atom:contributor (treated as translator), + atom:title. Inline query provides author, title, categories (treated as book tags), + description (treated as content search terms). + + if search terms are provided, we shall search for books + according to Hint information (from author & contributor & title). 
+ + but if search terms are empty, we should do a different search + (perhaps for is_book=True) + + """ + JVM.attachCurrentThread() + + query = request.GET.get('q', '') + + inline_criteria = re.findall(self.INLINE_QUERY_RE, query) + if inline_criteria: + def get_criteria(criteria, name, position): + e = filter(lambda el: el[0][0:len(name)] == name, criteria) + print e + if not e: + return None + c = e[0][position] + print c + if c[0] == '"' and c[-1] == '"': + c = c[1:-1] + c = c.replace('+', ' ') + return c + + #import pdb; pdb.set_trace() + author = get_criteria(inline_criteria, 'author', 1) + title = get_criteria(inline_criteria, 'title', 2) + translator = None + categories = get_criteria(inline_criteria, 'categories', 3) + query = get_criteria(inline_criteria, 'description', 4) + else: + author = request.GET.get('author', '') + title = request.GET.get('title', '') + translator = request.GET.get('translator', '') + categories = None + fuzzy = False + + + srch = MultiSearch() + hint = srch.hint() + + # Scenario 1: full search terms provided. + # Use auxiliary information to narrow it and make it better. 
+ if query: + filters = [] + + if author: + print "narrow to author %s" % author + hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author')))) + + if translator: + print "filter by translator %s" % translator + filters.append(QueryWrapperFilter( + srch.make_phrase(srch.get_tokens(translator, field='translators'), + field='translators'))) + + if categories: + filters.append(QueryWrapperFilter( + srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"), + field='tag_name_pl'))) + + flt = srch.chain_filters(filters) + if title: + print "hint by book title %s" % title + q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title') + hint.books(*srch.search_books(q, filter=flt)) + + toks = srch.get_tokens(query) + print "tokens: %s" % toks + # import pdb; pdb.set_trace() + results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint), + srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint), + srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint)) + results.sort(reverse=True) + return [r.book for r in results] + else: + # Scenario 2: since we no longer have to figure out what the query term means to the user, + # we can just use filters and not the Hint class. + filters = [] + + fields = { + 'author': author, + 'translators': translator, + 'title': title + } + + for fld, q in fields.items(): + if q: + filters.append(QueryWrapperFilter( + srch.make_phrase(srch.get_tokens(q, field=fld), field=fld))) + + flt = srch.chain_filters(filters) + books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt) + return books def get_link(self, query): - return "%s?q=%s" % (reverse('search'), query) + return "%s?q=%s" % (reverse('search'), query) - def items(self, query): + def items(self, books): try: - return books_starting_with(query) + return books except ValueError: # too short a query return []