Nicer search, minor fixes.

[wolnelektury.git] / src / opds / views.py
diff --git a/src/opds/views.py b/src/opds/views.py

index 001b69d..63c79a2 100644 (file)
--- a/src/opds/views.py
+++ b/src/opds/views.py
@@ -1,13 +1,13 @@
-# -*- coding: utf-8 -*-
  # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+from functools import reduce
  import os.path
  import os.path
-from urlparse import urljoin
+from urllib.parse import urljoin
  
  from django.contrib.syndication.views import Feed
  
  from django.contrib.syndication.views import Feed
-from django.core.urlresolvers import reverse
  from django.shortcuts import get_object_or_404
  from django.shortcuts import get_object_or_404
+from django.urls import reverse
  from django.utils.feedgenerator import Atom1Feed
  from django.conf import settings
  from django.http import Http404
  from django.utils.feedgenerator import Atom1Feed
  from django.conf import settings
  from django.http import Http404
@@ -16,8 +16,8 @@ from django.utils.functional import lazy
  
  from basicauth import logged_in_or_basicauth, factory_decorator
  from catalogue.models import Book, Tag
  
  from basicauth import logged_in_or_basicauth, factory_decorator
  from catalogue.models import Book, Tag
+from search.utils import UnaccentSearchQuery, UnaccentSearchVector
  
  
-from search.views import Search
  import operator
  import logging
  import re
  import operator
  import logging
  import re
@@ -28,39 +28,39 @@ log = logging.getLogger('opds')
  
  _root_feeds = (
      {
  
  _root_feeds = (
      {
-        u"category": u"",
-        u"link": u"opds_user",
-        u"link_args": [],
-        u"title": u"Moje półki",
-        u"description": u"Półki użytkownika dostępne po zalogowaniu"
+        "category": "",
+        "link": "opds_user",
+        "link_args": [],
+        "title": "Moje półki",
+        "description": "Półki użytkownika dostępne po zalogowaniu"
      },
      {
      },
      {
-        u"category": u"author",
-        u"link": u"opds_by_category",
-        u"link_args": [u"author"],
-        u"title": u"Autorzy",
-        u"description": u"Utwory wg autorów"
+        "category": "author",
+        "link": "opds_by_category",
+        "link_args": ["author"],
+        "title": "Autorzy",
+        "description": "Utwory wg autorów"
      },
      {
      },
      {
-        u"category": u"kind",
-        u"link": u"opds_by_category",
-        u"link_args": [u"kind"],
-        u"title": u"Rodzaje",
-        u"description": u"Utwory wg rodzajów"
+        "category": "kind",
+        "link": "opds_by_category",
+        "link_args": ["kind"],
+        "title": "Rodzaje",
+        "description": "Utwory wg rodzajów"
      },
      {
      },
      {
-        u"category": u"genre",
-        u"link": u"opds_by_category",
-        u"link_args": [u"genre"],
-        u"title": u"Gatunki",
-        u"description": u"Utwory wg gatunków"
+        "category": "genre",
+        "link": "opds_by_category",
+        "link_args": ["genre"],
+        "title": "Gatunki",
+        "description": "Utwory wg gatunków"
      },
      {
      },
      {
-        u"category": u"epoch",
-        u"link": u"opds_by_category",
-        u"link_args": [u"epoch"],
-        u"title": u"Epoki",
-        u"description": u"Utwory wg epok"
+        "category": "epoch",
+        "link": "opds_by_category",
+        "link_args": ["epoch"],
+        "title": "Epoki",
+        "description": "Utwory wg epok"
      },
  )
  
      },
  )
  
@@ -73,62 +73,62 @@ def full_url(url):
  
  
  class OPDSFeed(Atom1Feed):
  
  
  class OPDSFeed(Atom1Feed):
-    link_rel = u"subsection"
-    link_type = u"application/atom+xml"
+    link_rel = "subsection"
+    link_type = "application/atom+xml"
  
      _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
      try:
  
      _book_parent_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book-parent.png")), str)()
      try:
-        _book_parent_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
-    except IOError:
+        _book_parent_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book-parent.png")))
+    except OSError:
          _book_parent_img_size = ''
  
      _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
      try:
          _book_parent_img_size = ''
  
      _book_img = lazy(lambda: full_url(os.path.join(settings.STATIC_URL, "img/book.png")), str)()
      try:
-        _book_img_size = unicode(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
-    except IOError:
+        _book_img_size = str(os.path.getsize(os.path.join(settings.STATIC_ROOT, "img/book.png")))
+    except OSError:
          _book_img_size = ''
  
      def add_root_elements(self, handler):
          super(OPDSFeed, self).add_root_elements(handler)
          _book_img_size = ''
  
      def add_root_elements(self, handler):
          super(OPDSFeed, self).add_root_elements(handler)
-        handler.addQuickElement(u"link", None,
-                                {u"href": reverse("opds_authors"),
-                                 u"rel": u"start",
-                                 u"type": u"application/atom+xml"})
-        handler.addQuickElement(u"link", None,
-                                {u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
-                                 u"rel": u"search",
-                                 u"type": u"application/opensearchdescription+xml"})
+        handler.addQuickElement("link", None,
+                                {"href": reverse("opds_authors"),
+                                 "rel": "start",
+                                 "type": "application/atom+xml"})
+        handler.addQuickElement("link", None,
+                                {"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
+                                 "rel": "search",
+                                 "type": "application/opensearchdescription+xml"})
  
      def add_item_elements(self, handler, item):
          """ modified from Atom1Feed.add_item_elements """
  
      def add_item_elements(self, handler, item):
          """ modified from Atom1Feed.add_item_elements """
-        handler.addQuickElement(u"title", item['title'])
+        handler.addQuickElement("title", item['title'])
  
          # add a OPDS Navigation link if there's no enclosure
  
          # add a OPDS Navigation link if there's no enclosure
-        if item['enclosure'] is None:
+        if not item.get('enclosures') is None:
              handler.addQuickElement(
              handler.addQuickElement(
-                u"link", u"", {u"href": item['link'], u"rel": u"subsection", u"type": u"application/atom+xml"})
+                "link", "", {"href": item['link'], "rel": "subsection", "type": "application/atom+xml"})
              # add a "green book" icon
              handler.addQuickElement(
              # add a "green book" icon
              handler.addQuickElement(
-                u"link", '',
+                "link", '',
                  {
                  {
-                    u"rel": u"http://opds-spec.org/thumbnail",
-                    u"href": self._book_parent_img,
-                    u"length": self._book_parent_img_size,
-                    u"type": u"image/png",
+                    "rel": "http://opds-spec.org/thumbnail",
+                    "href": self._book_parent_img,
+                    "length": self._book_parent_img_size,
+                    "type": "image/png",
                  })
          if item['pubdate'] is not None:
              # FIXME: rfc3339_date is undefined, is this ever run?
                  })
          if item['pubdate'] is not None:
              # FIXME: rfc3339_date is undefined, is this ever run?
-            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
+            handler.addQuickElement("updated", rfc3339_date(item['pubdate']).decode('utf-8'))
  
          # Author information.
          if item['author_name'] is not None:
  
          # Author information.
          if item['author_name'] is not None:
-            handler.startElement(u"author", {})
-            handler.addQuickElement(u"name", item['author_name'])
+            handler.startElement("author", {})
+            handler.addQuickElement("name", item['author_name'])
              if item['author_email'] is not None:
              if item['author_email'] is not None:
-                handler.addQuickElement(u"email", item['author_email'])
+                handler.addQuickElement("email", item['author_email'])
              if item['author_link'] is not None:
              if item['author_link'] is not None:
-                handler.addQuickElement(u"uri", item['author_link'])
-            handler.endElement(u"author")
+                handler.addQuickElement("uri", item['author_link'])
+            handler.endElement("author")
  
          # Unique ID.
          if item['unique_id'] is not None:
  
          # Unique ID.
          if item['unique_id'] is not None:
@@ -136,72 +136,72 @@ class OPDSFeed(Atom1Feed):
          else:
              # FIXME: get_tag_uri is undefined, is this ever run?
              unique_id = get_tag_uri(item['link'], item['pubdate'])
          else:
              # FIXME: get_tag_uri is undefined, is this ever run?
              unique_id = get_tag_uri(item['link'], item['pubdate'])
-        handler.addQuickElement(u"id", unique_id)
+        handler.addQuickElement("id", unique_id)
  
          # Summary.
          # OPDS needs type=text
          if item['description'] is not None:
  
          # Summary.
          # OPDS needs type=text
          if item['description'] is not None:
-            handler.addQuickElement(u"summary", item['description'], {u"type": u"text"})
+            handler.addQuickElement("summary", item['description'], {"type": "text"})
  
          # Enclosure as OPDS Acquisition Link
  
          # Enclosure as OPDS Acquisition Link
-        if item['enclosure'] is not None:
+        for enc in item.get('enclosures', []):
              handler.addQuickElement(
              handler.addQuickElement(
-                u"link", '',
+                "link", '',
                  {
                  {
-                    u"rel": u"http://opds-spec.org/acquisition",
-                    u"href": item['enclosure'].url,
-                    u"length": item['enclosure'].length,
-                    u"type": item['enclosure'].mime_type,
+                    "rel": "http://opds-spec.org/acquisition",
+                    "href": enc.url,
+                    "length": enc.length,
+                    "type": enc.mime_type,
                  })
              # add a "red book" icon
              handler.addQuickElement(
                  })
              # add a "red book" icon
              handler.addQuickElement(
-                u"link", '',
+                "link", '',
                  {
                  {
-                    u"rel": u"http://opds-spec.org/thumbnail",
-                    u"href": self._book_img,
-                    u"length": self._book_img_size,
-                    u"type": u"image/png",
+                    "rel": "http://opds-spec.org/thumbnail",
+                    "href": self._book_img,
+                    "length": self._book_img_size,
+                    "type": "image/png",
                  })
  
          # Categories.
          for cat in item['categories']:
                  })
  
          # Categories.
          for cat in item['categories']:
-            handler.addQuickElement(u"category", u"", {u"term": cat})
+            handler.addQuickElement("category", "", {"term": cat})
  
          # Rights.
          if item['item_copyright'] is not None:
  
          # Rights.
          if item['item_copyright'] is not None:
-            handler.addQuickElement(u"rights", item['item_copyright'])
+            handler.addQuickElement("rights", item['item_copyright'])
  
  
  class AcquisitionFeed(Feed):
      feed_type = OPDSFeed
  
  
  class AcquisitionFeed(Feed):
      feed_type = OPDSFeed
-    link = u'http://www.wolnelektury.pl/'
+    link = 'http://www.wolnelektury.pl/'
      item_enclosure_mime_type = "application/epub+zip"
      item_enclosure_mime_type = "application/epub+zip"
-    author_name = u"Wolne Lektury"
-    author_link = u"http://www.wolnelektury.pl/"
+    author_name = "Wolne Lektury"
+    author_link = "http://www.wolnelektury.pl/"
  
      def item_title(self, book):
          return book.title
  
      def item_description(self):
  
      def item_title(self, book):
          return book.title
  
      def item_description(self):
-        return u''
+        return ''
  
      def item_link(self, book):
          return book.get_absolute_url()
  
      def item_author_name(self, book):
          try:
  
      def item_link(self, book):
          return book.get_absolute_url()
  
      def item_author_name(self, book):
          try:
-            return book.tags.filter(category='author')[0].name
-        except KeyError:
-            return u''
+            return book.authors().first().name
+        except AttributeError:
+            return ''
  
      def item_author_link(self, book):
          try:
  
      def item_author_link(self, book):
          try:
-            return book.tags.filter(category='author')[0].get_absolute_url()
-        except KeyError:
-            return u''
+            return book.authors().first().get_absolute_url()
+        except AttributeError:
+            return ''
  
      def item_enclosure_url(self, book):
  
      def item_enclosure_url(self, book):
-        return full_url(book.epub_file.url) if book.epub_file else None
+        return full_url(book.epub_url()) if book.epub_file else None
  
      def item_enclosure_length(self, book):
          return book.epub_file.size if book.epub_file else None
  
      def item_enclosure_length(self, book):
          return book.epub_file.size if book.epub_file else None
@@ -210,11 +210,11 @@ class AcquisitionFeed(Feed):
  @piwik_track
  class RootFeed(Feed):
      feed_type = OPDSFeed
  @piwik_track
  class RootFeed(Feed):
      feed_type = OPDSFeed
-    title = u'Wolne Lektury'
-    link = u'http://wolnelektury.pl/'
-    description = u"Spis utworów na stronie http://WolneLektury.pl"
-    author_name = u"Wolne Lektury"
-    author_link = u"http://wolnelektury.pl/"
+    title = 'Wolne Lektury'
+    link = 'http://wolnelektury.pl/'
+    description = "Spis utworów na stronie http://WolneLektury.pl"
+    author_name = "Wolne Lektury"
+    author_link = "http://wolnelektury.pl/"
  
      def items(self):
          return _root_feeds
  
      def items(self):
          return _root_feeds
@@ -232,10 +232,10 @@ class RootFeed(Feed):
  @piwik_track
  class ByCategoryFeed(Feed):
      feed_type = OPDSFeed
  @piwik_track
  class ByCategoryFeed(Feed):
      feed_type = OPDSFeed
-    link = u'http://wolnelektury.pl/'
-    description = u"Spis utworów na stronie http://WolneLektury.pl"
-    author_name = u"Wolne Lektury"
-    author_link = u"http://wolnelektury.pl/"
+    link = 'http://wolnelektury.pl/'
+    description = "Spis utworów na stronie http://WolneLektury.pl"
+    author_name = "Wolne Lektury"
+    author_link = "http://wolnelektury.pl/"
  
      def get_object(self, request, category):
          feed = [feed for feed in _root_feeds if feed['category'] == category]
  
      def get_object(self, request, category):
          feed = [feed for feed in _root_feeds if feed['category'] == category]
@@ -259,7 +259,7 @@ class ByCategoryFeed(Feed):
          return reverse("opds_by_tag", args=[item.category, item.slug])
  
      def item_description(self):
          return reverse("opds_by_tag", args=[item.category, item.slug])
  
      def item_description(self):
-        return u''
+        return ''
  
  
  @piwik_track
  
  
  @piwik_track
@@ -271,7 +271,7 @@ class ByTagFeed(AcquisitionFeed):
          return tag.name
  
      def description(self, tag):
          return tag.name
  
      def description(self, tag):
-        return u"Spis utworów na stronie http://WolneLektury.pl"
+        return "Spis utworów na stronie http://WolneLektury.pl"
  
      def get_object(self, request, category, slug):
          return get_object_or_404(Tag, category=category, slug=slug)
  
      def get_object(self, request, category, slug):
          return get_object_or_404(Tag, category=category, slug=slug)
@@ -284,16 +284,16 @@ class ByTagFeed(AcquisitionFeed):
  @piwik_track
  class UserFeed(Feed):
      feed_type = OPDSFeed
  @piwik_track
  class UserFeed(Feed):
      feed_type = OPDSFeed
-    link = u'http://www.wolnelektury.pl/'
-    description = u"Półki użytkownika na stronie http://WolneLektury.pl"
-    author_name = u"Wolne Lektury"
-    author_link = u"http://wolnelektury.pl/"
+    link = 'http://www.wolnelektury.pl/'
+    description = "Półki użytkownika na stronie http://WolneLektury.pl"
+    author_name = "Wolne Lektury"
+    author_link = "http://wolnelektury.pl/"
  
      def get_object(self, request):
          return request.user
  
      def title(self, user):
  
      def get_object(self, request):
          return request.user
  
      def title(self, user):
-        return u"Półki użytkownika %s" % user.username
+        return "Półki użytkownika %s" % user.username
  
      def items(self, user):
          return Tag.objects.filter(category='set', user=user).exclude(items=None)
  
      def items(self, user):
          return Tag.objects.filter(category='set', user=user).exclude(items=None)
@@ -305,7 +305,7 @@ class UserFeed(Feed):
          return reverse("opds_user_set", args=[item.slug])
  
      def item_description(self):
          return reverse("opds_user_set", args=[item.slug])
  
      def item_description(self):
-        return u''
+        return ''
  
  
  @factory_decorator(logged_in_or_basicauth())
  
  
  @factory_decorator(logged_in_or_basicauth())
@@ -318,7 +318,7 @@ class UserSetFeed(AcquisitionFeed):
          return tag.name
  
      def description(self, tag):
          return tag.name
  
      def description(self, tag):
-        return u"Spis utworów na stronie http://WolneLektury.pl"
+        return "Spis utworów na stronie http://WolneLektury.pl"
  
      def get_object(self, request, slug):
          return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
  
      def get_object(self, request, slug):
          return get_object_or_404(Tag, category='set', slug=slug, user=request.user)
@@ -329,8 +329,8 @@ class UserSetFeed(AcquisitionFeed):
  
  @piwik_track
  class SearchFeed(AcquisitionFeed):
  
  @piwik_track
  class SearchFeed(AcquisitionFeed):
-    description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
-    title = u"Wyniki wyszukiwania"
+    description = "Wyniki wyszukiwania na stronie WolneLektury.pl"
+    title = "Wyniki wyszukiwania"
  
      QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
      INLINE_QUERY_RE = re.compile(
  
      QUOTE_OR_NOT = r'(?:(?=["])"([^"]+)"|([^ ]+))'
      INLINE_QUERY_RE = re.compile(
@@ -350,15 +350,6 @@ class SearchFeed(AcquisitionFeed):
          'text': (10, 11),
          }
  
          'text': (10, 11),
          }
  
-    PARAMS_TO_FIELDS = {
-        'author': 'authors',
-        'translator': 'translators',
-        #        'title': 'title',
-        'categories': 'tag_name_pl',
-        'description': 'text',
-        #        'text': 'text',
-        }
-
      ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
  
      def get_object(self, request):
      ATOM_PLACEHOLDER = re.compile(r"^{(atom|opds):\w+}$")
  
      def get_object(self, request):
@@ -413,30 +404,33 @@ class SearchFeed(AcquisitionFeed):
              # query is set above.
              log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
  
              # query is set above.
              log.debug("Inline query = [%s], criteria: %s" % (query, criteria))
  
-        srch = Search()
-
-        book_hit_filter = srch.index.Q(book_id__any=True)
-        filters = [book_hit_filter] + [srch.index.Q(
-            **{self.PARAMS_TO_FIELDS.get(cn, cn): criteria[cn]}
-            ) for cn in self.MATCHES.keys() if cn in criteria
-            if criteria[cn]]
-
+        books = Book.objects.filter(findable=True).annotate(
+            search_vector=UnaccentSearchVector('title')
+        )
          if query:
          if query:
-            q = srch.index.query(
-                reduce(
-                    operator.or_,
-                    [srch.index.Q(**{self.PARAMS_TO_FIELDS.get(cn, cn): query}) for cn in self.MATCHES.keys()],
-                    srch.index.Q()))
-        else:
-            q = srch.index.query(srch.index.Q())
-
-        q = srch.apply_filters(q, filters).field_limit(score=True, fields=['book_id'])
-        results = q.execute()
-
-        book_scores = dict([(r['book_id'], r['score']) for r in results])
-        books = Book.objects.filter(id__in=set([r['book_id'] for r in results]))
-        books = list(books)
-        books.sort(reverse=True, key=lambda book: book_scores[book.id])
+            squery = UnaccentSearchQuery(query, config=settings.SEARCH_CONFIG)
+            books = books.filter(search_vector=squery)
+        if criteria['author']:
+            authors = Tag.objects.filter(category='author').annotate(
+                search_vector=UnaccentSearchVector('name_pl')
+            ).filter(search_vector=UnaccentSearchQuery(criteria['author'], config=settings.SEARCH_CONFIG))
+            books = books.filter(tag_relations__tag__in=authors)
+        if criteria['categories']:
+            tags = Tag.objects.filter(category__in=('genre', 'kind', 'epoch')).annotate(
+                search_vector=UnaccentSearchVector('name_pl')
+            ).filter(search_vector=UnaccentSearchQuery(criteria['categories'], config=settings.SEARCH_CONFIG))
+            books = books.filter(tag_relations__tag__in=tags)
+        if criteria['translator']:
+            # TODO
+            pass
+        if criteria['title']:
+            books = books.filter(
+                search_vector=UnaccentSearchQuery(criteria['title'], config=settings.SEARCH_CONFIG)
+            )
+
+        books = books.exclude(ancestor__in=books)
+
+        books = books.order_by('popularity__count')
          return books
  
      def get_link(self, query):
          return books
  
      def get_link(self, query):