Test for parenthood by looking for children instead of HTML.

[wolnelektury.git] / apps / oai / handlers.py
diff --git a/apps/oai/handlers.py b/apps/oai/handlers.py

index 26e1cfa..142b81f 100644 (file)
--- a/apps/oai/handlers.py
+++ b/apps/oai/handlers.py
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
  from oaipmh import server, common, metadata, error
  from catalogue.models import Book, Tag
  from api.models import Deleted
  from oaipmh import server, common, metadata, error
  from catalogue.models import Book, Tag
  from api.models import Deleted
@@ -12,27 +16,33 @@ from lxml.etree import ElementTree
  from django.db.models import Q
  from django.conf import settings
  from django.contrib.sites.models import Site
  from django.db.models import Q
  from django.conf import settings
  from django.contrib.sites.models import Site
+from django.utils import timezone
  
  
  
  
+make_time_naive = lambda d: timezone.localtime(d).replace(tzinfo=None)
+
+WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()' 
  wl_dc_reader = metadata.MetadataReader(
      fields={
  wl_dc_reader = metadata.MetadataReader(
      fields={
-    'title':       ('textList', 'rdf:RDF/rdf:Description/dc:title/text()'),
-    'creator':     ('textList', 'rdf:RDF/rdf:Description/dc:creator/text()'),
-    'subject':     ('textList', 'rdf:RDF/rdf:Description/dc:subject.period/text() | rdf:RDF/rdf:Description/dc:subject.type/text() | rdf:RDF/rdf:Description/dc:subject.genre/text()'),
-    'description': ('textList', 'rdf:RDF/rdf:Description/dc:description/text()'),
-    'publisher':   ('textList', 'rdf:RDF/rdf:Description/dc:publisher/text()'),
-    'contributor': ('textList', 'rdf:RDF/rdf:Description/dc:contributor.editor/text() | rdf:RDF/rdf:Description/dc:contributor.translator/text() | rdf:RDF/rdf:Description/dc:contributor.technical_editor/text()'),
-    'date':        ('textList', 'rdf:RDF/rdf:Description/dc:date/text()'),
-    'type':        ('textList', 'rdf:RDF/rdf:Description/dc:type/text()'),
-    'format':      ('textList', 'rdf:RDF/rdf:Description/dc:format/text()'),
-    'identifier':  ('textList', 'rdf:RDF/rdf:Description/dc:identifier.url/text()'),
-    'source':      ('textList', 'rdf:RDF/rdf:Description/dc:source/text()'),
-    'language':    ('textList', 'rdf:RDF/rdf:Description/dc:language/text()'),
+    'title':       ('textList', WL_DC_READER_XPATH % 'dc:title'),
+    'creator':     ('textList', WL_DC_READER_XPATH % 'dc:creator'),
+    'subject':     ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
+    'description': ('textList', WL_DC_READER_XPATH % 'dc:description'),
+    'publisher':   ('textList', WL_DC_READER_XPATH % 'dc:publisher'),
+    'contributor': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:contributor.editor', 'dc:contributor.translator', 'dc:contributor.technical_editor')),
+    'date':        ('textList', WL_DC_READER_XPATH % 'dc:date'),
+    'type':        ('textList', WL_DC_READER_XPATH % 'dc:type'),
+    'format':      ('textList', WL_DC_READER_XPATH % 'dc:format'),
+    'identifier':  ('textList', WL_DC_READER_XPATH % 'dc:identifier.url'),
+    'source':      ('textList', WL_DC_READER_XPATH % 'dc:source'),
+    'language':    ('textList', WL_DC_READER_XPATH % 'dc:language'),
      #'isPartOf':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
      #'isPartOf':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
-    'hasPart':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.hasPart/text()'),
+    'hasPart':     ('textList', WL_DC_READER_XPATH % 'dc:relation.hasPart'),
      #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
      #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
      #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
      #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
-    'rights':      ('textList', 'rdf:RDF/rdf:Description/dc:rights/text()')
+    'rights':      ('textList', WL_DC_READER_XPATH % 'dc:rights')
      },
      namespaces={
      'dc': 'http://purl.org/dc/elements/1.1/',
      },
      namespaces={
      'dc': 'http://purl.org/dc/elements/1.1/',
@@ -40,17 +50,24 @@ wl_dc_reader = metadata.MetadataReader(
      )
  
  
      )
  
  
+NS_DCTERMS = "http://purl.org/dc/terms/"
+
+
+def nsdcterms(name):
+    return '{%s}%s' % (NS_DCTERMS, name)
+
+
  class Catalogue(common.ResumptionOAIPMH):
      TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
  class Catalogue(common.ResumptionOAIPMH):
      TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
-    
+
      def __init__(self, metadata_registry):
          super(Catalogue, self).__init__()
          self.metadata_registry = metadata_registry
  
      def __init__(self, metadata_registry):
          super(Catalogue, self).__init__()
          self.metadata_registry = metadata_registry
  
-        self.oai_id = "oai:"+Site.objects.get_current().domain+":%s"
+        self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"
  
          # earliest change
  
          # earliest change
-        year_zero = datetime(1990, 1, 1, 0, 0, 0)
+        year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)
  
          try:
              earliest_change = \
  
          try:
              earliest_change = \
@@ -81,12 +98,12 @@ class Catalogue(common.ResumptionOAIPMH):
          identifier = self.slug_to_identifier(book.slug)
          if isinstance(book, Book):
              #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
          identifier = self.slug_to_identifier(book.slug)
          if isinstance(book, Book):
              #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
-            header = common.Header(identifier, book.changed_at, [], False)
+            header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
              if not headers_only:
                  meta = common.Metadata(self.metadata(book))
              about = None
          elif isinstance(book, Deleted):
              if not headers_only:
                  meta = common.Metadata(self.metadata(book))
              about = None
          elif isinstance(book, Deleted):
-            header = common.Header(identifier, book.deleted_at, [], True)
+            header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
              if not headers_only:
                  meta = common.Metadata({})
              about = None
              if not headers_only:
                  meta = common.Metadata({})
              about = None
@@ -100,7 +117,7 @@ class Catalogue(common.ResumptionOAIPMH):
              '%s/oaipmh' % WL_BASE,  # generate
              '2.0',  # version
              [m[1] for m in settings.MANAGERS],  # adminEmails
              '%s/oaipmh' % WL_BASE,  # generate
              '2.0',  # version
              [m[1] for m in settings.MANAGERS],  # adminEmails
-            self.earliest_datestamp,  # earliest datestamp of any change
+            make_time_naive(self.earliest_datestamp),  # earliest datestamp of any change
              'persistent',  # deletedRecord
              'YYYY-MM-DDThh:mm:ssZ',  # granularity
              ['identity'],  # compression
              'persistent',  # deletedRecord
              'YYYY-MM-DDThh:mm:ssZ',  # granularity
              ['identity'],  # compression
@@ -111,7 +128,7 @@ class Catalogue(common.ResumptionOAIPMH):
      def books(self, tag, from_, until):
          if tag:
              # we do not support sets, since they are problematic for deleted books.
      def books(self, tag, from_, until):
          if tag:
              # we do not support sets, since they are problematic for deleted books.
-            raise errror.NoSetHierarchyError("Wolne Lektury does not support sets.")
+            raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
              # books = Book.tagged.with_all([tag])
          else:
              books = Book.objects.all()
              # books = Book.tagged.with_all([tag])
          else:
              books = Book.objects.all()
@@ -193,9 +210,13 @@ returns result, token
          return records, None
  
      def listMetadataFormats(self, **kw):
          return records, None
  
      def listMetadataFormats(self, **kw):
-        formats = [('oai_dc',
-                 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
-                 server.NS_OAIDC)]
+        formats = [
+            ('oai_dc',
+             'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
+             server.NS_OAIDC),
+            ('qdc',
+             'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
+             NS_DCTERMS)]
          if 'identifier' in kw:
              slug = self.identifier_to_slug(kw['identifier'])
              try:
          if 'identifier' in kw:
              slug = self.identifier_to_slug(kw['identifier'])
              try:
@@ -219,5 +240,3 @@ returns result, token
          #                      tag.name,
          #                      tag.description))
          # return tags, None
          #                      tag.name,
          #                      tag.description))
          # return tags, None
-
-