Test for parenthood by looking for children instead of HTML.
[wolnelektury.git] / apps / oai / handlers.py
index 26e1cfa..142b81f 100644 (file)
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
 from oaipmh import server, common, metadata, error
 from catalogue.models import Book, Tag
 from api.models import Deleted
 from oaipmh import server, common, metadata, error
 from catalogue.models import Book, Tag
 from api.models import Deleted
@@ -12,27 +16,33 @@ from lxml.etree import ElementTree
 from django.db.models import Q
 from django.conf import settings
 from django.contrib.sites.models import Site
 from django.db.models import Q
 from django.conf import settings
 from django.contrib.sites.models import Site
+from django.utils import timezone
 
 
 
 
+make_time_naive = lambda d: timezone.localtime(d).replace(tzinfo=None)
+
+WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()' 
 wl_dc_reader = metadata.MetadataReader(
     fields={
 wl_dc_reader = metadata.MetadataReader(
     fields={
-    'title':       ('textList', 'rdf:RDF/rdf:Description/dc:title/text()'),
-    'creator':     ('textList', 'rdf:RDF/rdf:Description/dc:creator/text()'),
-    'subject':     ('textList', 'rdf:RDF/rdf:Description/dc:subject.period/text() | rdf:RDF/rdf:Description/dc:subject.type/text() | rdf:RDF/rdf:Description/dc:subject.genre/text()'),
-    'description': ('textList', 'rdf:RDF/rdf:Description/dc:description/text()'),
-    'publisher':   ('textList', 'rdf:RDF/rdf:Description/dc:publisher/text()'),
-    'contributor': ('textList', 'rdf:RDF/rdf:Description/dc:contributor.editor/text() | rdf:RDF/rdf:Description/dc:contributor.translator/text() | rdf:RDF/rdf:Description/dc:contributor.technical_editor/text()'),
-    'date':        ('textList', 'rdf:RDF/rdf:Description/dc:date/text()'),
-    'type':        ('textList', 'rdf:RDF/rdf:Description/dc:type/text()'),
-    'format':      ('textList', 'rdf:RDF/rdf:Description/dc:format/text()'),
-    'identifier':  ('textList', 'rdf:RDF/rdf:Description/dc:identifier.url/text()'),
-    'source':      ('textList', 'rdf:RDF/rdf:Description/dc:source/text()'),
-    'language':    ('textList', 'rdf:RDF/rdf:Description/dc:language/text()'),
+    'title':       ('textList', WL_DC_READER_XPATH % 'dc:title'),
+    'creator':     ('textList', WL_DC_READER_XPATH % 'dc:creator'),
+    'subject':     ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
+    'description': ('textList', WL_DC_READER_XPATH % 'dc:description'),
+    'publisher':   ('textList', WL_DC_READER_XPATH % 'dc:publisher'),
+    'contributor': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:contributor.editor', 'dc:contributor.translator', 'dc:contributor.technical_editor')),
+    'date':        ('textList', WL_DC_READER_XPATH % 'dc:date'),
+    'type':        ('textList', WL_DC_READER_XPATH % 'dc:type'),
+    'format':      ('textList', WL_DC_READER_XPATH % 'dc:format'),
+    'identifier':  ('textList', WL_DC_READER_XPATH % 'dc:identifier.url'),
+    'source':      ('textList', WL_DC_READER_XPATH % 'dc:source'),
+    'language':    ('textList', WL_DC_READER_XPATH % 'dc:language'),
     #'isPartOf':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
     #'isPartOf':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
-    'hasPart':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.hasPart/text()'),
+    'hasPart':     ('textList', WL_DC_READER_XPATH % 'dc:relation.hasPart'),
     #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
     #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
     #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
     #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
-    'rights':      ('textList', 'rdf:RDF/rdf:Description/dc:rights/text()')
+    'rights':      ('textList', WL_DC_READER_XPATH % 'dc:rights')
     },
     namespaces={
     'dc': 'http://purl.org/dc/elements/1.1/',
     },
     namespaces={
     'dc': 'http://purl.org/dc/elements/1.1/',
@@ -40,17 +50,24 @@ wl_dc_reader = metadata.MetadataReader(
     )
 
 
     )
 
 
+NS_DCTERMS = "http://purl.org/dc/terms/"
+
+
+def nsdcterms(name):
+    return '{%s}%s' % (NS_DCTERMS, name)
+
+
 class Catalogue(common.ResumptionOAIPMH):
     TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
 class Catalogue(common.ResumptionOAIPMH):
     TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
-    
+
     def __init__(self, metadata_registry):
         super(Catalogue, self).__init__()
         self.metadata_registry = metadata_registry
 
     def __init__(self, metadata_registry):
         super(Catalogue, self).__init__()
         self.metadata_registry = metadata_registry
 
-        self.oai_id = "oai:"+Site.objects.get_current().domain+":%s"
+        self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"
 
         # earliest change
 
         # earliest change
-        year_zero = datetime(1990, 1, 1, 0, 0, 0)
+        year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)
 
         try:
             earliest_change = \
 
         try:
             earliest_change = \
@@ -81,12 +98,12 @@ class Catalogue(common.ResumptionOAIPMH):
         identifier = self.slug_to_identifier(book.slug)
         if isinstance(book, Book):
             #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
         identifier = self.slug_to_identifier(book.slug)
         if isinstance(book, Book):
             #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
-            header = common.Header(identifier, book.changed_at, [], False)
+            header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
             if not headers_only:
                 meta = common.Metadata(self.metadata(book))
             about = None
         elif isinstance(book, Deleted):
             if not headers_only:
                 meta = common.Metadata(self.metadata(book))
             about = None
         elif isinstance(book, Deleted):
-            header = common.Header(identifier, book.deleted_at, [], True)
+            header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
             if not headers_only:
                 meta = common.Metadata({})
             about = None
             if not headers_only:
                 meta = common.Metadata({})
             about = None
@@ -100,7 +117,7 @@ class Catalogue(common.ResumptionOAIPMH):
             '%s/oaipmh' % WL_BASE,  # generate
             '2.0',  # version
             [m[1] for m in settings.MANAGERS],  # adminEmails
             '%s/oaipmh' % WL_BASE,  # generate
             '2.0',  # version
             [m[1] for m in settings.MANAGERS],  # adminEmails
-            self.earliest_datestamp,  # earliest datestamp of any change
+            make_time_naive(self.earliest_datestamp),  # earliest datestamp of any change
             'persistent',  # deletedRecord
             'YYYY-MM-DDThh:mm:ssZ',  # granularity
             ['identity'],  # compression
             'persistent',  # deletedRecord
             'YYYY-MM-DDThh:mm:ssZ',  # granularity
             ['identity'],  # compression
@@ -111,7 +128,7 @@ class Catalogue(common.ResumptionOAIPMH):
     def books(self, tag, from_, until):
         if tag:
             # we do not support sets, since they are problematic for deleted books.
     def books(self, tag, from_, until):
         if tag:
             # we do not support sets, since they are problematic for deleted books.
-            raise errror.NoSetHierarchyError("Wolne Lektury does not support sets.")
+            raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
             # books = Book.tagged.with_all([tag])
         else:
             books = Book.objects.all()
             # books = Book.tagged.with_all([tag])
         else:
             books = Book.objects.all()
@@ -193,9 +210,13 @@ returns result, token
         return records, None
 
     def listMetadataFormats(self, **kw):
         return records, None
 
     def listMetadataFormats(self, **kw):
-        formats = [('oai_dc',
-                 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
-                 server.NS_OAIDC)]
+        formats = [
+            ('oai_dc',
+             'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
+             server.NS_OAIDC),
+            ('qdc',
+             'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
+             NS_DCTERMS)]
         if 'identifier' in kw:
             slug = self.identifier_to_slug(kw['identifier'])
             try:
         if 'identifier' in kw:
             slug = self.identifier_to_slug(kw['identifier'])
             try:
@@ -219,5 +240,3 @@ returns result, token
         #                      tag.name,
         #                      tag.description))
         # return tags, None
         #                      tag.name,
         #                      tag.description))
         # return tags, None
-
-