Cleaning: timezone issues, deprecated urls.py imports, missing notes.
[wolnelektury.git] / apps / oai / handlers.py
index 4ad5c13..142b81f 100644 (file)
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
 from oaipmh import server, common, metadata, error
 from catalogue.models import Book, Tag
 from api.models import Deleted
@@ -12,27 +16,33 @@ from lxml.etree import ElementTree
 from django.db.models import Q
 from django.conf import settings
 from django.contrib.sites.models import Site
+from django.utils import timezone
 
 
+make_time_naive = lambda d: timezone.localtime(d).replace(tzinfo=None)
+
+WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()' 
 wl_dc_reader = metadata.MetadataReader(
     fields={
-    'title':       ('textList', 'rdf:RDF/rdf:Description/dc:title/text()'),
-    'creator':     ('textList', 'rdf:RDF/rdf:Description/dc:creator/text()'),
-    'subject':     ('textList', 'rdf:RDF/rdf:Description/dc:subject.period/text() | rdf:RDF/rdf:Description/dc:subject.type/text() | rdf:RDF/rdf:Description/dc:subject.genre/text()'),
-    'description': ('textList', 'rdf:RDF/rdf:Description/dc:description/text()'),
-    'publisher':   ('textList', 'rdf:RDF/rdf:Description/dc:publisher/text()'),
-    'contributor': ('textList', 'rdf:RDF/rdf:Description/dc:contributor.editor/text() | rdf:RDF/rdf:Description/dc:contributor.translator/text() | rdf:RDF/rdf:Description/dc:contributor.technical_editor/text()'),
-    'date':        ('textList', 'rdf:RDF/rdf:Description/dc:date/text()'),
-    'type':        ('textList', 'rdf:RDF/rdf:Description/dc:type/text()'),
-    'format':      ('textList', 'rdf:RDF/rdf:Description/dc:format/text()'),
-    'identifier':  ('textList', 'rdf:RDF/rdf:Description/dc:identifier.url/text()'),
-    'source':      ('textList', 'rdf:RDF/rdf:Description/dc:source/text()'),
-    'language':    ('textList', 'rdf:RDF/rdf:Description/dc:language/text()'),
+    'title':       ('textList', WL_DC_READER_XPATH % 'dc:title'),
+    'creator':     ('textList', WL_DC_READER_XPATH % 'dc:creator'),
+    'subject':     ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
+    'description': ('textList', WL_DC_READER_XPATH % 'dc:description'),
+    'publisher':   ('textList', WL_DC_READER_XPATH % 'dc:publisher'),
+    'contributor': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:contributor.editor', 'dc:contributor.translator', 'dc:contributor.technical_editor')),
+    'date':        ('textList', WL_DC_READER_XPATH % 'dc:date'),
+    'type':        ('textList', WL_DC_READER_XPATH % 'dc:type'),
+    'format':      ('textList', WL_DC_READER_XPATH % 'dc:format'),
+    'identifier':  ('textList', WL_DC_READER_XPATH % 'dc:identifier.url'),
+    'source':      ('textList', WL_DC_READER_XPATH % 'dc:source'),
+    'language':    ('textList', WL_DC_READER_XPATH % 'dc:language'),
     #'isPartOf':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
-    'hasPart':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.hasPart/text()'),
+    'hasPart':     ('textList', WL_DC_READER_XPATH % 'dc:relation.hasPart'),
     #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
     #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
-    'rights':      ('textList', 'rdf:RDF/rdf:Description/dc:rights/text()')
+    'rights':      ('textList', WL_DC_READER_XPATH % 'dc:rights')
     },
     namespaces={
     'dc': 'http://purl.org/dc/elements/1.1/',
@@ -57,7 +67,7 @@ class Catalogue(common.ResumptionOAIPMH):
         self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"
 
         # earliest change
-        year_zero = datetime(1990, 1, 1, 0, 0, 0)
+        year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)
 
         try:
             earliest_change = \
@@ -88,12 +98,12 @@ class Catalogue(common.ResumptionOAIPMH):
         identifier = self.slug_to_identifier(book.slug)
         if isinstance(book, Book):
             #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
-            header = common.Header(identifier, book.changed_at, [], False)
+            header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
             if not headers_only:
                 meta = common.Metadata(self.metadata(book))
             about = None
         elif isinstance(book, Deleted):
-            header = common.Header(identifier, book.deleted_at, [], True)
+            header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
             if not headers_only:
                 meta = common.Metadata({})
             about = None
@@ -107,7 +117,7 @@ class Catalogue(common.ResumptionOAIPMH):
             '%s/oaipmh' % WL_BASE,  # generate
             '2.0',  # version
             [m[1] for m in settings.MANAGERS],  # adminEmails
-            self.earliest_datestamp,  # earliest datestamp of any change
+            make_time_naive(self.earliest_datestamp),  # earliest datestamp of any change
             'persistent',  # deletedRecord
             'YYYY-MM-DDThh:mm:ssZ',  # granularity
             ['identity'],  # compression