Cleaning: timezone issues, deprecated urls.py imports, missing notes.
[wolnelektury.git] / apps / oai / handlers.py
index d098a64..142b81f 100644 (file)
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
 from oaipmh import server, common, metadata, error
 from catalogue.models import Book, Tag
 from api.models import Deleted
 from oaipmh import server, common, metadata, error
 from catalogue.models import Book, Tag
 from api.models import Deleted
@@ -10,25 +14,35 @@ from datetime import datetime
 from lxml import etree
 from lxml.etree import ElementTree
 from django.db.models import Q
 from lxml import etree
 from lxml.etree import ElementTree
 from django.db.models import Q
+from django.conf import settings
+from django.contrib.sites.models import Site
+from django.utils import timezone
 
 
 
 
+make_time_naive = lambda d: timezone.localtime(d).replace(tzinfo=None)  # aware datetime -> naive datetime in the current time zone; NOTE(review): identify() advertises a 'Z' granularity - confirm local wall time is intended
+
+WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()' 
 wl_dc_reader = metadata.MetadataReader(
     fields={
 wl_dc_reader = metadata.MetadataReader(
     fields={
-    'title':       ('textList', 'rdf:RDF/rdf:Description/dc:title/text()'),
-    'creator':     ('textList', 'rdf:RDF/rdf:Description/dc:creator/text()'),
-    'subject':     ('textList', 'rdf:RDF/rdf:Description/dc:subject.period/text() | rdf:RDF/rdf:Description/dc:subject.type/text() | rdf:RDF/rdf:Description/dc:subject.genre/text()'),
-    'description': ('textList', 'rdf:RDF/rdf:Description/dc:description/text()'),
-    'publisher':   ('textList', 'rdf:RDF/rdf:Description/dc:publisher/text()'),
-    'contributor': ('textList', 'rdf:RDF/rdf:Description/dc:contributor.editor/text() | rdf:RDF/rdf:Description/dc:contributor.translator/text() | rdf:RDF/rdf:Description/dc:contributor.technical_editor/text()'),
-    'date':        ('textList', 'rdf:RDF/rdf:Description/dc:date/text()'),
-    'type':        ('textList', 'rdf:RDF/rdf:Description/dc:type/text()'),
-    'format':      ('textList', 'rdf:RDF/rdf:Description/dc:format/text()'),
-    'identifier':  ('textList', 'rdf:RDF/rdf:Description/dc:identifier.url/text()'),
-    'source':      ('textList', 'rdf:RDF/rdf:Description/dc:source/text()'),
-    'language':    ('textList', 'rdf:RDF/rdf:Description/dc:language/text()'),
+    'title':       ('textList', WL_DC_READER_XPATH % 'dc:title'),
+    'creator':     ('textList', WL_DC_READER_XPATH % 'dc:creator'),
+    'subject':     ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
+    'description': ('textList', WL_DC_READER_XPATH % 'dc:description'),
+    'publisher':   ('textList', WL_DC_READER_XPATH % 'dc:publisher'),
+    'contributor': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
+                    ('dc:contributor.editor', 'dc:contributor.translator', 'dc:contributor.technical_editor')),
+    'date':        ('textList', WL_DC_READER_XPATH % 'dc:date'),
+    'type':        ('textList', WL_DC_READER_XPATH % 'dc:type'),
+    'format':      ('textList', WL_DC_READER_XPATH % 'dc:format'),
+    'identifier':  ('textList', WL_DC_READER_XPATH % 'dc:identifier.url'),
+    'source':      ('textList', WL_DC_READER_XPATH % 'dc:source'),
+    'language':    ('textList', WL_DC_READER_XPATH % 'dc:language'),
+    #'isPartOf':     ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
+    'hasPart':     ('textList', WL_DC_READER_XPATH % 'dc:relation.hasPart'),
     #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
     #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
     #    'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
     #    'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
-    'rights':      ('textList', 'rdf:RDF/rdf:Description/dc:rights/text()')
+    'rights':      ('textList', WL_DC_READER_XPATH % 'dc:rights')
     },
     namespaces={
     'dc': 'http://purl.org/dc/elements/1.1/',
     },
     namespaces={
     'dc': 'http://purl.org/dc/elements/1.1/',
@@ -36,14 +50,24 @@ wl_dc_reader = metadata.MetadataReader(
     )
 
 
     )
 
 
+NS_DCTERMS = "http://purl.org/dc/terms/"
+
+
+def nsdcterms(name):  # qualify `name` with the NS_DCTERMS namespace, in '{namespace}name' form
+    return '{%s}%s' % (NS_DCTERMS, name)
+
+
 class Catalogue(common.ResumptionOAIPMH):
     TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
 class Catalogue(common.ResumptionOAIPMH):
     TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
-    
-    def __init__(self):
+
+    def __init__(self, metadata_registry):
         super(Catalogue, self).__init__()
         super(Catalogue, self).__init__()
+        self.metadata_registry = metadata_registry
+
+        self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"
 
         # earliest change
 
         # earliest change
-        year_zero = datetime(1990, 1, 1, 0, 0, 0)
+        year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)
 
         try:
             earliest_change = \
 
         try:
             earliest_change = \
@@ -52,31 +76,34 @@ class Catalogue(common.ResumptionOAIPMH):
 
         try:
             earliest_delete = \
 
         try:
             earliest_delete = \
-                Deleted.objects.ordery_by('deleted_at')[0].deleted_at
+                Deleted.objects.exclude(slug__exact=u'').ordery_by('deleted_at')[0].deleted_at
         except: earliest_delete = year_zero
 
         self.earliest_datestamp = earliest_change <= earliest_delete and \
             earliest_change or earliest_delete
 
         except: earliest_delete = year_zero
 
         self.earliest_datestamp = earliest_change <= earliest_delete and \
             earliest_change or earliest_delete
 
-        # admins
-        self.admin_emails = [u.email for u in User.objects.filter(is_superuser=True)]
-
     def metadata(self, book):
     def metadata(self, book):
-        xml = etree.parse(book.xml_file)
+        try:
+            xml = etree.parse(book.xml_file)  # parse the book's XML source file
+        finally:
+            book.xml_file.close()  # close the file whether or not parsing succeeded
         md = wl_dc_reader(xml)
         md = wl_dc_reader(xml)
-        return md.getMap()
+        m = md.getMap()  # map of the fields declared in wl_dc_reader
+        if book.parent:  # books that have a parent also report it
+            m['isPartOf'] = [str(WLURI.from_slug(book.parent.slug))]  # single-item list holding the parent's URI, built from its slug
+        return m
 
     def record_for_book(self, book, headers_only=False):
         meta = None
 
     def record_for_book(self, book, headers_only=False):
         meta = None
-        identifier = str(WLURI.from_slug(book.slug))
+        identifier = self.slug_to_identifier(book.slug)
         if isinstance(book, Book):
             #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
         if isinstance(book, Book):
             #            setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
-            header = common.Header(identifier, book.changed_at, [], False)
+            header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
             if not headers_only:
                 meta = common.Metadata(self.metadata(book))
             about = None
         elif isinstance(book, Deleted):
             if not headers_only:
                 meta = common.Metadata(self.metadata(book))
             about = None
         elif isinstance(book, Deleted):
-            header = common.Header(identifier, book.deleted_at, [], True)
+            header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
             if not headers_only:
                 meta = common.Metadata({})
             about = None
             if not headers_only:
                 meta = common.Metadata({})
             about = None
@@ -89,8 +116,8 @@ class Catalogue(common.ResumptionOAIPMH):
             'Wolne Lektury',  # generate
             '%s/oaipmh' % WL_BASE,  # generate
             '2.0',  # version
             'Wolne Lektury',  # generate
             '%s/oaipmh' % WL_BASE,  # generate
             '2.0',  # version
-            self.admin_emails,  # adminEmails
-            self.earliest_datestamp,  # earliest datestamp of any change
+            [m[1] for m in settings.MANAGERS],  # adminEmails
+            make_time_naive(self.earliest_datestamp),  # earliest datestamp of any change
             'persistent',  # deletedRecord
             'YYYY-MM-DDThh:mm:ssZ',  # granularity
             ['identity'],  # compression
             'persistent',  # deletedRecord
             'YYYY-MM-DDThh:mm:ssZ',  # granularity
             ['identity'],  # compression
@@ -101,11 +128,11 @@ class Catalogue(common.ResumptionOAIPMH):
     def books(self, tag, from_, until):
         if tag:
             # we do not support sets, since they are problematic for deleted books.
     def books(self, tag, from_, until):
         if tag:
             # we do not support sets, since they are problematic for deleted books.
-            raise errror.NoSetHierarchyError("Wolne Lektury does not support sets.")
+            raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
             # books = Book.tagged.with_all([tag])
         else:
             books = Book.objects.all()
             # books = Book.tagged.with_all([tag])
         else:
             books = Book.objects.all()
-        deleted = Deleted.objects.filter(slug__isnull=False)
+        deleted = Deleted.objects.exclude(slug__exact=u'')
 
         books = books.order_by('changed_at')
         deleted = deleted.order_by('deleted_at')
 
         books = books.order_by('changed_at')
         deleted = deleted.order_by('deleted_at')
@@ -136,7 +163,7 @@ class Catalogue(common.ResumptionOAIPMH):
         """
 Returns (header, metadata, about) for given record.
         """
         """
 Returns (header, metadata, about) for given record.
         """
-        slug = WLURI(kw['identifier']).slug
+        slug = self.identifier_to_slug(kw['identifier'])
         try:
             book = Book.objects.get(slug=slug)
             return self.record_for_book(book)
         try:
             book = Book.objects.get(slug=slug)
             return self.record_for_book(book)
@@ -149,7 +176,20 @@ Returns (header, metadata, about) for given record.
                 raise error.IdDoesNotExistError("No item for this identifier")
             return self.record_for_book(deleted_book)
 
                 raise error.IdDoesNotExistError("No item for this identifier")
             return self.record_for_book(deleted_book)
 
+    def validate_kw(self, kw):  # raise an OAI error for request keywords this handler does not support; returns nothing
+        if 'resumptionToken' in kw:  # any resumption token is rejected outright
+            raise error.BadResumptionTokenError("No resumption token support at this point")
+        if 'metadataPrefix' in kw and not self.metadata_registry.hasWriter(kw['metadataPrefix']):  # checked against the registry stored by __init__
+            raise error.CannotDisseminateFormatError("This format is not supported")
+
+    def identifier_to_slug(self, ident):  # return the text after the last ':' of `ident`; assumes slugs contain no ':' - TODO confirm
+        return ident.split(':')[-1]
+
+    def slug_to_identifier(self, slug):  # fill `slug` into the "oai:<current site domain>:%s" template built in __init__
+        return self.oai_id % slug
+
     def listIdentifiers(self, **kw):
     def listIdentifiers(self, **kw):
+        self.validate_kw(kw)
         records = [self.record_for_book(book, headers_only=True) for
                    book in self.books(None,
                            kw.get('from_', None),
         records = [self.record_for_book(book, headers_only=True) for
                    book in self.books(None,
                            kw.get('from_', None),
@@ -161,6 +201,7 @@ Returns (header, metadata, about) for given record.
 can get a resumptionToken kw.
 returns result, token
         """
 can get a resumptionToken kw.
 returns result, token
         """
+        self.validate_kw(kw)
         records = [self.record_for_book(book) for
                    book in self.books(None,
                            kw.get('from_', None),
         records = [self.record_for_book(book) for
                    book in self.books(None,
                            kw.get('from_', None),
@@ -169,9 +210,26 @@ returns result, token
         return records, None
 
     def listMetadataFormats(self, **kw):
         return records, None
 
     def listMetadataFormats(self, **kw):
-        return [('oai_dc',
-                 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
-                 server.NS_OAIDC)]
+        formats = [
+            ('oai_dc',
+             'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
+             server.NS_OAIDC),
+            ('qdc',
+             'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
+             NS_DCTERMS)]
+        if 'identifier' in kw:
+            slug = self.identifier_to_slug(kw['identifier'])
+            try:
+                b = Book.objects.get(slug=slug)
+                return formats
+            except:
+                try:
+                    d = Deleted.objects.get(slug=slug)
+                    return []
+                except:
+                    raise error.IdDoesNotExistError("This id does not exist")
+        else:
+            return formats
 
     def listSets(self, **kw):
         raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
 
     def listSets(self, **kw):
         raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
@@ -182,5 +240,3 @@ returns result, token
         #                      tag.name,
         #                      tag.description))
         # return tags, None
         #                      tag.name,
         #                      tag.description))
         # return tags, None
-
-