1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from oaipmh import server, common, metadata, error
6 from catalogue.models import Book, Tag
7 from api.models import Deleted
8 from api.handlers import WL_BASE
9 from librarian import WLURI
10 from datetime import datetime
11 from lxml import etree
12 from django.conf import settings
13 from django.contrib.sites.models import Site
14 from django.utils import timezone
def make_time_naive(d):
    """Return `d` as a naive datetime expressed in the current time zone.

    An aware datetime is converted with ``timezone.localtime`` and then has
    its ``tzinfo`` removed.  A datetime that is already naive is returned
    unchanged: ``timezone.localtime`` rejects naive values with
    ``ValueError``, and a naive value carries no zone to convert from.

    :param d: a ``datetime.datetime``, aware or naive.
    :return: a ``datetime.datetime`` whose ``tzinfo`` is ``None``.
    """
    # Already in the target form; converting would only raise ValueError.
    if timezone.is_naive(d):
        return d
    return timezone.localtime(d).replace(tzinfo=None)
# XPath template for reading one Dublin Core field: `%s` is replaced with the
# element name, and the text nodes of that element under
# rdf:RDF/rdf:Description are selected, starting from either the context node
# or one of its children (the leading `(.|*)`).
20 WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()'
# Metadata reader whose entries pair an output field name with a
# ('textList', xpath) tuple built from the template above.
21 wl_dc_reader = metadata.MetadataReader(
23 'title': ('textList', WL_DC_READER_XPATH % 'dc:title'),
24 'creator': ('textList', WL_DC_READER_XPATH % 'dc:creator'),
# 'subject' is the XPath union of three source elements: period, type and genre.
25 'subject': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
26 ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
27 'description': ('textList', WL_DC_READER_XPATH % 'dc:description'),
28 'publisher': ('textList', WL_DC_READER_XPATH % 'dc:publisher'),
# 'contributor' is the XPath union of editor, translator and technical editor.
29 'contributor': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
30 ('dc:contributor.editor', 'dc:contributor.translator', 'dc:contributor.technical_editor')),
31 'date': ('textList', WL_DC_READER_XPATH % 'dc:date'),
32 'type': ('textList', WL_DC_READER_XPATH % 'dc:type'),
33 'format': ('textList', WL_DC_READER_XPATH % 'dc:format'),
# 'identifier' is read from the dc:identifier.url element, not a plain dc:identifier.
34 'identifier': ('textList', WL_DC_READER_XPATH % 'dc:identifier.url'),
35 'source': ('textList', WL_DC_READER_XPATH % 'dc:source'),
36 'language': ('textList', WL_DC_READER_XPATH % 'dc:language'),
37 # 'isPartOf': ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
38 'hasPart': ('textList', WL_DC_READER_XPATH % 'dc:relation.hasPart'),
39 # 'relation': ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
40 # 'coverage': ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
41 'rights': ('textList', WL_DC_READER_XPATH % 'dc:rights')
# Prefix-to-URI bindings for the `dc:` and `rdf:` prefixes used in the XPaths above.
44 'dc': 'http://purl.org/dc/elements/1.1/',
45 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}
# Namespace URI that the formatting expression below places in front of a name.
49 NS_DCTERMS = "http://purl.org/dc/terms/"
# Produces '{<NS_DCTERMS>}<name>': the namespace URI in braces, then the name.
53 return '{%s}%s' % (NS_DCTERMS, name)
# OAI-PMH server backend for the catalogue, built on common.ResumptionOAIPMH.
57 class Catalogue(common.ResumptionOAIPMH):
# Tag category names; in the code visible here they are referenced only from
# commented-out set-handling code.
58 TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
# Stores the metadata registry and precomputes the identifier template and
# the earliest datestamp.
60 def __init__(self, metadata_registry):
61 super(Catalogue, self).__init__()
62 self.metadata_registry = metadata_registry
# Identifier template "oai:<current site domain>:%s"; the trailing %s is left
# in place for later substitution.
64 self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"
# Aware datetime 1990-01-01 00:00:00 UTC, assigned below as the alternative
# value for both timestamps.
67 year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)
# changed_at of the first Book when ordered by changed_at ascending.
70 earliest_change = Book.objects.order_by('changed_at')[0].changed_at
# NOTE(review): presumably the fallback used when the lookup above fails
# (e.g. no Book rows) - confirm.
72 earliest_change = year_zero
# deleted_at of the first Deleted row with a non-empty slug, ordered by
# deleted_at ascending.
76 Deleted.objects.exclude(slug__exact=u'').order_by('deleted_at')[0].deleted_at
# NOTE(review): presumably the fallback used when the lookup above fails - confirm.
78 earliest_delete = year_zero
# Keep whichever of the two timestamps is earlier.
80 self.earliest_datestamp = earliest_change if earliest_change <= earliest_delete else earliest_delete
85 # xml = etree.parse(book.xml_file)
87 # book.xml_file.close()
88 # md = wl_dc_reader(xml)
91 # m['isPartOf'] = [str(WLURI.from_slug(book.parent.slug))]
95 # def record_for_book(self, book, headers_only=False):
97 # identifier = self.slug_to_identifier(book.slug)
98 # if isinstance(book, Book):
99 # # setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
100 # header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
101 # if not headers_only:
102 # meta = common.Metadata(self.metadata(book))
104 # elif isinstance(book, Deleted):
105 # header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
106 # if not headers_only:
107 # meta = common.Metadata({})
111 # return header, meta, about
113 # def identify(self, **kw):
114 # ident = common.Identify(
115 # 'Wolne Lektury', # generate
116 # '%s/oaipmh' % unicode(WL_BASE), # generate
118 # [m[1] for m in settings.MANAGERS], # adminEmails
119 # make_time_naive(self.earliest_datestamp), # earliest datestamp of any change
120 # 'persistent', # deletedRecord
121 # 'YYYY-MM-DDThh:mm:ssZ', # granularity
122 # ['identity'], # compression
127 # def books(self, tag, from_, until):
129 # # we do not support sets, since they are problematic for deleted books.
130 # raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
131 # # books = Book.tagged.with_all([tag])
133 # books = Book.objects.all()
134 # deleted = Deleted.objects.exclude(slug__exact=u'')
136 # books = books.order_by('changed_at')
137 # deleted = deleted.order_by('deleted_at')
139 # books = books.filter(changed_at__gte=from_)
140 # deleted = deleted.filter(deleted_at__gte=from_)
142 # books = books.filter(changed_at__lte=until)
143 # deleted = deleted.filter(deleted_at__lte=until)
144 # return list(books) + list(deleted)
147 # def tag_to_setspec(tag):
148 # return "%s:%s" % (tag.category, tag.slug)
151 # def setspec_to_tag(s):
152 # if not s: return None
155 # if not cs[0] in Catalogue.TAG_CATEGORIES:
156 # raise error.NoSetHierarchyError("No category part in set")
157 # tag = Tag.objects.get(slug=cs[1], category=cs[0])
159 # raise error.NoSetHierarchyError("Setspec should have two components: category:slug")
161 # def getRecord(self, **kw):
162 # """Returns (header, metadata, about) for given record."""
163 # slug = self.identifier_to_slug(kw['identifier'])
165 # book = Book.objects.get(slug=slug)
166 # return self.record_for_book(book)
167 # except Book.DoesNotExist:
168 # book_type = ContentType.objects.get_for_model(Book)
170 # deleted_book = Deleted.objects.get(content_type=book_type,
173 # raise error.IdDoesNotExistError("No item for this identifier")
174 # return self.record_for_book(deleted_book)
176 # def validate_kw(self, kw):
177 # if 'resumptionToken' in kw:
178 # raise error.BadResumptionTokenError("No resumption token support at this point")
179 # if 'metadataPrefix' in kw and not self.metadata_registry.hasWriter(kw['metadataPrefix']):
180 # raise error.CannotDisseminateFormatError("This format is not supported")
182 # def identifier_to_slug(self, ident):
183 # return ident.split(':')[-1]
185 # def slug_to_identifier(self, slug):
186 # return self.oai_id % slug
188 # def listIdentifiers(self, **kw):
189 # self.validate_kw(kw)
190 # records = [self.record_for_book(book, headers_only=True) for
191 # book in self.books(None,
192 # kw.get('from_', None),
193 # kw.get('until', None))]
194 # return records, None
196 # def listRecords(self, **kw):
198 # can get a resumptionToken kw.
199 # returns result, token
201 # self.validate_kw(kw)
202 # records = [self.record_for_book(book) for
203 # book in self.books(None,
204 # kw.get('from_', None),
205 # kw.get('until', None))]
207 # return records, None
209 # def listMetadataFormats(self, **kw):
212 # 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
215 # 'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
217 # if 'identifier' in kw:
218 # slug = self.identifier_to_slug(kw['identifier'])
220 # b = Book.objects.get(slug=slug)
224 # d = Deleted.objects.get(slug=slug)
227 # raise error.IdDoesNotExistError("This id does not exist")
231 # def listSets(self, **kw):
232 # raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
234 # # for category in Catalogue.TAG_CATEGORIES:
235 # # for tag in Tag.objects.filter(category=category):
236 # # tags.append(("%s:%s" % (tag.category, tag.slug),
238 # # tag.description))
239 # # return tags, None