1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from oaipmh import server, common, metadata, error
6 from catalogue.models import Book, Tag
7 from api.models import Deleted
8 from api.handlers import WL_BASE
9 from librarian import WLURI
10 from django.contrib.contenttypes.models import ContentType
11 from datetime import datetime
12 from lxml import etree
13 from django.conf import settings
14 from django.contrib.sites.models import Site
15 from django.utils import timezone
def make_time_naive(d):
    """Return *d* passed through ``timezone.localtime`` with tzinfo removed."""
    local = timezone.localtime(d)
    return local.replace(tzinfo=None)
# XPath template for the text of one Dublin Core element; %s is the
# qualified element name.
WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()'

# Output field -> source elements whose text values feed that field.
# isPartOf (dc:relation.isPartOf), relation (dc:relation) and coverage
# (dc:coverage) are not mapped here.
_WL_DC_ELEMENTS = {
    'title': ('dc:title',),
    'creator': ('dc:creator',),
    'subject': ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre'),
    'description': ('dc:description',),
    'publisher': ('dc:publisher',),
    'contributor': ('dc:contributor.editor', 'dc:contributor.translator',
                    'dc:contributor.technical_editor'),
    'date': ('dc:date',),
    'type': ('dc:type',),
    'format': ('dc:format',),
    'identifier': ('dc:identifier.url',),
    'source': ('dc:source',),
    'language': ('dc:language',),
    'hasPart': ('dc:relation.hasPart',),
    'rights': ('dc:rights',),
}

# Reader that extracts the fields above as text lists; a field backed by
# several elements uses the union (' | ') of the per-element XPaths.
wl_dc_reader = metadata.MetadataReader(
    fields=dict(
        (name, ('textList',
                ' | '.join(WL_DC_READER_XPATH % element for element in elements)))
        for name, elements in _WL_DC_ELEMENTS.items()
    ),
    namespaces={
        'dc': 'http://purl.org/dc/elements/1.1/',
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    }
)
49 NS_DCTERMS = "http://purl.org/dc/terms/"
53 return '{%s}%s' % (NS_DCTERMS, name)
56 class Catalogue(common.ResumptionOAIPMH):
57 TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
def __init__(self, metadata_registry):
    """
    Set up the catalogue.

    Stores ``metadata_registry``, builds the identifier template from the
    current Site's domain, and records ``earliest_datestamp`` as the older
    of the oldest ``Book.changed_at`` and the oldest ``Deleted.deleted_at``
    (entries with an empty slug are ignored).
    """
    super(Catalogue, self).__init__()
    self.metadata_registry = metadata_registry

    self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"

    # Fallback datestamp used when a table has no rows to inspect.
    year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)

    # Indexing an empty queryset raises IndexError; only that case falls
    # back to year_zero, so programming errors are no longer hidden.
    try:
        earliest_change = Book.objects.order_by('changed_at')[0].changed_at
    except IndexError:
        earliest_change = year_zero

    try:
        # Was misspelled ``ordery_by``; the AttributeError was swallowed and
        # the deletion datestamp always ended up as year_zero.
        earliest_delete = Deleted.objects.exclude(
            slug__exact=u'').order_by('deleted_at')[0].deleted_at
    except IndexError:
        earliest_delete = year_zero

    self.earliest_datestamp = min(earliest_change, earliest_delete)
81 def metadata(self, book):
83 xml = etree.parse(book.xml_file)
86 md = wl_dc_reader(xml)
89 m['isPartOf'] = [str(WLURI.from_slug(book.parent.slug))]
92 def record_for_book(self, book, headers_only=False):
94 identifier = self.slug_to_identifier(book.slug)
95 if isinstance(book, Book):
96 # setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
97 header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
99 meta = common.Metadata(self.metadata(book))
101 elif isinstance(book, Deleted):
102 header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
104 meta = common.Metadata({})
108 return header, meta, about
110 def identify(self, **kw):
111 ident = common.Identify(
112 'Wolne Lektury', # generate
113 '%s/oaipmh' % unicode(WL_BASE), # generate
115 [m[1] for m in settings.MANAGERS], # adminEmails
116 make_time_naive(self.earliest_datestamp), # earliest datestamp of any change
117 'persistent', # deletedRecord
118 'YYYY-MM-DDThh:mm:ssZ', # granularity
119 ['identity'], # compression
def books(self, tag, from_, until):
    """
    Return live books followed by deleted entries as one list.

    Books are ordered by ``changed_at`` and deleted entries (those with a
    non-empty slug) by ``deleted_at``; ``from_`` and ``until`` bound both
    timestamps inclusively when given.  Passing a ``tag`` raises
    NoSetHierarchyError.
    """
    if tag:
        # we do not support sets, since they are problematic for deleted books.
        raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")

    live = Book.objects.all().order_by('changed_at')
    removed = Deleted.objects.exclude(slug__exact=u'').order_by('deleted_at')

    if from_:
        live = live.filter(changed_at__gte=from_)
        removed = removed.filter(deleted_at__gte=from_)
    if until:
        live = live.filter(changed_at__lte=until)
        removed = removed.filter(deleted_at__lte=until)

    result = list(live)
    result += list(removed)
    return result
def tag_to_setspec(tag):
    """Return the ``category:slug`` set spec string for *tag*."""
    parts = (tag.category, tag.slug)
    return "%s:%s" % parts
def setspec_to_tag(s):
    """
    Return the Tag named by a ``category:slug`` set spec.

    An empty or missing spec yields None.  NoSetHierarchyError is raised
    when the spec does not have exactly two components or its category is
    not one of ``Catalogue.TAG_CATEGORIES``.
    """
    if not s:
        return None

    pieces = s.split(':')
    if len(pieces) != 2:
        raise error.NoSetHierarchyError("Setspec should have two components: category:slug")

    category, slug = pieces
    if category not in Catalogue.TAG_CATEGORIES:
        raise error.NoSetHierarchyError("No category part in set")
    return Tag.objects.get(slug=slug, category=category)
158 def getRecord(self, **kw):
160 Returns (header, metadata, about) for given record.
162 slug = self.identifier_to_slug(kw['identifier'])
164 book = Book.objects.get(slug=slug)
165 return self.record_for_book(book)
166 except Book.DoesNotExist:
167 book_type = ContentType.objects.get_for_model(Book)
169 deleted_book = Deleted.objects.get(content_type=book_type,
172 raise error.IdDoesNotExistError("No item for this identifier")
173 return self.record_for_book(deleted_book)
def validate_kw(self, kw):
    """
    Check request keyword arguments.

    Raises BadResumptionTokenError if a ``resumptionToken`` is present and
    CannotDisseminateFormatError if ``metadataPrefix`` is present but the
    metadata registry has no writer for it.  Returns None otherwise.
    """
    if 'resumptionToken' in kw:
        raise error.BadResumptionTokenError("No resumption token support at this point")

    if 'metadataPrefix' not in kw:
        return
    if self.metadata_registry.hasWriter(kw['metadataPrefix']):
        return
    raise error.CannotDisseminateFormatError("This format is not supported")
def identifier_to_slug(self, ident):
    """Return the part of *ident* after its last ``:`` (all of it if none)."""
    _, _, slug = ident.rpartition(':')
    return slug
def slug_to_identifier(self, slug):
    """Return *slug* substituted into the ``self.oai_id`` template."""
    template = self.oai_id
    return template % slug
187 def listIdentifiers(self, **kw):
189 records = [self.record_for_book(book, headers_only=True) for
190 book in self.books(None,
191 kw.get('from_', None),
192 kw.get('until', None))]
195 def listRecords(self, **kw):
197 can get a resumptionToken kw.
198 returns result, token
201 records = [self.record_for_book(book) for
202 book in self.books(None,
203 kw.get('from_', None),
204 kw.get('until', None))]
208 def listMetadataFormats(self, **kw):
211 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
214 'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
216 if 'identifier' in kw:
217 slug = self.identifier_to_slug(kw['identifier'])
219 b = Book.objects.get(slug=slug)
223 d = Deleted.objects.get(slug=slug)
226 raise error.IdDoesNotExistError("This id does not exist")
def listSets(self, **kw):
    """Always raise NoSetHierarchyError; no sets are exposed."""
    message = "Wolne Lektury does not support sets."
    raise error.NoSetHierarchyError(message)
233 # for category in Catalogue.TAG_CATEGORIES:
234 # for tag in Tag.objects.filter(category=category):
235 # tags.append(("%s:%s" % (tag.category, tag.slug),