1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from oaipmh import server, common, metadata, error
6 from catalogue.models import Book, Tag
7 from api.models import Deleted
8 from api.handlers import WL_BASE
9 from librarian.dcparser import BookInfo
10 from librarian import WLURI
11 from django.contrib.contenttypes.models import ContentType
12 from django.contrib.auth.models import User
13 from datetime import datetime
14 from lxml import etree
15 from lxml.etree import ElementTree
16 from django.db.models import Q
17 from django.conf import settings
18 from django.contrib.sites.models import Site
19 from django.utils import timezone
def make_time_naive(d):
    """Return *d* as a naive datetime.

    The value is first passed through ``django.utils.timezone.localtime``
    and then its ``tzinfo`` is removed, so the result carries no time zone
    information.
    """
    return timezone.localtime(d).replace(tzinfo=None)
24 WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()'
25 wl_dc_reader = metadata.MetadataReader(
27 'title': ('textList', WL_DC_READER_XPATH % 'dc:title'),
28 'creator': ('textList', WL_DC_READER_XPATH % 'dc:creator'),
29 'subject': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
30 ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
31 'description': ('textList', WL_DC_READER_XPATH % 'dc:description'),
32 'publisher': ('textList', WL_DC_READER_XPATH % 'dc:publisher'),
33 'contributor': ('textList', (WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH + ' | ' + WL_DC_READER_XPATH) %
34 ('dc:contributor.editor', 'dc:contributor.translator', 'dc:contributor.technical_editor')),
35 'date': ('textList', WL_DC_READER_XPATH % 'dc:date'),
36 'type': ('textList', WL_DC_READER_XPATH % 'dc:type'),
37 'format': ('textList', WL_DC_READER_XPATH % 'dc:format'),
38 'identifier': ('textList', WL_DC_READER_XPATH % 'dc:identifier.url'),
39 'source': ('textList', WL_DC_READER_XPATH % 'dc:source'),
40 'language': ('textList', WL_DC_READER_XPATH % 'dc:language'),
41 #'isPartOf': ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
42 'hasPart': ('textList', WL_DC_READER_XPATH % 'dc:relation.hasPart'),
43 # 'relation': ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
44 # 'coverage': ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
45 'rights': ('textList', WL_DC_READER_XPATH % 'dc:rights')
48 'dc': 'http://purl.org/dc/elements/1.1/',
49 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}
53 NS_DCTERMS = "http://purl.org/dc/terms/"
57 return '{%s}%s' % (NS_DCTERMS, name)
60 class Catalogue(common.ResumptionOAIPMH):
61 TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']
def __init__(self, metadata_registry):
    """Initialise the catalogue.

    Stores ``metadata_registry``, builds the ``oai:<site domain>:%s``
    identifier template from the current ``Site``, and sets
    ``self.earliest_datestamp`` to the older of:

    * the ``changed_at`` of the least recently changed ``Book``, and
    * the ``deleted_at`` of the oldest ``Deleted`` entry with a
      non-empty slug.

    When either query returns no rows, 1990-01-01 00:00:00 UTC is used
    in its place.
    """
    super(Catalogue, self).__init__()
    self.metadata_registry = metadata_registry

    self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"

    # Fallback datestamp used when there are no books / no deletions yet.
    year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)

    # Indexing an empty queryset raises IndexError; that is the only
    # condition the fallback is meant to cover.
    try:
        earliest_change = Book.objects.order_by('changed_at')[0].changed_at
    except IndexError:
        earliest_change = year_zero

    try:
        # This used to call ``ordery_by`` (typo); the AttributeError was
        # hidden by a bare ``except:``, so the fallback was always used.
        earliest_delete = Deleted.objects.exclude(
            slug__exact=u'').order_by('deleted_at')[0].deleted_at
    except IndexError:
        earliest_delete = year_zero

    self.earliest_datestamp = min(earliest_change, earliest_delete)
85 def metadata(self, book):
87 xml = etree.parse(book.xml_file)
90 md = wl_dc_reader(xml)
93 m['isPartOf'] = [str(WLURI.from_slug(book.parent.slug))]
96 def record_for_book(self, book, headers_only=False):
98 identifier = self.slug_to_identifier(book.slug)
99 if isinstance(book, Book):
100 # setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
101 header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
103 meta = common.Metadata(self.metadata(book))
105 elif isinstance(book, Deleted):
106 header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
108 meta = common.Metadata({})
112 return header, meta, about
114 def identify(self, **kw):
115 ident = common.Identify(
116 'Wolne Lektury', # generate
117 '%s/oaipmh' % WL_BASE, # generate
119 [m[1] for m in settings.MANAGERS], # adminEmails
120 make_time_naive(self.earliest_datestamp), # earliest datestamp of any change
121 'persistent', # deletedRecord
122 'YYYY-MM-DDThh:mm:ssZ', # granularity
123 ['identity'], # compression
128 def books(self, tag, from_, until):
130 # we do not support sets, since they are problematic for deleted books.
131 raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
132 # books = Book.tagged.with_all([tag])
134 books = Book.objects.all()
135 deleted = Deleted.objects.exclude(slug__exact=u'')
137 books = books.order_by('changed_at')
138 deleted = deleted.order_by('deleted_at')
140 books = books.filter(changed_at__gte=from_)
141 deleted = deleted.filter(deleted_at__gte=from_)
143 books = books.filter(changed_at__lte=until)
144 deleted = deleted.filter(deleted_at__lte=until)
145 return list(books) + list(deleted)
def tag_to_setspec(tag):
    """Build the ``category:slug`` setSpec string for *tag*.

    Reads ``tag.category`` and ``tag.slug`` and joins them with a colon.
    """
    category, slug = tag.category, tag.slug
    return "%s:%s" % (category, slug)
152 def setspec_to_tag(s):
153 if not s: return None
156 if not cs[0] in Catalogue.TAG_CATEGORIES:
157 raise error.NoSetHierarchyError("No category part in set")
158 tag = Tag.objects.get(slug=cs[1], category=cs[0])
160 raise error.NoSetHierarchyError("Setspec should have two components: category:slug")
162 def getRecord(self, **kw):
164 Returns (header, metadata, about) for given record.
166 slug = self.identifier_to_slug(kw['identifier'])
168 book = Book.objects.get(slug=slug)
169 return self.record_for_book(book)
170 except Book.DoesNotExist:
171 book_type = ContentType.objects.get_for_model(Book)
173 deleted_book = Deleted.objects.get(content_type=book_type,
176 raise error.IdDoesNotExistError("No item for this identifier")
177 return self.record_for_book(deleted_book)
def validate_kw(self, kw):
    """Check request keyword arguments before a verb is processed.

    Raises:
        error.BadResumptionTokenError: if ``kw`` contains
            ``resumptionToken``.
        error.CannotDisseminateFormatError: if ``kw`` contains a
            ``metadataPrefix`` for which ``self.metadata_registry`` has
            no writer.

    Returns ``None`` when the arguments are acceptable.
    """
    if 'resumptionToken' in kw:
        raise error.BadResumptionTokenError("No resumption token support at this point")

    # Nothing more to check unless a metadata format was requested.
    if 'metadataPrefix' not in kw:
        return
    if self.metadata_registry.hasWriter(kw['metadataPrefix']):
        return
    raise error.CannotDisseminateFormatError("This format is not supported")
def identifier_to_slug(self, ident):
    """Return the part of *ident* after its last colon.

    If *ident* contains no colon, it is returned unchanged.
    """
    _head, _colon, slug = ident.rpartition(':')
    return slug
def slug_to_identifier(self, slug):
    """Format *slug* into an identifier using the ``self.oai_id`` template."""
    template = self.oai_id
    return template % slug
191 def listIdentifiers(self, **kw):
193 records = [self.record_for_book(book, headers_only=True) for
194 book in self.books(None,
195 kw.get('from_', None),
196 kw.get('until', None))]
199 def listRecords(self, **kw):
201 can get a resumptionToken kw.
202 returns result, token
205 records = [self.record_for_book(book) for
206 book in self.books(None,
207 kw.get('from_', None),
208 kw.get('until', None))]
212 def listMetadataFormats(self, **kw):
215 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
218 'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
220 if 'identifier' in kw:
221 slug = self.identifier_to_slug(kw['identifier'])
223 b = Book.objects.get(slug=slug)
227 d = Deleted.objects.get(slug=slug)
230 raise error.IdDoesNotExistError("This id does not exist")
def listSets(self, **kw):
    """Handle the ListSets verb; sets are not supported.

    Raises:
        error.NoSetHierarchyError: unconditionally.
    """
    message = "Wolne Lektury does not support sets."
    raise error.NoSetHierarchyError(message)
237 # for category in Catalogue.TAG_CATEGORIES:
238 # for tag in Tag.objects.filter(category=category):
239 # tags.append(("%s:%s" % (tag.category, tag.slug),