pipeline bug
[wolnelektury.git] / src / oai / handlers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from oaipmh import server, common, metadata, error
6 from catalogue.models import Book, Tag
7 from api.models import Deleted
8 from api.handlers import WL_BASE
9 from librarian import WLURI
10 from datetime import datetime
11 from lxml import etree
12 from django.conf import settings
13 from django.contrib.sites.models import Site
14 from django.utils import timezone
15
16
def make_time_naive(d):
    """Return ``d`` as a naive datetime.

    The value is first passed through ``django.utils.timezone.localtime``
    and the resulting datetime then has its ``tzinfo`` removed.
    """
    localized = timezone.localtime(d)
    return localized.replace(tzinfo=None)
19
# XPath template selecting the text of one element under
# rdf:RDF/rdf:Description, searched from the context node itself or from any
# of its child elements; ``%s`` is replaced with the element name.
WL_DC_READER_XPATH = '(.|*)/rdf:RDF/rdf:Description/%s/text()'

# Metadata reader mapping each output field to a 'textList' XPath query.
# A field fed by several source elements gets the ' | ' union of the
# per-element queries.
wl_dc_reader = metadata.MetadataReader(
    fields=dict(
        (field, ('textList', ' | '.join(WL_DC_READER_XPATH % element for element in elements)))
        for field, elements in (
            ('title', ('dc:title',)),
            ('creator', ('dc:creator',)),
            ('subject', ('dc:subject.period', 'dc:subject.type', 'dc:subject.genre')),
            ('description', ('dc:description',)),
            ('publisher', ('dc:publisher',)),
            ('contributor', ('dc:contributor.editor', 'dc:contributor.translator',
                             'dc:contributor.technical_editor')),
            ('date', ('dc:date',)),
            ('type', ('dc:type',)),
            ('format', ('dc:format',)),
            ('identifier', ('dc:identifier.url',)),
            ('source', ('dc:source',)),
            ('language', ('dc:language',)),
            # 'isPartOf':    ('textList', 'rdf:RDF/rdf:Description/dc:relation.isPartOf/text()'),
            ('hasPart', ('dc:relation.hasPart',)),
            # 'relation':    ('textList', 'rdf:RDF/rdf:Description/dc:relation/text()'),
            # 'coverage':    ('textList', 'rdf:RDF/rdf:Description/dc:coverage/text()'),
            ('rights', ('dc:rights',)),
        )
    ),
    namespaces={
        'dc': 'http://purl.org/dc/elements/1.1/',
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    },
)
47
48
# Namespace URI of the Dublin Core "terms" vocabulary.
NS_DCTERMS = "http://purl.org/dc/terms/"


def nsdcterms(name):
    """Return ``name`` qualified with the DC terms namespace.

    The result uses the ``{namespace}localname`` form, e.g.
    ``{http://purl.org/dc/terms/}title`` for ``name='title'``.
    """
    qualified_template = '{%s}%s'
    parts = (NS_DCTERMS, name)
    return qualified_template % parts
54
55
56 # WTF
class Catalogue(common.ResumptionOAIPMH):
    """OAI-PMH catalogue backend built on ``common.ResumptionOAIPMH``.

    Only the constructor is live code in this file; the former request
    handlers that follow it are commented out.
    """

    # Tag categories referenced by the (commented-out) set-spec helpers.
    TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']

    def __init__(self, metadata_registry):
        """Store the registry and compute the earliest record datestamp.

        :param metadata_registry: object kept as ``self.metadata_registry``.

        Sets ``self.oai_id`` (identifier template ending in a ``%s``
        placeholder) and ``self.earliest_datestamp`` (the earlier of the
        oldest ``Book.changed_at`` and the oldest ``Deleted.deleted_at``
        among entries with a non-empty slug).
        """
        super(Catalogue, self).__init__()
        self.metadata_registry = metadata_registry

        # Identifier template: "oai:<current site domain>:%s".
        self.oai_id = "oai:" + Site.objects.get_current().domain + ":%s"

        # Fallback datestamp used whenever a table has no rows to inspect.
        year_zero = timezone.make_aware(datetime(1990, 1, 1, 0, 0, 0), timezone.utc)

        try:
            earliest_change = Book.objects.order_by('changed_at')[0].changed_at
        except IndexError:
            earliest_change = year_zero

        try:
            # Fixed: the method is ``order_by``; the previous ``ordery_by``
            # raised AttributeError every time the class was instantiated.
            earliest_delete = \
                Deleted.objects.exclude(slug__exact=u'').order_by('deleted_at')[0].deleted_at
        except IndexError:
            earliest_delete = year_zero

        self.earliest_datestamp = min(earliest_change, earliest_delete)
81
82     # @staticmethod
83     # def metadata(book):
84     #     try:
85     #         xml = etree.parse(book.xml_file)
86     #     finally:
87     #         book.xml_file.close()
88     #     md = wl_dc_reader(xml)
89     #     m = md.getMap()
90     #     if book.parent:
91     #         m['isPartOf'] = [str(WLURI.from_slug(book.parent.slug))]
92     #     return m
93
94     # WTF
95     # def record_for_book(self, book, headers_only=False):
96     #     meta = None
97     #     identifier = self.slug_to_identifier(book.slug)
98     #     if isinstance(book, Book):
99     #         # setSpec = map(self.tag_to_setspec, book.tags.filter(category__in=self.TAG_CATEGORIES))
100     #         header = common.Header(identifier, make_time_naive(book.changed_at), [], False)
101     #         if not headers_only:
102     #             meta = common.Metadata(self.metadata(book))
103     #         about = None
104     #     elif isinstance(book, Deleted):
105     #         header = common.Header(identifier, make_time_naive(book.deleted_at), [], True)
106     #         if not headers_only:
107     #             meta = common.Metadata({})
108     #         about = None
109     #     if headers_only:
110     #         return header
111     #     return header, meta, about
112
113     # def identify(self, **kw):
114     #     ident = common.Identify(
115     #         'Wolne Lektury',  # generate
116     #         '%s/oaipmh' % unicode(WL_BASE),  # generate
117     #         '2.0',  # version
118     #         [m[1] for m in settings.MANAGERS],  # adminEmails
119     #         make_time_naive(self.earliest_datestamp),  # earliest datestamp of any change
120     #         'persistent',  # deletedRecord
121     #         'YYYY-MM-DDThh:mm:ssZ',  # granularity
122     #         ['identity'],  # compression
123     #         []  # descriptions
124     #         )
125     #     return ident
126
127     # def books(self, tag, from_, until):
128     #     if tag:
129     #         # we do not support sets, since they are problematic for deleted books.
130     #         raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
131     #         # books = Book.tagged.with_all([tag])
132     #     else:
133     #         books = Book.objects.all()
134     #     deleted = Deleted.objects.exclude(slug__exact=u'')
135     #
136     #     books = books.order_by('changed_at')
137     #     deleted = deleted.order_by('deleted_at')
138     #     if from_:
139     #         books = books.filter(changed_at__gte=from_)
140     #         deleted = deleted.filter(deleted_at__gte=from_)
141     #     if until:
142     #         books = books.filter(changed_at__lte=until)
143     #         deleted = deleted.filter(deleted_at__lte=until)
144     #     return list(books) + list(deleted)
145
146     # @staticmethod
147     # def tag_to_setspec(tag):
148     #     return "%s:%s" % (tag.category, tag.slug)
149
150     # @staticmethod
151     # def setspec_to_tag(s):
152     #     if not s: return None
153     #     cs = s.split(':')
154     #     if len(cs) == 2:
155     #         if not cs[0] in Catalogue.TAG_CATEGORIES:
156     #             raise error.NoSetHierarchyError("No category part in set")
157     #         tag = Tag.objects.get(slug=cs[1], category=cs[0])
158     #         return tag
159     #     raise error.NoSetHierarchyError("Setspec should have two components: category:slug")
160
161     # def getRecord(self, **kw):
162     #     """Returns (header, metadata, about) for given record."""
163     #     slug = self.identifier_to_slug(kw['identifier'])
164     #     try:
165     #         book = Book.objects.get(slug=slug)
166     #         return self.record_for_book(book)
167     #     except Book.DoesNotExist:
168     #         book_type = ContentType.objects.get_for_model(Book)
169     #         try:
170     #             deleted_book = Deleted.objects.get(content_type=book_type,
171     #                                               slug=slug)
172     #         except:
173     #             raise error.IdDoesNotExistError("No item for this identifier")
174     #         return self.record_for_book(deleted_book)
175
176     # def validate_kw(self, kw):
177     #     if 'resumptionToken' in kw:
178     #         raise error.BadResumptionTokenError("No resumption token support at this point")
179     #     if 'metadataPrefix' in kw and not self.metadata_registry.hasWriter(kw['metadataPrefix']):
180     #         raise error.CannotDisseminateFormatError("This format is not supported")
181
182     # def identifier_to_slug(self, ident):
183     #     return ident.split(':')[-1]
184
185     # def slug_to_identifier(self, slug):
186     #     return self.oai_id % slug
187
188     # def listIdentifiers(self, **kw):
189     #     self.validate_kw(kw)
190     #     records = [self.record_for_book(book, headers_only=True) for
191     #                book in self.books(None,
192     #                        kw.get('from_', None),
193     #                        kw.get('until', None))]
194     #     return records, None
195
196     # def listRecords(self, **kw):
197     #     """
198     #         can get a resumptionToken kw.
199     #         returns result, token
200     #     """
201     #     self.validate_kw(kw)
202     #     records = [self.record_for_book(book) for
203     #                book in self.books(None,
204     #                        kw.get('from_', None),
205     #                        kw.get('until', None))]
206     #
207     #     return records, None
208
209     # def listMetadataFormats(self, **kw):
210     #     formats = [
211     #         ('oai_dc',
212     #          'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
213     #          server.NS_OAIDC),
214     #         ('qdc',
215     #          'http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd',
216     #          NS_DCTERMS)]
217     #     if 'identifier' in kw:
218     #         slug = self.identifier_to_slug(kw['identifier'])
219     #         try:
220     #             b = Book.objects.get(slug=slug)
221     #             return formats
222     #         except:
223     #             try:
224     #                 d = Deleted.objects.get(slug=slug)
225     #                 return []
226     #             except:
227     #                 raise error.IdDoesNotExistError("This id does not exist")
228     #     else:
229     #         return formats
230
231     # def listSets(self, **kw):
232     #     raise error.NoSetHierarchyError("Wolne Lektury does not support sets.")
233     #     # tags = []
234     #     # for category in Catalogue.TAG_CATEGORIES:
235     #     #     for tag in Tag.objects.filter(category=category):
236     #     #         tags.append(("%s:%s" % (tag.category, tag.slug),
237     #     #                      tag.name,
238     #     #                      tag.description))
239     #     # return tags, None