beta implementation
[wolnelektury.git] / apps / oai / handlers.py
1 from oaipmh import server, common, metadata, error
2 from catalogue.models import Book, Tag
3 from api.models import Deleted
4 from api.handlers import WL_BASE
5 from librarian.dcparser import BookInfo
6 from django.contrib.contenttypes.models import ContentType
7 from django.contrib.auth.models import User
8 from datetime import datetime
9 from lxml import etree
10 from lxml.etree import ElementTree
11 from django.db.models import Q
12
13
def _dc_xpath(*elements):
    """Return an XPath union selecting the text of the given dc:* elements.

    Each element name is looked up under rdf:RDF/rdf:Description; several
    names are combined with the XPath union operator.
    """
    return ' | '.join(
        'rdf:RDF/rdf:Description/dc:%s/text()' % element
        for element in elements)


# Reader that maps the Dublin Core entries of a book's RDF description onto
# OAI Dublin Core fields. Every field is read as a list of text values.
# 'relation' and 'coverage' are not mapped.
wl_dc_reader = metadata.MetadataReader(
    fields={
        'title': ('textList', _dc_xpath('title')),
        'creator': ('textList', _dc_xpath('creator')),
        'subject': ('textList', _dc_xpath('subject.period',
                                          'subject.type',
                                          'subject.genre')),
        'description': ('textList', _dc_xpath('description')),
        'publisher': ('textList', _dc_xpath('publisher')),
        'contributor': ('textList', _dc_xpath('contributor.editor',
                                              'contributor.translator',
                                              'contributor.technical_editor')),
        'date': ('textList', _dc_xpath('date')),
        'type': ('textList', _dc_xpath('type')),
        'format': ('textList', _dc_xpath('format')),
        'identifier': ('textList', _dc_xpath('identifier.url')),
        'source': ('textList', _dc_xpath('source')),
        'language': ('textList', _dc_xpath('language')),
        'rights': ('textList', _dc_xpath('rights')),
    },
    namespaces={
        'dc': 'http://purl.org/dc/elements/1.1/',
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    },
)
36
37
class Catalogue(common.ResumptionOAIPMH):
    """OAI-PMH repository interface for the book catalogue.

    Live records are built from ``Book`` rows and deleted records from
    ``Deleted`` rows. Record identifiers are slugs; set specs have the form
    ``<category>:<slug>`` for tags whose category is in ``TAG_CATEGORIES``.
    """

    # Tag categories that may be addressed as OAI sets.
    TAG_CATEGORIES = ['author', 'epoch', 'kind', 'genre']

    def __init__(self):
        """Compute the earliest datestamp and collect superuser e-mails."""
        super(Catalogue, self).__init__()

        # Fallback datestamp used when a table has no rows yet.
        year_zero = datetime(1990, 1, 1, 0, 0, 0)

        # Indexing an empty queryset raises IndexError; only that case falls
        # back to year_zero, so genuine errors are no longer hidden.
        try:
            earliest_change = \
                Book.objects.order_by('changed_at')[0].changed_at
        except IndexError:
            earliest_change = year_zero

        try:
            earliest_delete = \
                Deleted.objects.order_by('deleted_at')[0].deleted_at
        except IndexError:
            earliest_delete = year_zero

        self.earliest_datestamp = min(earliest_change, earliest_delete)

        # admins
        self.admin_emails = [
            u.email for u in User.objects.filter(is_superuser=True)]

    def metadata(self, book):
        """Parse the book's XML file and return its Dublin Core field map."""
        xml = etree.parse(book.xml_file)
        md = wl_dc_reader(xml)
        return md.getMap()

    def record_for_book(self, book, headers_only=False):
        """Build the ``(header, metadata, about)`` triple for one record.

        ``book`` is either a ``Book`` (live record, stamped with
        ``changed_at``) or a ``Deleted`` (deleted record, stamped with
        ``deleted_at`` and carrying empty metadata). With ``headers_only``
        the metadata element is ``None``. ``about`` is always ``None``.

        Raises ``TypeError`` for any other kind of object.
        """
        if isinstance(book, Book):
            is_deleted = False
            datestamp = book.changed_at
        elif isinstance(book, Deleted):
            is_deleted = True
            datestamp = book.deleted_at
        else:
            raise TypeError(
                'expected Book or Deleted, got %s' % type(book).__name__)

        header = common.Header(book.slug, datestamp, [], is_deleted)

        meta = None
        if not headers_only:
            if is_deleted:
                meta = common.Metadata({})
            else:
                meta = common.Metadata(self.metadata(book))
        return header, meta, None

    def identify(self, **kw):
        """Return the repository description for the Identify verb."""
        # NOTE(review): OAI-PMH 2.0 repositories are expected to report
        # protocol version '2.0' -- confirm whether '1.1' is intentional.
        ident = common.Identify(
            'Wolne Lektury',  # repository name
            '%s/oaipmh' % WL_BASE,  # base URL
            '1.1',  # version
            self.admin_emails,  # adminEmails
            self.earliest_datestamp,  # earliest datestamp of any change
            'persistent',  # deletedRecord
            'YYYY-MM-DDThh:mm:ssZ',  # granularity
            'identity',  # compression
            []  # descriptions
            )
        return ident

    def books(self, tag, from_, until):
        """Return live books followed by deleted books in a date range.

        ``tag`` (optional) restricts the live books to those carrying it.
        Deleted records are not restricted by tag. ``from_`` and ``until``
        are optional inclusive bounds applied to ``changed_at`` for live
        books and to ``deleted_at`` for deleted ones.
        """
        if tag:
            books = Book.tagged.with_all([tag])
        else:
            books = Book.objects.all()
        # Only deletions of books are book records; getRecord applies the
        # same content-type restriction.
        deleted = Deleted.objects.filter(
            content_type=ContentType.objects.get_for_model(Book))

        books = books.order_by('changed_at')
        if from_:
            books = books.filter(changed_at__gte=from_)
            deleted = deleted.filter(deleted_at__gte=from_)
        if until:
            books = books.filter(changed_at__lte=until)
            deleted = deleted.filter(deleted_at__lte=until)
        return list(books) + list(deleted)

    @staticmethod
    def tag_to_setspec(tag):
        """Return the ``<category>:<slug>`` set spec for a tag."""
        return "%s:%s" % (tag.category, tag.slug)

    @staticmethod
    def setspec_to_tag(s):
        """Resolve a ``<category>:<slug>`` set spec to a ``Tag``.

        Returns ``None`` for an empty spec. Raises
        ``error.NoSetHierarchyError`` when the spec is malformed or names a
        category outside ``TAG_CATEGORIES``, and
        ``error.NoRecordsMatchError`` when no such tag exists.
        """
        if not s:
            return None
        parts = s.split(':')
        if len(parts) != 2 or parts[0] not in Catalogue.TAG_CATEGORIES:
            raise error.NoSetHierarchyError()
        category, slug = parts
        try:
            return Tag.objects.get(slug=slug, category=category)
        except Tag.DoesNotExist:
            # A well-formed spec for a tag that does not exist selects
            # nothing; report a protocol error instead of crashing.
            raise error.NoRecordsMatchError()

    def getRecord(self, **kw):
        """Return ``(header, metadata, about)`` for the requested record.

        The slug is read from the ``identifier`` argument (the GetRecord
        argument defined by OAI-PMH); the legacy ``record`` key is still
        accepted. Raises ``error.IdDoesNotExistError`` when the slug matches
        neither a live book nor a single deleted book.
        """
        slug = kw['identifier'] if 'identifier' in kw else kw['record']
        try:
            book = Book.objects.get(slug=slug)
        except Book.DoesNotExist:
            book_type = ContentType.objects.get_for_model(Book)
            try:
                book = Deleted.objects.get(content_type=book_type,
                                           slug=slug)
            except (Deleted.DoesNotExist, Deleted.MultipleObjectsReturned):
                raise error.IdDoesNotExistError()
        return self.record_for_book(book)

    def _selected_books(self, kw):
        """Run ``books()`` with the set and date bounds found in ``kw``."""
        # NOTE(review): pyoai is believed to pass the lower bound as
        # 'from_'; the plain 'from' key is kept as a fallback -- confirm.
        from_ = kw.get('from_', kw.get('from', None))
        return self.books(
            self.setspec_to_tag(kw.get('set', None)),
            from_,
            kw.get('until', None))

    def listIdentifiers(self, **kw):
        """Return header-only record triples for the selected books."""
        # NOTE(review): unlike listRecords this returns a bare list without
        # a resumption token -- confirm which shape the server layer expects.
        records = [self.record_for_book(book, headers_only=True)
                   for book in self._selected_books(kw)]
        return records

    def listRecords(self, **kw):
        """Return ``(records, token)`` for the selected books.

        A ``resumptionToken`` keyword may be passed but is not used; all
        records are returned at once and the token is always ``None``.
        """
        records = [self.record_for_book(book)
                   for book in self._selected_books(kw)]

        return records, None

    def listMetadataFormats(self, **kw):
        """Return the single supported format as (prefix, schema, ns)."""
        # NOTE(review): OAI-PMH names the mandatory Dublin Core prefix
        # 'oai_dc'; 'oaidc' may be a typo -- confirm against the metadata
        # registry used by the server before changing it.
        return [('oaidc',
                 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
                 server.NS_OAIDC)]

    def listSets(self, **kw):
        """Return ``(setSpec, name, description)`` for every exposed tag."""
        tags = []
        for category in Catalogue.TAG_CATEGORIES:
            for tag in Tag.objects.filter(category=category):
                tags.append((Catalogue.tag_to_setspec(tag),
                             tag.name,
                             tag.description))
        return tags