increased word spacing in WL XML
[redakcja.git] / apps / catalogue / management / commands / add_parent.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import sys
7
8 from datetime import date
9 from lxml import etree
10
11 from django.core.management import BaseCommand
12
13 from catalogue.models import Book
14 from librarian import RDFNS, DCNS
15
# Skeleton of the generated parent document: a <utwor> root holding a single
# RDF description. %(slug)s is the new book's slug and %(dc)s is replaced with
# the newline-joined Dublin Core elements built in Command.handle.
TEMPLATE = '''<utwor>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="http://redakcja.wolnelektury.pl/documents/book/%(slug)s/">
%(dc)s
</rdf:Description>
</rdf:RDF>

</utwor>
'''

# A single Dublin Core element; %(tag)s is one of DC_TAGS and %(value)s is its
# text content. The dc namespace is declared on every element.
DC_TEMPLATE = '<dc:%(tag)s xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">%(value)s</dc:%(tag)s>'

# Dublin Core tags written to the parent document, in output order. Tags that
# Command.handle does not set itself are filled with the distinct values found
# in the children's descriptions.
DC_TAGS = (
    'creator',
    'title',
    'relation.hasPart',
    'contributor.translator',
    'contributor.editor',
    'contributor.technical_editor',
    'contributor.funding',
    'contributor.thanks',
    'publisher',
    'subject.period',
    'subject.type',
    'subject.genre',
    'description',
    'identifier.url',
    'source',
    'source.URL',
    'rights.license',
    'rights',
    'date.pd',
    'format',
    'type',
    'date',
    'audience',
    'language',
)

# Prefix prepended to a slug to form the 'identifier.url' and
# 'relation.hasPart' values.
IDENTIFIER_PREFIX = 'http://wolnelektury.pl/katalog/lektura/'
56
57
def dc_desc_element(book):
    """Return the first rdf:Description element of a book's materialized XML.

    The book is materialized, parsed with lxml, and searched at any depth
    for an element named ``RDFNS("Description")``. The result is whatever
    ``find`` yields for that path.
    """
    root = etree.fromstring(book.materialize())
    description_path = ".//" + RDFNS("Description")
    return root.find(description_path)
62
63
def distinct_dc_values(tag, desc_elements):
    """Collect the distinct text values of one Dublin Core tag.

    Args:
        tag: DC tag name without namespace (e.g. ``'creator'``); it is
            qualified with ``DCNS`` before the lookup.
        desc_elements: iterable of description elements to search. ``None``
            entries (a document in which no description was found) are
            skipped instead of raising ``AttributeError``.

    Returns:
        A set with the text of every child element matching the tag.
        Elements without text are left out, so the set never contains
        ``None``.
    """
    qualified_tag = DCNS(tag)
    values = set()
    for desc in desc_elements:
        # dc_desc_element() hands back find()'s result, which is None when
        # the document has no rdf:Description.
        if desc is None:
            continue
        values.update(
            elem.text for elem in desc.findall(qualified_tag)
            if elem.text is not None)
    return values
69
70
class Command(BaseCommand):
    """Create a parent book whose metadata is aggregated from child books.

    The new book's slug is given as the command argument; the slugs of the
    children are read from standard input, one per line.

    NOTE(review): ``args`` plus a positional ``handle`` parameter is the
    pre-argparse Django command style -- confirm it matches the Django
    version in use.
    """
    args = 'slug'

    def handle(self, slug, **options):
        """Build the parent document and create the book.

        Args:
            slug: slug of the parent book to create; also used as its
                gallery name.
            **options: standard management-command options (unused).

        Tags listed in ``own_attributes`` are set here; every other tag in
        ``DC_TAGS`` receives the distinct values found in the children's
        descriptions. The resulting XML is passed to ``Book.create``.
        """
        # Local import keeps this block self-contained.
        from xml.sax.saxutils import escape

        # Drop blank lines (e.g. a trailing newline) so they do not turn
        # into bogus relation.hasPart entries.
        children_slugs = [
            line.strip() for line in sys.stdin if line.strip()]
        children = Book.objects.filter(dc_slug__in=children_slugs)
        # A child without an rdf:Description yields None; leave it out so
        # the aggregation below does not fail on it.
        desc_elements = [
            desc
            for desc in (dc_desc_element(child) for child in children)
            if desc is not None]
        title = u'Utwory wybrane'
        own_attributes = {
            'title': title,
            'relation.hasPart': [IDENTIFIER_PREFIX + child_slug for child_slug in children_slugs],
            'identifier.url': IDENTIFIER_PREFIX + slug,
            'date': date.today().isoformat(),
        }
        dc_tags = []
        for tag in DC_TAGS:
            if tag in own_attributes:
                values = own_attributes[tag]
                if not isinstance(values, list):
                    values = [values]
            else:
                # Skip missing texts (they would be rendered as "None") and
                # sort so the generated document is deterministic.
                values = sorted(
                    value
                    for value in distinct_dc_values(tag, desc_elements)
                    if value is not None)
            for value in values:
                # Values are parsed text, so '&', '<' and '>' must be
                # re-escaped before being embedded in XML again.
                dc_tags.append(
                    DC_TEMPLATE % {'tag': tag, 'value': escape(value)})
        xml = TEMPLATE % {'slug': slug, 'dc': '\n'.join(dc_tags)}
        Book.create(
            text=xml,
            creator=None,
            slug=slug,
            title=title,
            gallery=slug)