eisbn csv generator
[wolnelektury.git] / src / catalogue / management / commands / eisbn_csv.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import csv
6 import sys
7 from django.core.management.base import BaseCommand
8 from django.utils.timezone import localtime
9
10 from catalogue.models import Book
11 from librarian import RDFNS, DCNS
12
13
# File formats that get a CSV row for a book without children.
FORMATS = ('HTML', 'PDF', 'TXT', 'EPUB', 'MOBI')

# Reduced set of formats used by the command for a book that has children.
FORMATS_WITH_CHILDREN = ('PDF', 'EPUB', 'MOBI')


# First product-form code written for each file format.
# NOTE(review): these look like ONIX "product form" codes -- confirm
# against the EDItEUR code lists before changing any of them.
PRODUCT_FORMS_1 = dict(
    HTML='EC',
    PDF='EB',
    TXT='EB',
    EPUB='ED',
    MOBI='ED',
)

# Second, more detailed product-form code written for each file format.
# NOTE(review): presumably ONIX "product form detail" codes -- confirm.
PRODUCT_FORMS_2 = dict(
    HTML='E105',
    PDF='E107',
    TXT='E112',
    EPUB='E101',
    MOBI='E127',
)
34
35
def is_institution(name):
    """Tell whether ``name`` is treated as an institutional author.

    A name counts as an institution only when it begins with the fixed
    prefix defined below; every other name is treated as a person.
    """
    institution_prefix = u'Zgromadzenie Ogólne'
    return name.startswith(institution_prefix)
38
39
class Command(BaseCommand):
    """Write one CSV row per (book, file format) pair to standard output.

    Each row carries the imprint, title, dates, author, publication type,
    edition, the two product-form codes and the language of a book.
    """

    help = 'Prints a CSV with e-ISBN data for every book and file format.'

    @staticmethod
    def dc_values(desc, tag):
        """Return the text of every ``DCNS(tag)`` element found under ``desc``.

        The list is in document order and may contain ``None`` for
        elements that have no text.
        """
        return [e.text for e in desc.findall('.//' + DCNS(tag))]

    @staticmethod
    def _encode_row(row):
        """Return ``row`` in the form the ``csv`` module expects.

        On Python 2 ``csv`` only handles byte strings, so every cell is
        encoded as UTF-8.  On Python 3 it expects text, and encoding would
        make it write ``b'...'`` reprs, so the row is passed through as is.
        """
        if sys.version_info[0] >= 3:
            return row
        return [s.encode('utf-8') for s in row]

    def handle(self, *args, **options):
        """Emit the CSV rows for all books.

        Books that have children get rows only for
        ``FORMATS_WITH_CHILDREN``; all other books get one row for each
        entry in ``FORMATS``.
        """
        writer = csv.writer(sys.stdout)
        for book in Book.objects.all():
            desc = book.wldocument().edoc.find('.//' + RDFNS('Description'))
            formats = FORMATS_WITH_CHILDREN if book.children.exists() else FORMATS

            # Everything below depends only on the book, not on the file
            # format, so compute it once per book instead of once per row.
            imprint = u'Fundacja Nowoczesna Polska'
            title = book.title
            subtitle = ''
            year = ''
            volume = ''
            publication_date = localtime(book.created_at).date().isoformat()
            info_date = publication_date

            # Classify each creator on its own: testing the joined string
            # would let the first creator decide the column for all of them.
            # Elements without text are skipped rather than crashing the join.
            creators = [c for c in self.dc_values(desc, 'creator') if c]
            author_person = '; '.join(
                c for c in creators if not is_institution(c))
            author_institution = '; '.join(
                c for c in creators if is_institution(c))

            publication_type = 'DGO'
            edition = '1'

            # A book without a language element gets an empty field instead
            # of aborting the whole export with an IndexError.
            languages = [l for l in self.dc_values(desc, 'language') if l]
            language = languages[0] if languages else ''

            for file_format in formats:
                row = [
                    imprint,
                    title,
                    subtitle,
                    year,
                    volume,
                    publication_date,
                    info_date,
                    author_person,
                    author_institution,
                    publication_type,
                    edition,
                    PRODUCT_FORMS_1[file_format],
                    PRODUCT_FORMS_2[file_format],
                    language,
                ]
                writer.writerow(self._encode_row(row))