3c46ea6ac79c11cb9158c3d0e246c5d4cf5f9257
[wolnelektury.git] / src / catalogue / management / commands / eisbn_csv.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import csv
6 import sys
7 from django.core.management.base import BaseCommand
8 from django.utils.timezone import localtime
9
10 from catalogue.models import Book
11 from librarian import RDFNS, DCNS
12
13
# File formats that get a row in the export, in output order.
FORMATS = (
    'HTML',
    'PDF',
    'TXT',
    'EPUB',
    'MOBI',
)

# Reduced set of formats used for books that have child books.
FORMATS_WITH_CHILDREN = (
    'PDF',
    'EPUB',
    'MOBI',
)


# First product form code emitted for each file format.
PRODUCT_FORMS_1 = dict(
    HTML='EC',
    PDF='EB',
    TXT='EB',
    EPUB='ED',
    MOBI='ED',
)

# Second product form code emitted for each file format.
PRODUCT_FORMS_2 = dict(
    HTML='E105',
    PDF='E107',
    TXT='E112',
    EPUB='E101',
    MOBI='E127',
)
34
# Maps a volume designation found in a title (Polish ordinal or Roman
# numeral) to its number.  Each tuple lists the designations for volumes
# 1..6 in order, so the position within the tuple gives the number.
VOLUME_NUMBERS = {
    name: number
    for names in (
        (u'pierwszy', u'drugi', u'trzeci', u'czwarty', u'piąty', u'szósty'),
        (u'I', u'II', u'III', u'IV', u'V', u'VI'),
    )
    for number, name in enumerate(names, 1)
}
49
50
def is_institution(name):
    """Tell whether an author name denotes an institution.

    Only names beginning with u'Zgromadzenie Ogólne' are recognised as
    institutions; every other name yields False.
    """
    institution_prefix = u'Zgromadzenie Ogólne'
    return name.startswith(institution_prefix)
53
54
VOLUME_SEPARATOR = ', tom '


def get_volume(title):
    """Split a volume designation off a book title.

    The title is cut at the first VOLUME_SEPARATOR.  When the text after it
    is a key of VOLUME_NUMBERS, the result is the title without the volume
    part together with the volume number.  In every other case the title is
    returned unchanged with '' as the volume.
    """
    base_title, separator, volume_name = title.partition(VOLUME_SEPARATOR)
    # An empty separator means the title has no volume part at all.
    number = VOLUME_NUMBERS.get(volume_name) if separator else None
    if number is None:
        return title, ''
    return base_title, number
69
70
class Command(BaseCommand):
    """Print a CSV row for every (book, file format) pair on standard output.

    Books that have children only get the formats listed in
    FORMATS_WITH_CHILDREN; all other books get every format in FORMATS.
    """

    @staticmethod
    def dc_values(desc, tag):
        """Return the text of every DCNS(tag) element found under `desc`.

        Elements without text are skipped, so the result can always be
        passed to str.join.
        """
        return [
            e.text
            for e in desc.findall('.//' + DCNS(tag))
            if e.text is not None
        ]

    @staticmethod
    def _csv_cell(value):
        """Convert one row value into something csv.writer can write.

        Non-string values (the volume number is an int) are turned into
        text first.  On Python 2 text is then encoded as UTF-8, because the
        csv module there only handles byte strings; on Python 3 the writer
        takes text directly, and encoding would print b'...' literals.
        """
        text_type = type(u'')
        if not isinstance(value, (bytes, text_type)):
            value = text_type(value)
        if sys.version_info[0] < 3 and isinstance(value, text_type):
            value = value.encode('utf-8')
        return value

    def handle(self, *args, **options):
        """Write the CSV rows for all books to sys.stdout."""
        writer = csv.writer(sys.stdout)
        for book in Book.objects.all():
            desc = book.wldocument().edoc.find('.//' + RDFNS('Description'))

            # Everything except the product form codes depends on the book
            # alone, so it is computed once per book, not once per format.
            imprint = '; '.join(self.dc_values(desc, 'publisher'))
            title, volume = get_volume(book.title)
            subtitle = ''
            year = ''
            publication_date = localtime(book.created_at).date().isoformat()
            info_date = publication_date
            author = '; '.join(self.dc_values(desc, 'creator'))
            institution = is_institution(author)
            author_person = '' if institution else author
            author_institution = author if institution else ''
            publication_type = 'DGO'
            edition = '1'
            # A book without a language element gets an empty cell instead
            # of aborting the whole export with an IndexError.
            languages = self.dc_values(desc, 'language')
            language = languages[0] if languages else ''

            formats = FORMATS_WITH_CHILDREN if book.children.exists() else FORMATS
            for file_format in formats:
                row = [
                    imprint,
                    title,
                    subtitle,
                    year,
                    volume,
                    publication_date,
                    info_date,
                    author_person,
                    author_institution,
                    publication_type,
                    edition,
                    PRODUCT_FORMS_1[file_format],
                    PRODUCT_FORMS_2[file_format],
                    language,
                ]
                writer.writerow([self._csv_cell(value) for value in row])