ptrad
[redakcja.git] / src / sources / document.py
1 import os
2 from librarian import RDFNS, DCNS
3 from lxml import etree
4 from datetime import date
5 from . import ocr
6 from django.conf import settings
7
8
def build_document_texts(book):
    """Render *book* as XML once for every entry in ``text_builders``.

    Each rendering is an independent ``utwor`` tree that holds the metadata
    written by ``add_rdf`` followed by a ``powiesc`` element. The builder is
    called with that ``powiesc`` element and each item yielded by
    ``get_ocr_files()`` of every source in ``book.booksource_set``.

    Returns a list of pretty-printed unicode XML strings, one per builder,
    in the order of ``text_builders``.
    """
    def render(fill_page):
        # A fresh tree per builder, so renderings never share nodes.
        document = etree.Element('utwor')
        add_rdf(document, book)
        body = etree.SubElement(document, 'powiesc')
        for source in book.booksource_set.all():
            for ocr_page in source.get_ocr_files():
                fill_page(body, ocr_page)
        return etree.tostring(document, encoding='unicode', pretty_print=True)

    return [render(builder) for builder in text_builders]
25
26
# Page builders taken from the ocr module. build_document_texts() renders the
# whole book once per entry, calling the builder as builder(master, page), so
# the order of this list is the order of the texts it returns.
text_builders = [
    ocr.add_page_to_master,
    ocr.add_page_to_master_as_stanzas,
    ocr.add_page_to_master_as_p,
]
32
33
def add_rdf(root, book):
    """Append an RDF metadata section describing *book* to *root*.

    Creates ``RDF > Description`` (RDFNS-namespaced) under *root* and fills
    the description with DCNS-namespaced children, in this order: one
    ``creator`` per author, one ``contributor.translator`` per translator,
    ``title``, ``date`` (today's date in ISO format), ``date.pd``,
    ``publisher``, ``language``, a single ``source`` listing all source
    names, ``identifier.url`` built from the book slug, an empty ``rights``,
    then one ``subject.period`` / ``subject.type`` / ``subject.genre`` per
    epoch / kind / genre tag.

    The function mutates *root* in place and returns nothing.
    """
    # TODO: to librarian
    rdf_node = etree.SubElement(root, RDFNS('RDF'))
    description = etree.SubElement(rdf_node, RDFNS('Description'))

    def put(term, value):
        # Append one DCNS(term) child to the description and set its text.
        etree.SubElement(description, DCNS(term)).text = value

    # People are written as "last name, first name" (the *_pl name fields).
    for person in book.authors.all():
        put('creator', f'{person.last_name_pl}, {person.first_name_pl}')
    for person in book.translators.all():
        put(
            'contributor.translator',
            f'{person.last_name_pl}, {person.first_name_pl}',
        )

    put('title', book.title)
    # Creation date of the generated document, i.e. today.
    put('date', date.today().isoformat())
    # NOTE(review): str() would render an unset pd_year as the literal
    # 'None' -- confirm pd_year is always populated for books reaching here.
    put('date.pd', str(book.pd_year))
    put('publisher', 'Fundacja Wolne Lektury')
    put('language', book.language)  # 3to2?

    # Not emitted yet: description, source_name.
    # TODO: allow multiple source meta entries.
    source_names = [
        book_source.source.name
        for book_source in book.booksource_set.all()
    ]
    put('source', ';\n '.join(source_names))

    put(
        'identifier.url',
        f'https://wolnelektury.pl/katalog/lektura/{book.slug}/',
    )

    # license? license_description? -- only an empty rights entry for now.
    put('rights', '')

    # Classification tags: epochs, kinds, genres.
    for tag in book.epochs.all():
        put('subject.period', tag.name)
    for tag in book.kinds.all():
        put('subject.type', tag.name)
    for tag in book.genres.all():
        put('subject.genre', tag.name)
76
77