X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/e6c778f6e92e584815a14d3e6a31a03a5e2ab7e1..e0f595e44766e352edfce0aaf5d32be57f448882:/src/sources/document.py diff --git a/src/sources/document.py b/src/sources/document.py new file mode 100644 index 00000000..e46ce2f5 --- /dev/null +++ b/src/sources/document.py @@ -0,0 +1,74 @@ +import os +from librarian import RDFNS, DCNS +from lxml import etree +from datetime import date +from . import ocr +from django.conf import settings + + +def build_document_texts(book_source): + texts = [] + for builder in text_builders: + root = etree.Element('utwor') + # add meta + add_rdf(root, book_source) + + # add master + master = etree.SubElement(root, 'powiesc') + + for page in book_source.get_ocr_files(): + builder(master, page) + + texts.append(etree.tostring(root, encoding='unicode', pretty_print=True)) + return texts + + +text_builders = [ + ocr.add_page_to_master, + ocr.add_page_to_master_as_stanzas, + ocr.add_page_to_master_as_p, +] + + +def add_rdf(root, book_source): + book = book_source.book + + # TODO: to librarian + rdf = etree.SubElement(root, RDFNS('RDF')) + desc = etree.SubElement(rdf, RDFNS('Description'), **{}) + + # author + for author in book.authors.all(): + etree.SubElement(desc, DCNS('creator')).text = f'{author.last_name_pl}, {author.first_name_pl}' + # translator + for tr in book.translators.all(): + etree.SubElement(desc, DCNS('contributor.translator')).text = f'{tr.last_name_pl}, {tr.first_name_pl}' + # title + etree.SubElement(desc, DCNS('title')).text = book.title + # created_at + etree.SubElement(desc, DCNS('date')).text = date.today().isoformat() + # date.pd + etree.SubElement(desc, DCNS('date.pd')).text = book.pd_year + #publisher + etree.SubElement(desc, DCNS('publisher')). text = 'Fundacja Wolne Lektury' + #language + etree.SubElement(desc, DCNS('language')).text = book.language # 3to2? + #description + #source_name + etree.SubElement(desc, DCNS('source')).text = book_source.source.name + #url + etree.SubElement(desc, DCNS('identifier.url')).text = f'https://wolnelektury.pl/katalog/lektura/{book.slug}/' + #license? + #license_description? + etree.SubElement(desc, DCNS('rights')).text = '' + #epochs + for tag in book.epochs.all(): + etree.SubElement(desc, DCNS('subject.period')).text = tag.name + #kinds + for tag in book.kinds.all(): + etree.SubElement(desc, DCNS('subject.type')).text = tag.name + #genres + for tag in book.genres.all(): + etree.SubElement(desc, DCNS('subject.genre')).text = tag.name + +