X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/e0f595e44766e352edfce0aaf5d32be57f448882..c46844cb5a794566d15437b0c4a809e9230baecf:/src/sources/document.py diff --git a/src/sources/document.py b/src/sources/document.py index e46ce2f5..7c31110f 100644 --- a/src/sources/document.py +++ b/src/sources/document.py @@ -6,19 +6,20 @@ from . import ocr from django.conf import settings -def build_document_texts(book_source): +def build_document_texts(book): texts = [] for builder in text_builders: root = etree.Element('utwor') # add meta - add_rdf(root, book_source) + add_rdf(root, book) # add master master = etree.SubElement(root, 'powiesc') - for page in book_source.get_ocr_files(): - builder(master, page) - + for book_source in book.booksource_set.all(): + for page in book_source.get_ocr_files(): + builder(master, page) + texts.append(etree.tostring(root, encoding='unicode', pretty_print=True)) return texts @@ -30,9 +31,7 @@ text_builders = [ ] -def add_rdf(root, book_source): - book = book_source.book - +def add_rdf(root, book): # TODO: to librarian rdf = etree.SubElement(root, RDFNS('RDF')) desc = etree.SubElement(rdf, RDFNS('Description'), **{}) @@ -48,14 +47,18 @@ def add_rdf(root, book_source): # created_at etree.SubElement(desc, DCNS('date')).text = date.today().isoformat() # date.pd - etree.SubElement(desc, DCNS('date.pd')).text = book.pd_year + etree.SubElement(desc, DCNS('date.pd')).text = str(book.pd_year) #publisher etree.SubElement(desc, DCNS('publisher')). text = 'Fundacja Wolne Lektury' #language etree.SubElement(desc, DCNS('language')).text = book.language # 3to2? #description #source_name - etree.SubElement(desc, DCNS('source')).text = book_source.source.name + # TODO: allow multiple source meta entries. + sources = [] + for book_source in book.booksource_set.all(): + sources.append(book_source.source.name) + etree.SubElement(desc, DCNS('source')).text = ';\n '.join(sources) #url etree.SubElement(desc, DCNS('identifier.url')).text = f'https://wolnelektury.pl/katalog/lektura/{book.slug}/' #license?