X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/1f9c252b0da609f7dff02ab74b60f39f57dd2e3d..f164694b5e7ad5ed5f6d95743f9259bd3a9292bd:/src/librarian/dcparser.py diff --git a/src/librarian/dcparser.py b/src/librarian/dcparser.py index f402c04..ce03be2 100644 --- a/src/librarian/dcparser.py +++ b/src/librarian/dcparser.py @@ -1,11 +1,11 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # from xml.parsers.expat import ExpatError from datetime import date +import io import time import re -import six from librarian.util import roman_to_int from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS, @@ -15,14 +15,13 @@ import lxml.etree as etree from lxml.etree import XMLSyntaxError from librarian.meta.types.bool import BoolValue -from librarian.meta.types.date import DateValue from librarian.meta.types.person import Person from librarian.meta.types.wluri import WLURI -from librarian.meta.types.text import TextValue +from librarian.meta.types import text -class Field(object): - def __init__(self, uri, attr_name, value_type=TextValue, +class Field: + def __init__(self, uri, attr_name, value_type=text.TextValue, multiple=False, salias=None, **kwargs): self.uri = uri self.name = attr_name @@ -88,23 +87,7 @@ class Field(object): return False -class DCInfo(type): - def __new__(mcs, classname, bases, class_dict): - fields = list(class_dict['FIELDS']) - - for base in bases[::-1]: - if hasattr(base, 'FIELDS'): - for field in base.FIELDS[::-1]: - try: - fields.index(field) - except ValueError: - fields.insert(0, field) - - class_dict['FIELDS'] = tuple(fields) - return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict) - - -class WorkInfo(six.with_metaclass(DCInfo, object)): +class BookInfo: FIELDS = ( Field(DCNS('creator'), 'authors', Person, salias='author', multiple=True), @@ -121,7 +104,7 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): Field(DCNS('contributor.thanks'), 'thanks', required=False), Field(DCNS('date'), 'created_at'), - Field(DCNS('date.pd'), 'released_to_public_domain_at', DateValue, + Field(DCNS('date.pd'), 'released_to_public_domain_at', required=False), Field(DCNS('publisher'), 'publisher', multiple=True), @@ -141,6 +124,48 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): Field(WLNS('contentWarning'), 'content_warnings', multiple=True, required=False), Field(WLNS('developmentStage'), 'stage', required=False), + + Field(DCNS('audience'), 'audiences', text.Audience, salias='audience', multiple=True, + required=False), + + Field(DCNS('subject.period'), 'epochs', text.Epoch, salias='epoch', multiple=True, + required=False), + Field(DCNS('subject.type'), 'kinds', text.Kind, salias='kind', multiple=True, + required=False), + Field(DCNS('subject.genre'), 'genres', text.Genre, salias='genre', multiple=True, + required=False), + Field('category.legimi', 'legimi', text.LegimiCategory, required=False), + Field('category.thema.main', 'thema_main', text.MainThemaCategory, required=False), + Field('category.thema', 'thema', text.ThemaCategory, required=False, multiple=True), + Field(DCNS('subject.location'), 'location', required=False), + + Field(DCNS('contributor.translator'), 'translators', + Person, salias='translator', multiple=True, required=False), + Field(DCNS('relation.hasPart'), 'parts', WLURI, + multiple=True, required=False), + Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, + required=False), + + Field(DCNS('relation.coverImage.url'), 'cover_url', required=False), + Field(DCNS('relation.coverImage.attribution'), 'cover_by', + required=False), + Field(DCNS('relation.coverImage.source'), 'cover_source', + required=False), + # WLCover-specific. + Field(WLNS('coverBarColor'), 'cover_bar_color', required=False), + Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False), + Field(WLNS('coverClass'), 'cover_class', default=['default']), + Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True, + required=False), + Field(WLNS('endnotes'), 'endnotes', BoolValue, + required=False), + + Field('pdf-id', 'isbn_pdf', required=False), + Field('epub-id', 'isbn_epub', required=False), + Field('mobi-id', 'isbn_mobi', required=False), + Field('txt-id', 'isbn_txt', required=False), + Field('html-id', 'isbn_html', required=False), + ) @classmethod @@ -151,7 +176,7 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): @classmethod def from_bytes(cls, xml, *args, **kwargs): - return cls.from_file(six.BytesIO(xml), *args, **kwargs) + return cls.from_file(io.BytesIO(xml), *args, **kwargs) @classmethod def from_file(cls, xmlfile, *args, **kwargs): @@ -237,7 +262,6 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): for field in self.FIELDS: value = field.validate(dc_fields, fallbacks=fallbacks, strict=strict, validate_required=validate_required) - print(field.name, value) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field if field.salias: @@ -303,11 +327,11 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): for x in v: e = etree.Element(field.uri) if x is not None: - e.text = six.text_type(x) + e.text = str(x) description.append(e) else: e = etree.Element(field.uri) - e.text = six.text_type(v) + e.text = str(v) description.append(e) return root @@ -322,9 +346,9 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): if field.multiple: if len(v) == 0: continue - v = [six.text_type(x) for x in v if x is not None] + v = [str(x) for x in v if x is not None] else: - v = six.text_type(v) + v = str(v) dc[field.name] = {'uri': field.uri, 'value': v} rdf['fields'] = dc @@ -339,62 +363,18 @@ class WorkInfo(six.with_metaclass(DCInfo, object)): if field.multiple: if len(v) == 0: continue - v = [six.text_type(x) for x in v if x is not None] + v = [str(x) for x in v if x is not None] else: - v = six.text_type(v) + v = str(v) result[field.name] = v if field.salias: v = getattr(self, field.salias) if v is not None: - result[field.salias] = six.text_type(v) + result[field.salias] = str(v) return result -class BookInfo(WorkInfo): - FIELDS = ( - Field(DCNS('audience'), 'audiences', salias='audience', multiple=True, - required=False), - - Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, - required=False), - Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True, - required=False), - Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True, - required=False), - Field(WLNS('category.legimi'), 'legimi', required=False), - - Field(DCNS('subject.location'), 'location', required=False), - - Field(DCNS('contributor.translator'), 'translators', - Person, salias='translator', multiple=True, required=False), - Field(DCNS('relation.hasPart'), 'parts', WLURI, - multiple=True, required=False), - Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI, - required=False), - - Field(DCNS('relation.coverImage.url'), 'cover_url', required=False), - Field(DCNS('relation.coverImage.attribution'), 'cover_by', - required=False), - Field(DCNS('relation.coverImage.source'), 'cover_source', - required=False), - # WLCover-specific. - Field(WLNS('coverBarColor'), 'cover_bar_color', required=False), - Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False), - Field(WLNS('coverClass'), 'cover_class', default=['default']), - Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True, - required=False), - Field(WLNS('endnotes'), 'endnotes', BoolValue, - required=False), - - Field('pdf-id', 'isbn_pdf', required=False), - Field('epub-id', 'isbn_epub', required=False), - Field('mobi-id', 'isbn_mobi', required=False), - Field('txt-id', 'isbn_txt', required=False), - Field('html-id', 'isbn_html', required=False), - ) - - def parse(file_name, cls=BookInfo): return cls.from_file(file_name)