# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
#
from xml.parsers.expat import ExpatError
from datetime import date
+import io
import time
import re
-import six
from librarian.util import roman_to_int
from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
- XMLNS, WLURI, WLNS, PLMETNS)
+ XMLNS, WLNS, PLMETNS)
import lxml.etree as etree
from lxml.etree import XMLSyntaxError
from librarian.meta.types.bool import BoolValue
-from librarian.meta.types.date import DateValue
from librarian.meta.types.person import Person
-from librarian.meta.types.text import TextValue
+from librarian.meta.types.wluri import WLURI
+from librarian.meta.types import text
-class Field(object):
- def __init__(self, uri, attr_name, validator=TextValue, strict=None,
+class Field:
+ def __init__(self, uri, attr_name, value_type=text.TextValue,
multiple=False, salias=None, **kwargs):
self.uri = uri
self.name = attr_name
- self.validator = validator
- self.strict = strict
+ self.value_type = value_type
self.multiple = multiple
self.salias = salias
self.default = kwargs.get('default', [] if multiple else [None])
def validate_value(self, val, strict=False):
- if strict and self.strict is not None:
- validator = self.strict
- else:
- validator = self.validator
+ #if strict:
+ # value.validate()
+
try:
if self.multiple:
- if validator is None:
- return val
- new_values = []
- for v in val:
- nv = v
- if v is not None:
- #nv = validator(v)
- nv = v
- if hasattr(v, 'lang'):
- setattr(nv, 'lang', v.lang)
- new_values.append(nv)
- return new_values
+ return val
elif len(val) > 1:
raise ValidationError(
"Multiple values not allowed for field '%s'" % self.uri
% self.uri
)
else:
- if validator is None or val[0] is None:
- return val[0]
- #nv = validator(val[0])
- nv = val[0]
- if hasattr(val[0], 'lang') and not hasattr(validator, 'no_lang'):
- setattr(nv, 'lang', val[0].lang)
- return nv
+ return val[0]
except ValueError as e:
raise ValidationError(
"Field '%s' - invald value: %s"
return False
-class DCInfo(type):
- def __new__(mcs, classname, bases, class_dict):
- fields = list(class_dict['FIELDS'])
-
- for base in bases[::-1]:
- if hasattr(base, 'FIELDS'):
- for field in base.FIELDS[::-1]:
- try:
- fields.index(field)
- except ValueError:
- fields.insert(0, field)
-
- class_dict['FIELDS'] = tuple(fields)
- return super(DCInfo, mcs).__new__(mcs, classname, bases, class_dict)
-
-
-class WorkInfo(six.with_metaclass(DCInfo, object)):
+class BookInfo:
FIELDS = (
Field(DCNS('creator'), 'authors', Person, salias='author',
multiple=True),
Field(DCNS('contributor.thanks'), 'thanks', required=False),
Field(DCNS('date'), 'created_at'),
- Field(DCNS('date.pd'), 'released_to_public_domain_at', DateValue,
+ Field(DCNS('date.pd'), 'released_to_public_domain_at',
required=False),
Field(DCNS('publisher'), 'publisher', multiple=True),
Field(WLNS('contentWarning'), 'content_warnings', multiple=True,
required=False),
Field(WLNS('developmentStage'), 'stage', required=False),
+
+ Field(DCNS('audience'), 'audiences', text.Audience, salias='audience', multiple=True,
+ required=False),
+
+ Field(DCNS('subject.period'), 'epochs', text.Epoch, salias='epoch', multiple=True,
+ required=False),
+ Field(DCNS('subject.type'), 'kinds', text.Kind, salias='kind', multiple=True,
+ required=False),
+ Field(DCNS('subject.genre'), 'genres', text.Genre, salias='genre', multiple=True,
+ required=False),
+ Field('category.legimi', 'legimi', text.LegimiCategory, required=False),
+ Field('category.thema.main', 'thema_main', text.MainThemaCategory, required=False),
+ Field('category.thema', 'thema', text.ThemaCategory, required=False, multiple=True),
+ Field(DCNS('subject.location'), 'location', required=False),
+
+ Field(DCNS('contributor.translator'), 'translators',
+ Person, salias='translator', multiple=True, required=False),
+ Field(DCNS('relation.hasPart'), 'parts', WLURI,
+ multiple=True, required=False),
+ Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI,
+ required=False),
+
+ Field(DCNS('relation.coverImage.url'), 'cover_url', required=False),
+ Field(DCNS('relation.coverImage.attribution'), 'cover_by',
+ required=False),
+ Field(DCNS('relation.coverImage.source'), 'cover_source',
+ required=False),
+ # WLCover-specific.
+ Field(WLNS('coverBarColor'), 'cover_bar_color', required=False),
+ Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False),
+ Field(WLNS('coverClass'), 'cover_class', default=['default']),
+ Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True,
+ required=False),
+ Field(WLNS('endnotes'), 'endnotes', BoolValue,
+ required=False),
+
+ Field('pdf-id', 'isbn_pdf', required=False),
+ Field('epub-id', 'isbn_epub', required=False),
+ Field('mobi-id', 'isbn_mobi', required=False),
+ Field('txt-id', 'isbn_txt', required=False),
+ Field('html-id', 'isbn_html', required=False),
+
)
@classmethod
@classmethod
def from_bytes(cls, xml, *args, **kwargs):
- return cls.from_file(six.BytesIO(xml), *args, **kwargs)
+ return cls.from_file(io.BytesIO(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
p = p.getparent()
for e in desc.getchildren():
- field = cls.get_field_by_uri(e.tag)
+ tag = e.tag
+ if tag == 'meta':
+ meta_id = e.attrib.get('id')
+ if meta_id and meta_id.endswith('-id'):
+ tag = meta_id
+
+ field = cls.get_field_by_uri(tag)
if field is None:
# Ignore unknown fields.
- ### TODO: does it do <meta> for isbn?
continue
- fv = field_dict.get(e.tag, [])
+ fv = field_dict.get(tag, [])
if e.text is not None:
- val = field.validator(e.text)
+ val = field.value_type.from_text(e.text)
val.lang = e.attrib.get(XMLNS('lang'), lang)
-
- if e.tag == 'meta':
- meta_id = e.attrib.get('id')
- if meta_id and meta_id.endswith('-id'):
- field_dict[meta_id] = [val.replace('ISBN-', 'ISBN ')]
else:
val = e.text
fv.append(val)
- field_dict[e.tag] = fv
+ field_dict[tag] = fv
return cls(desc.attrib, field_dict, *args, **kwargs)
for x in v:
e = etree.Element(field.uri)
if x is not None:
- e.text = six.text_type(x)
+ e.text = str(x)
description.append(e)
else:
e = etree.Element(field.uri)
- e.text = six.text_type(v)
+ e.text = str(v)
description.append(e)
return root
if field.multiple:
if len(v) == 0:
continue
- v = [six.text_type(x) for x in v if x is not None]
+ v = [str(x) for x in v if x is not None]
else:
- v = six.text_type(v)
+ v = str(v)
dc[field.name] = {'uri': field.uri, 'value': v}
rdf['fields'] = dc
if field.multiple:
if len(v) == 0:
continue
- v = [six.text_type(x) for x in v if x is not None]
+ v = [str(x) for x in v if x is not None]
else:
- v = six.text_type(v)
+ v = str(v)
result[field.name] = v
if field.salias:
v = getattr(self, field.salias)
if v is not None:
- result[field.salias] = six.text_type(v)
+ result[field.salias] = str(v)
return result
-class BookInfo(WorkInfo):
- FIELDS = (
- Field(DCNS('audience'), 'audiences', salias='audience', multiple=True,
- required=False),
-
- Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
- required=False),
- Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
- required=False),
- Field(DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
- required=False),
- Field(WLNS('category.legimi'), 'legimi', required=False),
-
- Field(DCNS('subject.location'), 'location', required=False),
-
- Field(DCNS('contributor.translator'), 'translators',
- Person, salias='translator', multiple=True, required=False),
- Field(DCNS('relation.hasPart'), 'parts', WLURI,
- multiple=True, required=False),
- Field(DCNS('relation.isVariantOf'), 'variant_of', WLURI,
- required=False),
-
- Field(DCNS('relation.coverImage.url'), 'cover_url', required=False),
- Field(DCNS('relation.coverImage.attribution'), 'cover_by',
- required=False),
- Field(DCNS('relation.coverImage.source'), 'cover_source',
- required=False),
- # WLCover-specific.
- Field(WLNS('coverBarColor'), 'cover_bar_color', required=False),
- Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False),
- Field(WLNS('coverClass'), 'cover_class', default=['default']),
- Field(WLNS('coverLogoUrl'), 'cover_logo_urls', multiple=True,
- required=False),
- Field(WLNS('endnotes'), 'endnotes', BoolValue,
- required=False),
-
- Field('pdf-id', 'isbn_pdf', required=False),
- Field('epub-id', 'isbn_epub', required=False),
- Field('mobi-id', 'isbn_mobi', required=False),
- Field('txt-id', 'isbn_txt', required=False),
- Field('html-id', 'isbn_html', required=False),
- )
-
-
def parse(file_name, cls=BookInfo):
    """Return the result of ``cls.from_file(file_name)``.

    ``cls`` defaults to ``BookInfo``; any class exposing a compatible
    ``from_file`` classmethod may be passed instead.
    """
    info = cls.from_file(file_name)
    return info