X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/12b5230d8fdb3ad995e867fb5d58a69e8a627e68..c87453a1da79443d74132398e7dd1aaa83140fee:/librarian/dcparser.py diff --git a/librarian/dcparser.py b/librarian/dcparser.py index fd3eec5..0a2822c 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -40,13 +40,13 @@ class Person(object): surname = parts[0] names = [] elif len(parts) != 2: - raise ValueError("Invalid person name. There should be at most one comma: \"%s\"." % text) + raise ValueError("Invalid person name. There should be at most one comma: \"%s\"." % text.encode('utf-8')) else: surname = parts[0] if len(parts[1]) == 0: # there is no non-whitespace data after the comma raise ValueError("Found a comma, but no names given: \"%s\" -> %r." % (text, parts)) - names = [name for name in parts[1].split() if len(name)] # all non-whitespace tokens + names = parts[1].split() return cls(surname, *names) def readable(self): @@ -238,7 +238,7 @@ class WorkInfo(object): Field(DCNS('date'), 'created_at'), Field(DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), - Field(DCNS('publisher'), 'publisher'), + Field(DCNS('publisher'), 'publisher', multiple=True), Field(DCNS('language'), 'language'), Field(DCNS('description'), 'description', required=False), @@ -310,6 +310,10 @@ class WorkInfo(object): text = text.decode('utf-8') val = TextPlus(text) val.lang = e.attrib.get(XMLNS('lang'), lang) + if e.tag == 'meta': + meta_id = e.attrib.get('id') + if meta_id and meta_id.endswith('-id'): + field_dict[meta_id] = [val.replace('ISBN-', 'ISBN ')] else: val = e.text fv.append(val) @@ -458,6 +462,11 @@ class BookInfo(WorkInfo): # WLCover-specific. Field(WLNS('coverBarColor'), 'cover_bar_color', required=False), Field(WLNS('coverBoxPosition'), 'cover_box_position', required=False), + Field('pdf-id', 'isbn_pdf', required=False), + Field('epub-id', 'isbn_epub', required=False), + Field('mobi-id', 'isbn_mobi', required=False), + Field('txt-id', 'isbn_txt', required=False), + Field('html-id', 'isbn_html', required=False), )