From f1ae0a40b774ad7260e7449986764c9104553e03 Mon Sep 17 00:00:00 2001 From: Jan Szejko Date: Wed, 25 Jan 2017 17:16:14 +0100 Subject: [PATCH] accept missing dc tags --- librarian/pypdf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/librarian/pypdf.py b/librarian/pypdf.py index aa4dc1d..920f1b9 100644 --- a/librarian/pypdf.py +++ b/librarian/pypdf.py @@ -83,7 +83,7 @@ class EduModule(Xmill): def get_dc(self, element, dc_field, single=False): values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri})) if single: - return values[0] + return values[0] if len(values) else '' return values def handle_rdf__RDF(self, _): @@ -114,7 +114,10 @@ class EduModule(Xmill): @escape(True) def get_description(self, element): - return self.get_dc(element, 'description', single=True) + desc = self.get_dc(element, 'description', single=True) + if not desc: + print '!! no descripton' + return desc def handle_utwor(self, element): lines = [ -- 2.20.1