+from .tag import Tag, Category
+
+
def metadata_from_text(text):
    """Extract a metadata dict from an XML document given as text.

    The document is parsed with lxml. The result always has a ``'title'``
    key, taken from the text of the first ``header`` descendant of the
    root (looked up without a namespace first, then in the
    ``http://nowoczesnapolska.org.pl/sst#`` namespace). A single space is
    used when no header is found or its text is empty.

    If the root has a direct ``metadata`` child (plain or in the same SST
    namespace), its Dublin Core children are read:

    * the text of ``relation.coverimage.url`` is stored under
      ``'cover_url'``;
    * for every ``Category``, elements named after ``category.dc_tag``
      with non-empty text are stored under that tag: as a list of texts
      when ``category.multiple`` is true, otherwise as a single text.
      For a single-valued category the last value wins, and
      ``'multiple_values'`` is set to the tag name when a value for that
      tag had already been stored.

    Args:
        text: XML source as a text string.

    Returns:
        dict: the collected metadata, or
        ``{'title': '<<Resource invalid>>'}`` when the text cannot be
        parsed.
    """
    from lxml import etree

    sst_ns = '{http://nowoczesnapolska.org.pl/sst#}'
    dc_ns = '{http://purl.org/dc/elements/1.1/}'

    # Strip byte-order marks, which are not valid inside the document.
    text = text.replace(u'\ufeff', '')
    # This is bad. The editor shouldn't spew unknown HTML entities.
    # `&nbsp;` is not a predefined XML entity, so the parser would reject
    # it; substitute the literal no-break space character instead.
    text = text.replace(u'&nbsp;', u'\u00a0')

    try:
        tree = etree.fromstring(text)
    except Exception:
        # Best effort: any parse failure yields a placeholder title, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return {'title': '<<Resource invalid>>'}

    metadata = {}

    header = tree.find('.//header')
    if header is None:
        header = tree.find('.//' + sst_ns + 'header')
    # `header` may still be None here; getattr() covers that case, and
    # `or` covers a header whose text is None or empty.
    metadata['title'] = getattr(header, 'text', ' ') or ' '

    meta_elem = tree.find('metadata')
    if meta_elem is None:
        meta_elem = tree.find(sst_ns + 'metadata')
    if meta_elem is None:
        return metadata

    cover = meta_elem.find(dc_ns + 'relation.coverimage.url')
    if cover is not None:
        metadata['cover_url'] = cover.text

    for category in Category.objects.all():
        tag = category.dc_tag
        for elem in meta_elem.findall(dc_ns + tag):
            if elem.text is None:
                continue
            if category.multiple:
                metadata.setdefault(tag, []).append(elem.text)
            else:
                if tag in metadata:
                    # A single-valued category occurred more than once;
                    # record which tag it was and keep the latest value.
                    metadata['multiple_values'] = tag
                metadata[tag] = elem.text
    return metadata