1 # -*- coding: utf-8 -*-
2 from xml.parsers.expat import ExpatError
# Import ElementTree from anywhere: try each known provider in turn and fall
# through to the next one only when the import fails.
try:
    import xml.etree.ElementTree as ET  # Python >= 2.5
except ImportError:
    try:
        import elementtree.ElementTree as ET  # effbot's pure Python module
    except ImportError:
        import lxml.etree as ET  # ElementTree API using libxml2
# Names exported by a star-import of this module.
__all__ = (
    'parse',
    'ParseError',
)
class ParseError(Exception):
    """Error raised when a document cannot be parsed."""

    def __init__(self, message):
        """Create the error.

        message -- description of what went wrong; it becomes the
                   exception's only argument.
        """
        # super() takes the class first and the instance second.
        super(ParseError, self).__init__(message)
class XMLNamespace(object):
    """Represents an XML namespace identified by its URI.

    Calling an instance with a local tag name returns the qualified name in
    the ``{uri}tag`` form; the ``in`` operator tests whether such a
    qualified name belongs to this namespace.
    """

    def __init__(self, uri):
        """Remember the namespace URI."""
        self.uri = uri

    def __call__(self, tag):
        """Return *tag* qualified with this namespace, as ``{uri}tag``."""
        return '{%s}%s' % (self.uri, tag)

    def __contains__(self, tag):
        """Return True if the qualified name *tag* is in this namespace."""
        # Match the exact '{uri}' prefix that __call__ emits; comparing
        # against the bare URI can never match a qualified name.
        return tag.startswith('{%s}' % self.uri)

    def __repr__(self):
        return 'XMLNamespace(%r)' % self.uri

    def __str__(self):
        return '%s' % self.uri
class BookInfo(object):
    """Book metadata read from, and written back to, an RDF description.

    Every element listed in ``mapping`` that occurs in the parsed document
    is stored as a plain instance attribute; attributes for elements that
    did not occur are simply not set.
    """

    RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
    DC = XMLNamespace('http://purl.org/dc/elements/1.1/')

    # Qualified element name -> (attribute name, converter).  A converter is
    # called as converter(element_text, current_attribute_value).
    mapping = {
        DC('creator'): ('author', converters.str_to_person),
        DC('title'): ('title', converters.str_to_unicode),
        DC('subject.period'): ('epoch', converters.str_to_unicode),
        DC('subject.type'): ('kind', converters.str_to_unicode),
        DC('subject.genre'): ('genre', converters.str_to_unicode),
        DC('date'): ('created_at', converters.str_to_date),
        DC('date.pd'): ('released_to_public_domain_at', converters.str_to_date),
        DC('contributor.translator'): ('translator', converters.str_to_person),
        DC('contributor.technical_editor'): ('technical_editor', converters.str_to_person),
        DC('publisher'): ('publisher', converters.str_to_unicode),
        DC('source'): ('source_name', converters.str_to_unicode),
        DC('source.URL'): ('source_url', converters.str_to_unicode),
        DC('identifier.url'): ('url', converters.str_to_unicode),
        DC('relation.hasPart'): ('parts', converters.str_to_unicode_list),
    }

    @classmethod
    def from_string(cls, xml):
        """Build an instance from a string holding the XML document."""
        from StringIO import StringIO
        return cls.from_file(StringIO(xml))

    @classmethod
    def from_file(cls, xml_file):
        """Build an instance from *xml_file*, which is handed to ET.parse.

        Raises ParseError when the document cannot be parsed or contains no
        RDF Description element.
        """
        book_info = cls()

        # Depending on the ElementTree implementation in use, malformed input
        # is reported as ExpatError or as the ET module's own ParseError.
        xml_errors = (ExpatError, getattr(ET, 'ParseError', ExpatError))
        try:
            tree = ET.parse(xml_file)
        except xml_errors as error:
            raise ParseError(str(error))

        # './/' searches every descendant of the root; a leading '//' is an
        # absolute path that ElementTree only accepts with a warning.
        description = tree.find('.//' + cls.RDF('Description'))
        if description is None:
            raise ParseError('no Description tag found in document')

        for element in description.findall('*'):
            book_info.parse_element(element)

        return book_info

    def parse_element(self, element):
        """Store the converted text of *element* on its mapped attribute.

        Elements whose tag is not listed in ``mapping`` are ignored.
        """
        entry = self.mapping.get(element.tag)
        if entry is None:
            return
        attribute, converter = entry
        # The converter also receives the value stored so far (None when the
        # attribute has not been set yet).
        current = getattr(self, attribute, None)
        setattr(self, attribute, converter(element.text, current))

    def to_xml(self):
        """XML representation of this object."""
        # Prefer the public registration hook; older ElementTree releases
        # only expose the private prefix table.
        register = getattr(ET, 'register_namespace', None)
        for prefix, namespace in (('rdf', self.RDF), ('dc', self.DC)):
            if register is not None:
                register(prefix, str(namespace))
            else:
                ET._namespace_map[str(namespace)] = prefix

        root = ET.Element(self.RDF('RDF'))
        description = ET.SubElement(root, self.RDF('Description'))

        for tag, (attribute, _converter) in self.mapping.iteritems():
            # Only attributes that were actually set are serialized.
            if hasattr(self, attribute):
                e = ET.SubElement(description, tag)
                e.text = unicode(getattr(self, attribute))

        return unicode(ET.tostring(root, 'utf-8'), 'utf-8')
def parse(file_name):
    """Return the BookInfo that BookInfo.from_file builds from *file_name*."""
    book_info = BookInfo.from_file(file_name)
    return book_info
if __name__ == '__main__':
    # Command-line use: parse the file named by the first argument and print
    # every mapped attribute (None for those that were not set).
    import sys

    info = parse(sys.argv[1])
    for attribute, _ in BookInfo.mapping.values():
        print('%s: %r' % (attribute, getattr(info, attribute, None)))