From: Marek Stępniowski Date: Wed, 3 Sep 2008 16:36:44 +0000 (+0200) Subject: Moved dcparser to lib folder. X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/e180078f82a3d7e73857a9544b3b7fdfd475f93f?hp=3618eb351066fde7992f64c81a053d0a89db2235 Moved dcparser to lib folder. --- diff --git a/catalogue/lib/dcparser/__init__.py b/catalogue/lib/dcparser/__init__.py deleted file mode 100644 index 793f5270d..000000000 --- a/catalogue/lib/dcparser/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from dcparser import parse, ParseError -from person import Person - diff --git a/catalogue/lib/dcparser/converters.py b/catalogue/lib/dcparser/converters.py deleted file mode 100644 index 773aeddcb..000000000 --- a/catalogue/lib/dcparser/converters.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -from datetime import date -import time -import re - -from person import Person - - -def str_to_unicode(value): - return unicode(value) - - -def str_to_person(value): - comma_count = value.count(',') - - if comma_count == 0: - last_name, first_names = value, [] - elif comma_count == 1: - last_name, first_names = value.split(',') - first_names = [name for name in first_names.split(' ') if len(name)] - else: - raise ValueError("value contains more than one comma: %r" % value) - - return Person(last_name.strip(), *first_names) - - -def str_to_date(value): - try: - t = time.strptime(value, '%Y-%m-%d') - except ValueError: - t = time.strptime(value, '%Y') - return date(t[0], t[1], t[2]) - - diff --git a/catalogue/lib/dcparser/dcparser.py b/catalogue/lib/dcparser/dcparser.py deleted file mode 100644 index e8a733a73..000000000 --- a/catalogue/lib/dcparser/dcparser.py +++ /dev/null @@ -1,121 +0,0 @@ -# -*- coding: utf-8 -*- -from xml.parsers.expat import ExpatError - -# Import ElementTree from anywhere -try: - import xml.etree.ElementTree as ET # Python >= 2.5 -except ImportError: - try: - import elementtree.ElementTree as ET # effbot's pure Python module - except ImportError: - import lxml.etree as ET # ElementTree API using libxml2 - -import converters - - - -__all__ = ('parse', 'ParseError') - - - -class ParseError(Exception): - def __init__(self, message): - super(self, Exception).__init__(message) - - - -class XMLNamespace(object): - '''Represents XML namespace.''' - - def __init__(self, uri): - self.uri = uri - - def __call__(self, tag): - return '{%s}%s' % (self.uri, tag) - - def __contains__(self, tag): - return tag.startswith(str(self)) - - def __repr__(self): - return 'NS(%r)' % self.uri - - def __str__(self): - return '%s' % self.uri - - - -class BookInfo(object): - RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') - DC = XMLNamespace('http://purl.org/dc/elements/1.1/') - - mapping = { - DC('creator') : ('author', converters.str_to_person), - DC('title') : ('title', converters.str_to_unicode), - DC('subject.period') : ('epoch', converters.str_to_unicode), - DC('subject.type') : ('kind', converters.str_to_unicode), - DC('subject.genre') : ('genre', converters.str_to_unicode), - DC('date') : ('created_at', converters.str_to_date), - DC('date.pd') : ('released_to_public_domain_at', converters.str_to_date), - DC('contributor.translator') : ('translator', converters.str_to_person), - DC('contributor.technical_editor') : ('technical_editor', converters.str_to_person), - DC('publisher') : ('publisher', converters.str_to_unicode), - DC('source') : ('source_name', converters.str_to_unicode), - DC('source.URL') : ('source_url', converters.str_to_unicode), - } - - - @classmethod - def from_string(cls, xml): - """docstring for from_string""" - from StringIO import StringIO - return cls.from_file(StringIO(xml)) - - - @classmethod - def from_file(cls, xml_file): - book_info = cls() - - try: - tree = ET.parse(xml_file) - except ExpatError, e: - raise ParseError(e) - - description = tree.find('//' + book_info.RDF('Description')) - if description is None: - raise ParseError('no Description tag found in document') - - for element in description.findall('*'): - book_info.parse_element(element) - - return book_info - - - def parse_element(self, element): - try: - attribute, converter = self.mapping[element.tag] - setattr(self, attribute, converter(element.text)) - except KeyError: - pass - - - def to_xml(self): - """XML representation of this object.""" - ET._namespace_map[str(self.RDF)] = 'rdf' - ET._namespace_map[str(self.DC)] = 'dc' - - root = ET.Element(self.RDF('RDF')) - description = ET.SubElement(root, self.RDF('Description')) - - for tag, (attribute, converter) in self.mapping.iteritems(): - if hasattr(self, attribute): - e = ET.Element(tag) - e.text = unicode(getattr(self, attribute)) - description.append(e) - - return unicode(ET.tostring(root, 'utf-8'), 'utf-8') - - -def parse(file_name): - return BookInfo.from_file(file_name) - - diff --git a/catalogue/lib/dcparser/person.py b/catalogue/lib/dcparser/person.py deleted file mode 100644 index 16412f768..000000000 --- a/catalogue/lib/dcparser/person.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - - -class Person(object): - """Single person with last name and a list of first names.""" - def __init__(self, last_name, *first_names): - self.last_name = last_name - self.first_names = first_names - - - def __eq__(self, right): - return self.last_name == right.last_name and self.first_names == right.first_names - - - def __unicode__(self): - if len(self.first_names) > 0: - return '%s, %s' % (self.last_name, ' '.join(self.first_names)) - else: - return self.last_name - - - def __repr__(self): - return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) - diff --git a/lib/dcparser/__init__.py b/lib/dcparser/__init__.py new file mode 100644 index 000000000..793f5270d --- /dev/null +++ b/lib/dcparser/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +from dcparser import parse, ParseError +from person import Person + diff --git a/lib/dcparser/converters.py b/lib/dcparser/converters.py new file mode 100644 index 000000000..773aeddcb --- /dev/null +++ b/lib/dcparser/converters.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +from datetime import date +import time +import re + +from person import Person + + +def str_to_unicode(value): + return unicode(value) + + +def str_to_person(value): + comma_count = value.count(',') + + if comma_count == 0: + last_name, first_names = value, [] + elif comma_count == 1: + last_name, first_names = value.split(',') + first_names = [name for name in first_names.split(' ') if len(name)] + else: + raise ValueError("value contains more than one comma: %r" % value) + + return Person(last_name.strip(), *first_names) + + +def str_to_date(value): + try: + t = time.strptime(value, '%Y-%m-%d') + except ValueError: + t = time.strptime(value, '%Y') + return date(t[0], t[1], t[2]) + + diff --git a/lib/dcparser/dcparser.py b/lib/dcparser/dcparser.py new file mode 100644 index 000000000..e8a733a73 --- /dev/null +++ b/lib/dcparser/dcparser.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +from xml.parsers.expat import ExpatError + +# Import ElementTree from anywhere +try: + import xml.etree.ElementTree as ET # Python >= 2.5 +except ImportError: + try: + import elementtree.ElementTree as ET # effbot's pure Python module + except ImportError: + import lxml.etree as ET # ElementTree API using libxml2 + +import converters + + + +__all__ = ('parse', 'ParseError') + + + +class ParseError(Exception): + def __init__(self, message): + super(self, Exception).__init__(message) + + + +class XMLNamespace(object): + '''Represents XML namespace.''' + + def __init__(self, uri): + self.uri = uri + + def __call__(self, tag): + return '{%s}%s' % (self.uri, tag) + + def __contains__(self, tag): + return tag.startswith(str(self)) + + def __repr__(self): + return 'NS(%r)' % self.uri + + def __str__(self): + return '%s' % self.uri + + + +class BookInfo(object): + RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') + DC = XMLNamespace('http://purl.org/dc/elements/1.1/') + + mapping = { + DC('creator') : ('author', converters.str_to_person), + DC('title') : ('title', converters.str_to_unicode), + DC('subject.period') : ('epoch', converters.str_to_unicode), + DC('subject.type') : ('kind', converters.str_to_unicode), + DC('subject.genre') : ('genre', converters.str_to_unicode), + DC('date') : ('created_at', converters.str_to_date), + DC('date.pd') : ('released_to_public_domain_at', converters.str_to_date), + DC('contributor.translator') : ('translator', converters.str_to_person), + DC('contributor.technical_editor') : ('technical_editor', converters.str_to_person), + DC('publisher') : ('publisher', converters.str_to_unicode), + DC('source') : ('source_name', converters.str_to_unicode), + DC('source.URL') : ('source_url', converters.str_to_unicode), + } + + + @classmethod + def from_string(cls, xml): + """docstring for from_string""" + from StringIO import StringIO + return cls.from_file(StringIO(xml)) + + + @classmethod + def from_file(cls, xml_file): + book_info = cls() + + try: + tree = ET.parse(xml_file) + except ExpatError, e: + raise ParseError(e) + + description = tree.find('//' + book_info.RDF('Description')) + if description is None: + raise ParseError('no Description tag found in document') + + for element in description.findall('*'): + book_info.parse_element(element) + + return book_info + + + def parse_element(self, element): + try: + attribute, converter = self.mapping[element.tag] + setattr(self, attribute, converter(element.text)) + except KeyError: + pass + + + def to_xml(self): + """XML representation of this object.""" + ET._namespace_map[str(self.RDF)] = 'rdf' + ET._namespace_map[str(self.DC)] = 'dc' + + root = ET.Element(self.RDF('RDF')) + description = ET.SubElement(root, self.RDF('Description')) + + for tag, (attribute, converter) in self.mapping.iteritems(): + if hasattr(self, attribute): + e = ET.Element(tag) + e.text = unicode(getattr(self, attribute)) + description.append(e) + + return unicode(ET.tostring(root, 'utf-8'), 'utf-8') + + +def parse(file_name): + return BookInfo.from_file(file_name) + + diff --git a/lib/dcparser/person.py b/lib/dcparser/person.py new file mode 100644 index 000000000..16412f768 --- /dev/null +++ b/lib/dcparser/person.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + + +class Person(object): + """Single person with last name and a list of first names.""" + def __init__(self, last_name, *first_names): + self.last_name = last_name + self.first_names = first_names + + + def __eq__(self, right): + return self.last_name == right.last_name and self.first_names == right.first_names + + + def __unicode__(self): + if len(self.first_names) > 0: + return '%s, %s' % (self.last_name, ' '.join(self.first_names)) + else: + return self.last_name + + + def __repr__(self): + return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) +