1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian.
5 # Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska <fundacja@nowoczesnapolska.org.pl>
7 # For full list of contributors see AUTHORS file.
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU Affero General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU Affero General Public License for more details.
19 # You should have received a copy of the GNU Affero General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
class ParseError(Exception):
    """Error raised for a parsing failure, wrapping whatever caused it.

    Attributes:
        cause: the object given as ``cause`` (presumably the original
            exception -- confirm against callers).
        message: human-readable description, chosen in ``__init__``.
    """

    def __init__(self, cause, message=None):
        """Remember ``cause`` and choose the message.

        A non-empty ``message`` wins; otherwise ``cause.message`` is used;
        if ``cause`` has no ``message`` attribute, "No message." is stored.
        """
        self.cause = cause
        # The fallback text applies only when the cause carries no `message`
        # attribute at all, so it never overwrites a message that was found.
        self.message = message or getattr(cause, 'message', "No message.")
class ValidationError(Exception):
    """Base class for the validation errors defined in this module."""
class NoDublinCore(ValidationError):
    """Validation error specific to Dublin Core metadata.

    NOTE(review): judging by the name, presumably raised when a document
    has no Dublin Core section -- confirm against the raising code.
    """
class XMLNamespace(object):
    """A handy structure to represent names in an XML namespace.

    Calling an instance with a local tag name yields the qualified
    ``{uri}tag`` form, and ``name in ns`` tests whether a qualified name
    starts with this namespace's ``{uri}`` prefix.
    """

    def __init__(self, uri):
        """Store the namespace URI."""
        self.uri = uri

    def __call__(self, tag):
        """Return ``tag`` qualified with this namespace, as ``{uri}tag``."""
        return '{%s}%s' % (self.uri, tag)

    def __contains__(self, tag):
        """Return True if ``tag`` starts with this namespace's prefix."""
        return tag.startswith('{' + str(self) + '}')

    def __repr__(self):
        return 'XMLNamespace(%r)' % self.uri

    def __str__(self):
        # __contains__ relies on str(self) being exactly the URI.
        return '%s' % self.uri
class EmptyNamespace(XMLNamespace):
    """The namespace of names that carry no namespace URI."""

    def __init__(self):
        super(EmptyNamespace, self).__init__('')

    def __call__(self, tag):
        """Return ``tag`` unchanged: such names have no ``{uri}`` prefix."""
        return tag

    def __contains__(self, tag):
        """Return True if ``tag`` has no ``{uri}`` prefix."""
        # The inherited check would look for a literal '{}' prefix, which
        # names produced by __call__ never carry.
        return not tag.startswith('{')
# Namespaces used throughout the package, named by their URIs.
RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')  # RDF syntax
DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')  # Dublin Core elements 1.1
XINS = XMLNamespace('http://www.w3.org/2001/XInclude')  # XInclude
XHTMLNS = XMLNamespace('http://www.w3.org/1999/xhtml')  # XHTML

# Names that carry no namespace URI.
WLNS = EmptyNamespace()
72 import lxml.etree as etree
# Template metadata; it is the default `bookinfo` argument of wrap_text().
# dcparser.BookInfo is given two mappings: one with an RDFNS('about') key,
# and one keyed by DCNS-qualified field names whose values are lists.
# NOTE(review): the two bare list values near the end (the project blurb and
# the "Domena publiczna" text) have no visible key in this view; they look
# like the Dublin Core description and rights entries -- confirm.
75 DEFAULT_BOOKINFO = dcparser.BookInfo(
76 { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},\
77 { DCNS('creator'): [u'Some, Author'],
78 DCNS('title'): [u'Some Title'],
79 DCNS('subject.period'): [u'Unknown'],
80 DCNS('subject.type'): [u'Unknown'],
81 DCNS('subject.genre'): [u'Unknown'],
82 DCNS('date'): ['1970-01-01'],
83 # DCNS('date'): [creation_date],
84 DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
86 [u"""Publikacja zrealizowana w ramach projektu
87 Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
88 wykonana przez Bibliotekę Narodową z egzemplarza
89 pochodzącego ze zbiorów BN."""],
90 DCNS('identifier.url'):
91 [u"http://wolnelektury.pl/katalog/lektura/template"],
93 [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
def xinclude_forURI(uri):
    """Return a serialized XInclude ``include`` element pointing at ``uri``.

    The element is created in the XInclude namespace (``XINS``) and is
    returned as a unicode string.
    """
    include = etree.Element(XINS("include"))
    # An xi:include element names its target through the `href` attribute;
    # without it the `uri` argument would be ignored.
    include.set("href", uri)
    return etree.tostring(include, encoding=unicode)
def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
    """Wrap the text within the minimal XML structure with a DC template.

    Sets ``bookinfo.created_at`` to ``creation_date``, serializes
    ``bookinfo.to_etree()`` as pretty-printed XML, and returns a unicode
    document: a ``<utwor>`` element holding that metadata followed by
    ``ocrtext`` inside ``<plain-text>``.
    """
    # NOTE(review): this assignment mutates the object that was passed in,
    # which includes the shared module-level default -- confirm that callers
    # expect this.
    bookinfo.created_at = creation_date

    metadata = etree.tostring(
        bookinfo.to_etree(),
        method='xml',
        encoding=unicode,
        pretty_print=True
    )

    pieces = [
        u'<utwor>\n',
        metadata,
        u'\n<plain-text>\n',
        ocrtext,
        u'\n</plain-text>\n</utwor>',
    ]
    return u''.join(pieces)
def serialize_raw(element):
    """Return the content of ``element`` as one unicode string.

    The result is the element's leading text (empty if there is none)
    followed by each child serialized as pretty-printed XML, in document
    order.
    """
    parts = [element.text or u'']
    for child in element.iterchildren():
        # Collect every serialized child; the joined result is returned so
        # the work done in this loop is not discarded.
        parts.append(etree.tostring(child, method='xml', encoding=unicode,
                                    pretty_print=True))
    return u''.join(parts)
# Serializer functions keyed by format name; serialize_children() looks up
# its `format` argument in this mapping.
SERIALIZERS = {
    'raw': serialize_raw,
}
def serialize_children(element, format='raw'):
    """Serialize ``element`` with the serializer registered under ``format``.

    Raises KeyError when ``format`` is not a key of ``SERIALIZERS``.
    """
    serializer = SERIALIZERS[format]
    return serializer(element)