X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/b6ec0976a383cc1823f4a199bc3e6dc40880b049..a1a95fe51f648a7fff2cf92c55a2706d6f7fa3e0:/librarian/__init__.py

Summary of this patch (librarian/__init__.py):
  * WLURI gains DEFAULT_LANGUAGE and an example URI, accepts digits in slugs,
    anchors the URI pattern at the end, raises ValueError instead of asserting
    on a non-matching URI, and falls back to DEFAULT_LANGUAGE when the URI
    carries no language code.
  * WLURI gains from_slug_and_lang(), __unicode__(), __str__(), __eq__(),
    filename_stem() and validate_language().
  * DocProvider.by_uri() takes the URI class as an optional parameter.
  * DirDocProvider.by_slug_and_lang() derives the file name from
    WLURI.filename_stem().
  * DEFAULT_BOOKINFO reuses WLURI.DEFAULT_LANGUAGE and WLURI.example.

NOTE(review): this copy was rebuilt from a whitespace-collapsed rendering.
Line breaks were placed so that every hunk matches its @@ line counts, but
indentation and blank-line placement on context/removed lines are a best
guess -- regenerate the patch from the repository before applying it.
NOTE(review): relative to the recorded commit, three things were repaired on
the spot: the (?P<slug>...) / (?P<lang>...) group names that the rendering had
stripped, the operator-precedence bug in WLURI.__eq__ (the unparenthesised
form builds an always-truthy 3-tuple), and a docstring typo. The "index" line
still names the original blobs.

diff --git a/librarian/__init__.py b/librarian/__init__.py
index fdd6b55..42bc518 100644
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -3,6 +3,8 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
+from __future__ import with_statement
+
 import os
 import re
 import shutil
@@ -65,19 +67,61 @@ WLNS = EmptyNamespace()
 
 class WLURI(object):
     """Represents a WL URI. Extracts slug and language from it."""
+    DEFAULT_LANGUAGE = u'pol'
 
     slug = None
     language = None
 
+    example = 'http://wolnelektury.pl/katalog/lektura/template/'
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/'
-            '(?P<slug>[-a-z]+)(/(?P<lang>[a-z]{3})/?)?')
+            '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$')
+
+    def __init__(self, uri=None):
+        if uri is not None:
+            uri = unicode(uri)
+            self.uri = uri
+            match = self._re_wl_uri.match(uri)
+            if not match:
+                raise ValueError('Supplied URI (%s) does not match '
+                        'the WL document URI template.' % uri)
+            self.slug = match.group('slug')
+            self.language = match.group('lang') or self.DEFAULT_LANGUAGE
 
-    def __init__(self, uri):
-        self.uri = uri
-        match = self._re_wl_uri.match(uri)
-        assert match
-        self.slug = match.group('slug')
-        self.language = match.group('lang')
+    @classmethod
+    def from_slug_and_lang(cls, slug, lang):
+        """Constructs a URI from slug and language code.
+
+        >>> WLURI.from_slug_and_lang('a-slug', WLURI.DEFAULT_LANGUAGE).uri
+        u'http://wolnelektury.pl/katalog/lektura/a-slug/'
+        >>> WLURI.from_slug_and_lang('a-slug', 'deu').uri
+        u'http://wolnelektury.pl/katalog/lektura/a-slug/deu/'
+
+        """
+        if lang is None:
+            lang = cls.DEFAULT_LANGUAGE
+        uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
+        if lang is not None and lang != cls.DEFAULT_LANGUAGE:
+            uri += lang + '/'
+        return cls(uri)
+
+    def __unicode__(self):
+        return self.uri
+
+    def __str__(self):
+        return self.uri
+
+    def __eq__(self, other):
+        return (self.slug, self.language) == (other.slug, other.language)
+
+    def filename_stem(self):
+        stem = self.slug
+        if self.language != self.DEFAULT_LANGUAGE:
+            stem += '_' + self.language
+        return stem
+
+    def validate_language(self, language):
+        if language != self.language:
+            raise ValidationError("Incorrect language definition in URI")
 
 
 class DocProvider(object):
@@ -94,9 +138,9 @@ class DocProvider(object):
         """Should return a file-like object with a WL document XML."""
         return self.by_slug_and_lang(slug)
 
-    def by_uri(self, uri):
+    def by_uri(self, uri, wluri=WLURI):
         """Should return a file-like object with a WL document XML."""
-        wluri = WLURI(uri)
+        wluri = wluri(uri)
         return self.by_slug_and_lang(wluri.slug, wluri.language)
 
 
@@ -109,7 +153,7 @@ class DirDocProvider(DocProvider):
         return super(DirDocProvider, self).__init__()
 
     def by_slug_and_lang(self, slug, lang=None):
-        fname = "%s%s.xml" % (slug, ".%s" % lang if lang else "")
+        fname = WLURI.from_slug_and_lang(slug, lang).filename_stem() + '.xml'
         return open(os.path.join(self.dir, fname))
 
 
@@ -124,7 +168,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
         DCNS('subject.type'): [u'Unknown'],
         DCNS('subject.genre'): [u'Unknown'],
         DCNS('date'): ['1970-01-01'],
-        DCNS('language'): [u'pol'],
+        DCNS('language'): [WLURI.DEFAULT_LANGUAGE],
         # DCNS('date'): [creation_date],
         DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
         DCNS('description'):
@@ -132,8 +176,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
         Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
         wykonana przez Bibliotekę Narodową z egzemplarza
         pochodzącego ze zbiorów BN."""],
-        DCNS('identifier.url'):
-            [u"http://wolnelektury.pl/katalog/lektura/template"],
+        DCNS('identifier.url'): [WLURI.example],
         DCNS('rights'):
             [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"]
         })