From 918a554570309fdddb8158b963fef3b3ae3c2c2b Mon Sep 17 00:00:00 2001
From: Radek Czajka
Date: Wed, 7 Dec 2011 13:01:25 +0100
Subject: [PATCH] move WL-specific stuff to WLURI

---
Review notes (text in this section is not part of the commit message):

  * The copy of this patch under review had lost its line breaks and
    indentation, and the regex named groups had been stripped.  The layout
    below is a reconstruction: the indentation and line splits of context
    lines are assumed to follow the usual 4-space style and must be checked
    against the tree before applying.  The author's e-mail address was not
    present in the reviewed copy and has not been invented.
  * Restored the named groups (?P<slug>...) and (?P<lang>...) in _re_wl_uri;
    __init__ reads them with match.group('slug') / match.group('lang').
  * from_slug_and_lang is a classmethod, so the default language is now
    read from cls (the original used the undefined name self).
  * __eq__ now compares (slug, language) tuples.  The original expression
    built a 3-tuple, which is always truthy.
  * parser.py now reads book_info.url (the field name registered in
    dcparser.py) rather than book_info.uri.  BookInfo is only partly
    visible in this patch, so confirm no uri attribute exists elsewhere.
  * Added one-line docstrings to the new methods; hunk sizes and the
    diffstat were recomputed for the six extra lines.  The post-image blob
    ids on the index lines are those of the original patch.

 librarian/__init__.py | 64 ++++++++++++++++++++++++++++++++++++-------
 librarian/dcparser.py | 12 ++++----
 librarian/parser.py   |  3 +-
 3 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/librarian/__init__.py b/librarian/__init__.py
index 9c48a02..52234fe 100644
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -67,19 +67,64 @@ WLNS = EmptyNamespace()
 
 
 class WLURI(object):
     """Represents a WL URI. Extracts slug and language from it."""
+    DEFAULT_LANGUAGE = u'pol'
 
     slug = None
     language = None
+    example = 'http://wolnelektury.pl/katalog/lektura/template/'
 
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/'
             '(?P<slug>[-a-z]+)(/(?P<lang>[a-z]{3})/?)?')
 
-    def __init__(self, uri):
-        self.uri = uri
-        match = self._re_wl_uri.match(uri)
-        assert match
-        self.slug = match.group('slug')
-        self.language = match.group('lang')
+    def __init__(self, uri=None):
+        """Parse uri if given; otherwise leave the instance unpopulated."""
+        if uri is not None:
+            self.uri = uri
+            match = self._re_wl_uri.match(uri)
+            assert match
+            self.slug = match.group('slug')
+            self.language = match.group('lang') or self.DEFAULT_LANGUAGE
+
+    @classmethod
+    def from_slug_and_lang(cls, slug, lang):
+        """Constructs a URI from slug and language code.
+
+        >>> WLURI.from_slug_and_lang('a-slug', WLURI.DEFAULT_LANGUAGE).uri
+        'http://wolnelektury.pl/katalog/lektura/a-slug/'
+        >>> WLURI.from_slug_and_lang('a-slug', 'deu').uri
+        'http://wolnelektury.pl/katalog/lektura/a-slug/deu/'
+
+        """
+        if lang is None:
+            lang = cls.DEFAULT_LANGUAGE
+        uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
+        if lang is not None and lang != cls.DEFAULT_LANGUAGE:
+            uri += lang + '/'
+        instance = cls()
+        instance.slug = slug
+        instance.language = lang
+        instance.uri = uri
+        return instance
+
+    def __unicode__(self):
+        """Return the full URI text."""
+        return self.uri
+
+    def __eq__(self, other):
+        """Two URIs are equal when both slug and language match."""
+        return (self.slug, self.language) == (other.slug, other.language)
+
+    def filename_stem(self):
+        """Return the slug, plus '_' and language if it is not the default."""
+        stem = self.slug
+        if self.language != self.DEFAULT_LANGUAGE:
+            stem += '_' + self.language
+        return stem
+
+    def validate_language(self, language):
+        """Raise ValidationError if language differs from self.language."""
+        if language != self.language:
+            raise ValidationError("Incorrect language definition in URI")
 
 
 class DocProvider(object):
@@ -111,7 +156,7 @@ class DirDocProvider(DocProvider):
         return super(DirDocProvider, self).__init__()
 
     def by_slug_and_lang(self, slug, lang=None):
-        fname = "%s%s.xml" % (slug, ".%s" % lang if lang else "")
+        fname = WLURI.from_slug_and_lang(slug, lang).filename_stem() + '.xml'
         return open(os.path.join(self.dir, fname))
 
 
@@ -126,7 +171,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
         DCNS('subject.type'): [u'Unknown'],
         DCNS('subject.genre'): [u'Unknown'],
         DCNS('date'): ['1970-01-01'],
-        DCNS('language'): [u'pol'],
+        DCNS('language'): [WLURI.DEFAULT_LANGUAGE],
         # DCNS('date'): [creation_date],
         DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
         DCNS('description'):
@@ -134,8 +179,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
         Wolne Lektury (http://wolnelektury.pl).
         Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza
         pochodzącego ze zbiorów BN."""],
-        DCNS('identifier.url'):
-            [u"http://wolnelektury.pl/katalog/lektura/template"],
+        DCNS('identifier.url'): [WLURI.example],
         DCNS('rights'):
             [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"]
     })
diff --git a/librarian/dcparser.py b/librarian/dcparser.py
index 5492f7a..ae8466c 100644
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -141,7 +141,7 @@ class BookInfo(object):
         Field( DCNS('publisher'), 'publisher'),
         Field( DCNS('source'), 'source_name', required=False),
         Field( DCNS('source.URL'), 'source_url', required=False),
-        Field( DCNS('identifier.url'), 'url'),
+        Field( DCNS('identifier.url'), 'url', WLURI),
         Field( DCNS('relation.hasPart'), 'parts', multiple=True, required=False),
         Field( DCNS('rights.license'), 'license', required=False),
         Field( DCNS('rights'), 'license_description'),
@@ -149,10 +149,6 @@ class BookInfo(object):
         Field( DCNS('description'), 'description', required=False),
     )
 
-    @property
-    def slug(self):
-        return WLURI(self.url).slug
-
     @classmethod
     def from_string(cls, xml):
         from StringIO import StringIO
@@ -216,6 +212,12 @@ class BookInfo(object):
             self.fmap[field.name] = field
             if field.salias:
                 self.fmap[field.salias] = field
+        self.validate()
+
+    def validate(self):
+        """Check that the URL's language matches self.language."""
+        self.url.validate_language(self.language)
+
     def __getattribute__(self, name):
         try:
             field = object.__getattribute__(self, 'fmap')[name]
diff --git a/librarian/parser.py b/librarian/parser.py
index 469b7df..5ae06e2 100644
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -191,7 +191,8 @@ class WLDocument(object):
             if make_author_dir:
                 save_path = os.path.join(save_path,
                         unicode(self.book_info.author).encode('utf-8'))
-            save_path = os.path.join(save_path, self.book_info.slug)
+            save_path = os.path.join(save_path,
+                    self.book_info.url.filename_stem())
             if ext:
                 save_path += '.%s' % ext
         else:
-- 
2.20.1