"""Represents a WL URI. Extracts slug from it."""
slug = None
- example = 'http://wolnelektury.pl/katalog/lektura/template/'
- _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektura/'
+ example = 'http://edukacjamedialna.edu.pl/'
+ _re_wl_uri = re.compile(r'http://(www\.)?edukacjamedialna.edu.pl/'
'(?P<slug>[-a-z0-9]+)/?$')
def __init__(self, uri):
u'http://wolnelektury.pl/katalog/lektura/a-slug/'
"""
- uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
+ uri = 'http://prawokultury.pl/publikacje/%s/' % slug
return cls(uri)
def __unicode__(self):
DEFAULT_BOOKINFO = dcparser.BookInfo(
{ RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
- { DCNS('creator'): [u'Some, Author'],
+ {
+ DCNS('creator.expert'): [u'Some, Author'],
+ DCNS('creator.scenario'): [u'Some, Author'],
+ DCNS('creator.textbook'): [u'Some, Author'],
DCNS('title'): [u'Some Title'],
DCNS('subject.period'): [u'Unknown'],
DCNS('subject.type'): [u'Unknown'],
return os.path.join(os.path.dirname(__file__), path)
- class OutputFile(object):
- """Represents a file returned by one of the converters."""
-
+ class IOFile(object):
+ """ Represents a file fed as input or returned as a result. """
_string = None
_filename = None
+ _filename_tmp = False
+
+ def __init__(self, attachments=None):
+ self.attachments = attachments or {}
def __del__(self):
- if self._filename:
+ if self._filename_tmp:
os.unlink(self._filename)
def __nonzero__(self):
return self._string is not None or self._filename is not None
@classmethod
- def from_string(cls, string):
+ def from_string(cls, string, *args, **kwargs):
"""Converter returns contents of a file as a string."""
- instance = cls()
+ instance = cls(*args, **kwargs)
instance._string = string
return instance
@classmethod
- def from_filename(cls, filename):
+ def from_filename(cls, filename, *args, **kwargs):
"""Converter returns contents of a file as a named file."""
- instance = cls()
+ instance = cls(*args, **kwargs)
instance._filename = filename
return instance
temp.write(self._string)
temp.close()
self._filename = temp.name
+ self._filename_tmp = True
return self._filename
else:
return None
os.makedirs(dirname)
shutil.copy(self.get_filename(), path)
+    def dump_to(self, path, directory=None):
+        """Save the main file as `path` and every attachment beside it.
+
+        `path` is the target name of the main file.  Each entry of
+        `self.attachments` is saved under its key in the directory that
+        contains `path`.  `directory` is accepted but not used here.
+        """
+        self.save_as(path)
+        target_dir = os.path.dirname(os.path.abspath(path))
+        for name in self.attachments:
+            self.attachments[name].save_as(os.path.join(target_dir, name))
+
+
+class Format(object):
+    """Generic format class.
+
+    Holds the document it was created for together with the keyword
+    options passed at construction; `build` is not implemented here.
+    """
+
+    def __init__(self, wldoc, **kwargs):
+        """Store the document and the keyword customization options."""
+        self.customization = kwargs
+        self.wldoc = wldoc
+
+    def build(self):
+        """Always raises NotImplementedError in the generic class."""
+        raise NotImplementedError
+
class URLOpener(urllib.FancyURLopener):
version = 'FNP Librarian (http://github.com/fnp/librarian)'
ns[f.__name__] = f
-def reg_substitute_entities():
- ENTITY_SUBSTITUTIONS = [
- (u'---', u'—'),
- (u'--', u'–'),
- (u'...', u'…'),
- (u',,', u'„'),
- (u'"', u'”'),
- ]
-
- def substitute_entities(context, text):
- """XPath extension function converting all entites in passed text."""
- if isinstance(text, list):
- text = ''.join(text)
- for entity, substitutution in ENTITY_SUBSTITUTIONS:
- text = text.replace(entity, substitutution)
- return text
+# Plain-text sequences and the typographic characters replacing them.
+# Applied in order, so u'---' is consumed before u'--' can match.
+ENTITY_SUBSTITUTIONS = [
+    (u'---', u'—'),
+    (u'--', u'–'),
+    (u'...', u'…'),
+    (u',,', u'„'),
+    (u'"', u'”'),
+]
+
+def substitute_entities(context, text):
+    """XPath extension function converting all entities in passed text.
+
+    `text` may be a string or a list of strings (joined first).  Every
+    pair from ENTITY_SUBSTITUTIONS is applied in list order and the
+    resulting string is returned.  `context` is not used.
+    """
+    result = ''.join(text) if isinstance(text, list) else text
+    for plain, typographic in ENTITY_SUBSTITUTIONS:
+        result = result.replace(plain, typographic)
+    return result
+
+def reg_substitute_entities():
_register_function(substitute_entities)
+def strip(context, text):
+    """Collapse whitespace runs to one space and trim both ends.
+
+    `text` may be a string or a list of strings (joined first).
+    `context` is not used.
+    """
+    joined = ''.join(text) if isinstance(text, list) else text
+    collapsed = re.sub(r'\s+', ' ', joined)
+    return collapsed.strip()
+
+
def reg_strip():
- def strip(context, text):
- """Remove unneeded whitespace from beginning and end"""
- if isinstance(text, list):
- text = ''.join(text)
- return re.sub(r'\s+', ' ', text).strip()
_register_function(strip)
+def starts_white(context, text):
+    """Tell whether the passed text begins with a whitespace character.
+
+    A list of strings is joined first; empty input gives False.
+    `context` is not used.
+    """
+    if isinstance(text, list):
+        text = ''.join(text)
+    return bool(text) and text[0].isspace()
+
+
def reg_starts_white():
- def starts_white(context, text):
- if isinstance(text, list):
- text = ''.join(text)
- if not text:
- return False
- return text[0].isspace()
_register_function(starts_white)
_register_function(ends_white)
+def wrap_words(context, text, wrapping):
+    """XPath extension function automatically wrapping words in passed text.
+
+    `text` may be a string or a list of strings (joined first).  When
+    `wrapping` is falsy the text is returned unchanged; otherwise the
+    text is split on single whitespace characters and re-joined with
+    spaces, starting a new line whenever the running length exceeds
+    `wrapping`.  `context` is not used.
+    """
+    if isinstance(text, list):
+        text = ''.join(text)
+    if not wrapping:
+        return text
+
+    words = re.split(r'\s', text)
+
+    # NOTE: the first line counts a trailing space for every word, while
+    # lines opened by a wrap restart at len(word); kept as it was so that
+    # existing line breaks do not move.
+    line_length = 0
+    lines = [[]]
+    for word in words:
+        line_length += len(word) + 1
+        # Only break when the current line already holds something: a
+        # first word longer than `wrapping` used to leave an empty line
+        # in front of it.
+        if line_length > wrapping and lines[-1]:
+            # Max line length was exceeded. We create new line
+            lines.append([])
+            line_length = len(word)
+        lines[-1].append(word)
+    return '\n'.join(' '.join(line) for line in lines)
+
+
def reg_wrap_words():
- def wrap_words(context, text, wrapping):
- """XPath extension function automatically wrapping words in passed text"""
- if isinstance(text, list):
- text = ''.join(text)
- if not wrapping:
- return text
-
- words = re.split(r'\s', text)
-
- line_length = 0
- lines = [[]]
- for word in words:
- line_length += len(word) + 1
- if line_length > wrapping:
- # Max line length was exceeded. We create new line
- lines.append([])
- line_length = len(word)
- lines[-1].append(word)
- return '\n'.join(' '.join(line) for line in lines)
_register_function(wrap_words)
+def person_name(context, text):
+    """Convert "Name, Forename" to "Forename Name".
+
+    `text` may be a string or a list of strings (joined first); the
+    result is `Person.from_text(...).readable()`.  `context` is not used.
+    """
+    raw = ''.join(text) if isinstance(text, list) else text
+    return Person.from_text(raw).readable()
+
+
def reg_person_name():
- def person_name(context, text):
- """ Converts "Name, Forename" to "Forename Name" """
- if isinstance(text, list):
- text = ''.join(text)
- return Person.from_text(text).readable()
_register_function(person_name)
+def texcommand(context, text):
+    """Remove every character that is not an ASCII letter.
+
+    `text` may be a string or a list of strings (joined first).
+    `context` is not used.
+    """
+    if isinstance(text, list):
+        text = ''.join(text)
+    # The result can only hold [a-zA-Z], so no strip() is needed afterwards.
+    return re.sub(r'[^a-zA-Z]', '', text)
+
+
def reg_texcommand():
- def texcommand(context, text):
- """Remove non-letters"""
- if isinstance(text, list):
- text = ''.join(text)
- return re.sub(r'[^a-zA-Z]', '', text).strip()
_register_function(texcommand)
+def reg_get(format_):
+    """Register an XPath extension function `get` bound to `format_`.
+
+    `get(context, *args)` starts from `format_` and follows `args` one
+    by one: an argument that names an attribute is read with getattr,
+    anything else is tried as an item lookup.  The object reached after
+    the last argument is returned.
+    """
+    def get(context, *args):
+        obj = format_
+        for arg in args:
+            if hasattr(obj, arg):
+                obj = getattr(obj, arg)
+            else:
+                try:
+                    obj = obj[arg]
+                except (TypeError, KeyError):
+                    # Just raise proper AttributeError.
+                    getattr(obj, arg)
+        return obj
+    _register_function(get)
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
- from librarian import RDFNS
- from librarian.cover import WLCover
+ from librarian import RDFNS, IOFile
+ from librarian.styles.wolnelektury.cover import WLCover
from librarian import dcparser
from xml.parsers.expat import ExpatError
LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
provider = None
- def __init__(self, edoc, parse_dublincore=True, provider=None,
- strict=False, meta_fallbacks=None):
- self.edoc = edoc
+    # Cache for the parsed tree; filled on first access to `edoc`.
+    _edoc = None
+    @property
+    def edoc(self):
+        """Parsed XML tree of `self.source`, built on first access.
+
+        The source string is decoded as UTF-8 when it is not already
+        unicode, U+FEFF characters are removed, and the text is parsed
+        with `etree.XMLParser(remove_blank_text=False)`.
+
+        Raises ParseError wrapping ExpatError, XMLSyntaxError or
+        XSLTApplyError.
+        """
+        if self._edoc is not None:
+            return self._edoc
+        raw = self.source.get_string()
+        if not isinstance(raw, unicode):
+            raw = raw.decode('utf-8')
+        raw = raw.replace(u'\ufeff', '')
+        try:
+            xml_parser = etree.XMLParser(remove_blank_text=False)
+            self._edoc = etree.parse(StringIO(raw.encode('utf-8')), xml_parser)
+        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+            raise ParseError(e)
+        return self._edoc
+
+    # Cache for the RDF element; filled on first access to `rdf_elem`.
+    _rdf_elem = None
+    @property
+    def rdf_elem(self):
+        """First RDFNS('RDF') descendant of the document root, cached.
+
+        Raises NoDublinCore when the document has no such element.
+        """
+        if self._rdf_elem is not None:
+            return self._rdf_elem
+        found = self.edoc.getroot().find('.//' + RDFNS('RDF'))
+        if found is None:
+            raise NoDublinCore('Document has no DublinCore - which is required.')
+        self._rdf_elem = found
+        return found
+
+    # Cache for the parsed metadata; filled on first access to `book_info`.
+    _book_info = None
+    @property
+    def book_info(self):
+        """BookInfo built from `rdf_elem`, or None when parsing is disabled.
+
+        Returns None whenever `self.parse_dublincore` is false.  Otherwise
+        the result of `dcparser.BookInfo.from_element` (called with
+        `self.meta_fallbacks` and `self.strict`) is computed once and
+        reused on later accesses.
+        """
+        if not self.parse_dublincore:
+            return None
+        if self._book_info is not None:
+            return self._book_info
+        self._book_info = dcparser.BookInfo.from_element(
+            self.rdf_elem, fallbacks=self.meta_fallbacks, strict=self.strict)
+        return self._book_info
+
+    def __init__(self, iofile, provider=None,
+            parse_dublincore=True, # shouldn't it be in a subclass?
+            strict=False, # ?
+            meta_fallbacks=None # ?
+            ):
+        """Wrap `iofile` as a document and validate it eagerly.
+
+        iofile -- stored as `self.source`; `edoc` reads the XML from its
+            `get_string()`.
+        provider -- stored as `self.provider`.
+        parse_dublincore -- when true, the Dublin Core metadata is parsed
+            during construction; when false, `book_info` is None.
+        strict, meta_fallbacks -- passed on to
+            `dcparser.BookInfo.from_element` by `book_info`.
+
+        Raises ValidationError when the root element is not 'utwor', and
+        whatever `edoc` / `book_info` raise while parsing (ParseError,
+        NoDublinCore).
+        """
+        self.source = iofile
         self.provider = provider
-
-        root_elem = edoc.getroot()
-
-        dc_path = './/' + RDFNS('RDF')
-
-        if root_elem.tag != 'utwor':
+        self.parse_dublincore = parse_dublincore
+        self.strict = strict
+        self.meta_fallbacks = meta_fallbacks
+        # Bind the root once: the error message below still formats
+        # `root_elem.tag`, and the old assignment of `root_elem` is gone.
+        root_elem = self.edoc.getroot()
+        if root_elem.tag != 'utwor':
             raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
-
         if parse_dublincore:
-            self.rdf_elem = root_elem.find(dc_path)
-
-            if self.rdf_elem is None:
-                raise NoDublinCore('Document has no DublinCore - which is required.')
-
-            self.book_info = dcparser.BookInfo.from_element(
-                    self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
-        else:
-            self.book_info = None
+            # Touch the lazy property so that metadata errors surface here
+            # rather than on first use.
+            self.book_info
@classmethod
def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
+ return cls(IOFile.from_string(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
-
- # first, prepare for parsing
if isinstance(xmlfile, basestring):
- file = open(xmlfile, 'rb')
- try:
- data = file.read()
- finally:
- file.close()
+ iofile = IOFile.from_filename(xmlfile)
else:
- data = xmlfile.read()
-
- if not isinstance(data, unicode):
- data = data.decode('utf-8')
+ iofile = IOFile.from_file(xmlfile)
+ return cls(iofile, *args, **kwargs)
- data = data.replace(u'\ufeff', '')
-
- try:
- parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
-
- return cls(tree, *args, **kwargs)
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
- raise ParseError(e)
def swap_endlines(self):
"""Converts line breaks in stanzas into <br/> tags."""
elem.text = chunks.pop(0)
def parts(self):
- if self.provider is None:
- raise NoProvider('No document provider supplied.')
if self.book_info is None:
raise NoDublinCore('No Dublin Core in document.')
+ if self.book_info.parts and self.provider is None:
+ raise NoProvider('No document provider supplied.')
for part_uri in self.book_info.parts:
yield self.from_file(self.provider.by_uri(part_uri),
provider=self.provider)
# Converters
def as_html(self, *args, **kwargs):
- from librarian import html
+ from librarian import pyhtml as html
return html.transform(self, *args, **kwargs)
def as_text(self, *args, **kwargs):