Stable version 1.2.5.
author Marek Stępniowski <marek@stepniowski.com>
Thu, 3 Sep 2009 14:39:08 +0000 (16:39 +0200)
committer Łukasz Rekucki <lrekucki@gmail.com>
Fri, 19 Mar 2010 16:17:17 +0000 (17:17 +0100)
28 files changed:
.gitignore
MANIFEST.in
ez_setup.py [new file with mode: 0644]
librarian/__init__.py
librarian/book2html.xslt
librarian/config.xml [new file with mode: 0644]
librarian/dcparser.py
librarian/html.py
librarian/parser.py
librarian/text.py
librarian/wl2html_base.xslt [new file with mode: 0644]
librarian/wl2html_full.xslt [new file with mode: 0644]
librarian/wl2html_partial.xslt [new file with mode: 0644]
scripts/book2html
scripts/book2ihtml [new file with mode: 0755]
scripts/book2txt
setup.cfg [new file with mode: 0644]
setup.py
tests/files/text/asnyk_miedzy_nami.txt [deleted file]
tests/files/text/asnyk_miedzy_nami.xml [changed mode: 0644->0755]
tests/files/text/asnyk_miedzy_nami_expected.html [new file with mode: 0644]
tests/files/text/asnyk_miedzy_nami_expected.txt [new file with mode: 0644]
tests/files/text/asnyk_miedzy_nami_nodc.txt [new file with mode: 0644]
tests/files/text/asnyk_miedzy_nami_nodc.xml [new file with mode: 0644]
tests/test_dcparser.py [changed mode: 0755->0644]
tests/test_html.py [new file with mode: 0644]
tests/test_text.py [changed mode: 0755->0644]
tests/utils.py

index bfdc1af..f72688b 100644 (file)
@@ -3,5 +3,5 @@
 MANIFEST
 dist
 build
 MANIFEST
 dist
 build
-nbproject
-nbproject/*
+*.egg-info
+.coverage
index 9b7ec3d..38ee542 100644 (file)
@@ -1,2 +1,2 @@
-include librarian/*.xslt
-recursive-include tests/files/ *.xml
+include librarian/*.xslt 
+include librarian/config.xml
diff --git a/ez_setup.py b/ez_setup.py
new file mode 100644 (file)
index 0000000..4a84fea
--- /dev/null
@@ -0,0 +1,275 @@
+#!python
+"""Bootstrap setuptools installation
+
+If you want to use setuptools in your package's setup.py, just include this
+file in the same directory with it, and add this to the top of your setup.py::
+
+    from ez_setup import use_setuptools
+    use_setuptools()
+
+If you want to require a specific version of setuptools, set a download
+mirror, or use an alternate download directory, you can do so by supplying
+the appropriate options to ``use_setuptools()``.
+
+This file can also be run as a script to install or upgrade setuptools.
+"""
+import sys
+DEFAULT_VERSION = "0.6c9"
+DEFAULT_URL     = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]
+
+md5_data = {
+    'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca',
+    'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb',
+    'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b',
+    'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a',
+    'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618',
+    'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac',
+    'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5',
+    'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4',
+    'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c',
+    'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b',
+    'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27',
+    'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277',
+    'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa',
+    'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e',
+    'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e',
+    'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f',
+    'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2',
+    'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc',
+    'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167',
+    'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64',
+    'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d',
+    'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20',
+    'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab',
+    'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53',
+    'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2',
+    'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e',
+    'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372',
+    'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902',
+    'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de',
+    'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b',
+    'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03',
+    'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a',
+    'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6',
+    'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a',
+}
+
+import sys, os
+try: from hashlib import md5
+except ImportError: from md5 import md5
+
+def _validate_md5(egg_name, data):
+    """Return `data` unchanged after checking it against the md5 registry.
+
+    If `egg_name` has an entry in ``md5_data`` and the md5 hex digest of
+    `data` differs from that entry, a message is printed to ``sys.stderr``
+    and the process exits with status 2.  Names without an entry are not
+    checked at all.
+    """
+    if egg_name in md5_data:
+        digest = md5(data).hexdigest()
+        if digest != md5_data[egg_name]:
+            print >>sys.stderr, (
+                "md5 validation of %s failed!  (Possible download problem?)"
+                % egg_name
+            )
+            sys.exit(2)
+    return data
+
+def use_setuptools(
+    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
+    download_delay=15
+):
+    """Automatically find/download setuptools and make it available on sys.path
+
+    `version` should be a valid setuptools version number that is available
+    as an egg for download under the `download_base` URL (which should end with
+    a '/').  `to_dir` is the directory where setuptools will be downloaded, if
+    it is not already available.  If `download_delay` is specified, it should
+    be the number of seconds that will be paused before initiating a download,
+    should one be required.  If an older version of setuptools is installed,
+    this routine will print a message to ``sys.stderr`` and raise SystemExit in
+    an attempt to abort the calling script.
+    """
+    # Record whether either module was imported before this call; the
+    # VersionConflict handler below uses it to choose between aborting and
+    # downloading.
+    was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules
+    def do_download():
+        # Fetch the egg, put it first on sys.path and record its location on
+        # setuptools.bootstrap_install_from.
+        egg = download_setuptools(version, download_base, to_dir, download_delay)
+        sys.path.insert(0, egg)
+        import setuptools; setuptools.bootstrap_install_from = egg
+    try:
+        import pkg_resources
+    except ImportError:
+        # pkg_resources cannot be imported, so download setuptools.
+        return do_download()       
+    try:
+        pkg_resources.require("setuptools>="+version); return
+    except pkg_resources.VersionConflict, e:
+        if was_imported:
+            print >>sys.stderr, (
+            "The required version of setuptools (>=%s) is not available, and\n"
+            "can't be installed while this script is running. Please install\n"
+            " a more recent version first, using 'easy_install -U setuptools'."
+            "\n\n(Currently using %r)"
+            ) % (version, e.args[0])
+            sys.exit(2)
+        else:
+            del pkg_resources, sys.modules['pkg_resources']    # reload ok
+            return do_download()
+    except pkg_resources.DistributionNotFound:
+        return do_download()
+
+def download_setuptools(
+    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
+    delay = 15
+):
+    """Download setuptools from a specified location and return its filename
+
+    `version` should be a valid setuptools version number that is available
+    as an egg for download under the `download_base` URL (which should end
+    with a '/'). `to_dir` is the directory where the egg will be downloaded.
+    `delay` is the number of seconds to pause before an actual download attempt.
+
+    The returned filename is the real path of the egg inside `to_dir`; when
+    that file already exists nothing is downloaded.
+    """
+    import urllib2, shutil
+    egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
+    url = download_base + egg_name
+    saveto = os.path.join(to_dir, egg_name)
+    # Both handles start as None so the finally clause below closes only
+    # what was actually opened.
+    src = dst = None
+    if not os.path.exists(saveto):  # Avoid repeated downloads
+        try:
+            from distutils import log
+            if delay:
+                log.warn("""
+---------------------------------------------------------------------------
+This script requires setuptools version %s to run (even to display
+help).  I will attempt to download it for you (from
+%s), but
+you may need to enable firewall access for this script first.
+I will start the download in %d seconds.
+
+(Note: if this machine does not have network access, please obtain the file
+
+   %s
+
+and place it in this directory before rerunning this script.)
+---------------------------------------------------------------------------""",
+                    version, download_base, delay, url
+                ); from time import sleep; sleep(delay)
+            log.warn("Downloading %s", url)
+            src = urllib2.urlopen(url)
+            # Read/write all in one block, so we don't create a corrupt file
+            # if the download is interrupted.
+            data = _validate_md5(egg_name, src.read())
+            dst = open(saveto,"wb"); dst.write(data)
+        finally:
+            if src: src.close()
+            if dst: dst.close()
+    return os.path.realpath(saveto)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def main(argv, version=DEFAULT_VERSION):
+    """Install or upgrade setuptools and EasyInstall"""
+    try:
+        import setuptools
+    except ImportError:
+        # setuptools cannot be imported: download the egg, run easy_install
+        # on it, and delete the downloaded file afterwards.
+        egg = None
+        try:
+            egg = download_setuptools(version, delay=0)
+            sys.path.insert(0,egg)
+            from setuptools.command.easy_install import main
+            return main(list(argv)+[egg])   # we're done here
+        finally:
+            if egg and os.path.exists(egg):
+                os.unlink(egg)
+    else:
+        if setuptools.__version__ == '0.0.1':
+            print >>sys.stderr, (
+            "You have an obsolete version of setuptools installed.  Please\n"
+            "remove it from your system entirely before rerunning this script."
+            )
+            sys.exit(2)
+
+    req = "setuptools>="+version
+    import pkg_resources
+    try:
+        pkg_resources.require(req)
+    except pkg_resources.VersionConflict:
+        # The installed setuptools does not satisfy `req`: pass a freshly
+        # downloaded egg to easy_install.
+        try:
+            from setuptools.command.easy_install import main
+        except ImportError:
+            from easy_install import main
+        main(list(argv)+[download_setuptools(delay=0)])
+        sys.exit(0) # try to force an exit
+    else:
+        # The requirement is satisfied: forward any arguments to
+        # easy_install, otherwise just report.
+        if argv:
+            from setuptools.command.easy_install import main
+            main(argv)
+        else:
+            print "Setuptools version",version,"or greater has been installed."
+            print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
+
+def update_md5(filenames):
+    """Update our built-in md5 registry"""
+
+    import re
+
+    # Record the md5 hex digest of every given file under its base name.
+    for name in filenames:
+        base = os.path.basename(name)
+        f = open(name,'rb')
+        md5_data[base] = md5(f.read()).hexdigest()
+        f.close()
+
+    # Render the registry as sorted source lines.
+    data = ["    %r: %r,\n" % it for it in md5_data.items()]
+    data.sort()
+    repl = "".join(data)
+
+    import inspect
+    srcfile = inspect.getsourcefile(sys.modules[__name__])
+    f = open(srcfile, 'rb'); src = f.read(); f.close()
+
+    # Locate the body of the ``md5_data = {...}`` literal in this module's
+    # own source text.
+    match = re.search("\nmd5_data = {\n([^}]+)}", src)
+    if not match:
+        print >>sys.stderr, "Internal error!"
+        sys.exit(2)
+
+    # Replace the old body with the regenerated lines and write the source
+    # file back.
+    src = src[:match.start(1)] + repl + src[match.end(1):]
+    f = open(srcfile,'w')
+    f.write(src)
+    f.close()
+
+
+# Script entry point: "--md5update FILE..." refreshes the md5 registry from
+# the given files; any other invocation passes the arguments on to main().
+if __name__=='__main__':
+    if len(sys.argv)>2 and sys.argv[1]=='--md5update':
+        update_md5(sys.argv[2:])
+    else:
+        main(sys.argv[1:])
+
+
+
+
+
index 9132f5c..5997a4e 100644 (file)
@@ -2,10 +2,87 @@
 # exception classes
 
 class ParseError(Exception):
 # exception classes
 
 class ParseError(Exception):
-    pass
+    
+    def __init__(self, cause, message=None):
+        self.cause = cause
+        try:
+            self.message = message or self.cause.message
+        except:
+            self.message = "No message."
 
 class ValidationError(Exception):
     pass
 
 class NoDublinCore(ValidationError):
     pass
 
 class ValidationError(Exception):
     pass
 
 class NoDublinCore(ValidationError):
     pass
+
+class XMLNamespace(object):
+    '''A handy structure to represent names in an XML namespace.'''
+
+    def __init__(self, uri):
+        # uri: the namespace URI used when building qualified names.
+        self.uri = uri
+
+    def __call__(self, tag):
+        # Return `tag` qualified with this namespace, as '{uri}tag'.
+        return '{%s}%s' % (self.uri, tag)
+
+    def __contains__(self, tag):
+        # True when the qualified name `tag` starts with '{uri}'.
+        return tag.startswith('{'+str(self)+'}')
+
+    def __repr__(self):
+        return 'XMLNamespace(%r)' % self.uri
+
+    def __str__(self):
+        # The string form is the bare namespace URI.
+        return '%s' % self.uri
+
+class EmptyNamespace(XMLNamespace):
+    '''Namespace with an empty URI; calling it returns the tag unchanged.'''
+    # NOTE(review): __contains__ is inherited, so membership tests look for
+    # the literal prefix '{}'; unqualified tags never match - confirm that
+    # this is intended.
+    def __init__(self):
+        super(EmptyNamespace, self).__init__('')
+
+    def __call__(self, tag):
+        return tag
+
+# some common namespaces we use
+RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
+XINS = XMLNamespace("http://www.w3.org/2001/XInclude")
+XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
+
+WLNS = EmptyNamespace()
+
+import lxml.etree as etree
+import dcparser
+
+# Template metadata used as the default `bookinfo` of wrap_text().  The first
+# dictionary carries the rdf:about attribute, the second maps Dublin Core
+# field names to lists of text values.
+DEFAULT_BOOKINFO = dcparser.BookInfo(
+        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},\
+        { DCNS('creator'): [u'Some, Author'],
+          DCNS('title'): [u'Some Title'],
+          DCNS('subject.period'): [u'Unknown'],
+          DCNS('subject.type'): [u'Unknown'],
+          DCNS('subject.genre'): [u'Unknown'],
+          DCNS('date'): ['1970-01-01'],
+          # DCNS('date'): [creation_date],
+          DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
+          DCNS('description'):
+          [u"""Publikacja zrealizowana w ramach projektu
+             Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
+             wykonana przez Bibliotekę Narodową z egzemplarza
+             pochodzącego ze zbiorów BN."""],
+          DCNS('identifier.url'):
+            [u"http://wolnelektury.pl/katalog/lektura/template"],
+          DCNS('rights'):
+            [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
+
+def xinclude_forURI(uri):
+    """Return a serialized XInclude ``include`` element whose href is `uri`."""
+    e = etree.Element( XINS("include") )
+    e.set("href", uri)
+    return etree.tostring(e, encoding=unicode)
+    
+def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
+    """Wrap the text within the minimal XML structure with a DC template.
+
+    `ocrtext` is placed inside a <plain-text> element, preceded by the
+    serialized metadata of `bookinfo`, all inside an <utwor> root.  The
+    result is returned as a unicode string.
+    """
+    # NOTE(review): this assigns on the object that was passed in.  With the
+    # default argument that object is the shared DEFAULT_BOOKINFO, so the
+    # date set here stays on it after the call.
+    bookinfo.created_at = creation_date
+    
+    dcstring = etree.tostring(bookinfo.to_etree(),\
+        method='xml', encoding=unicode, pretty_print=True)
+
+    return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext +\
+        u'\n</plain-text>\n</utwor>';
\ No newline at end of file
index 71f1182..369b542 100644 (file)
 <!-- ========================================== -->
 <!-- Title page -->
 <xsl:template match="autor_utworu" mode="header">
 <!-- ========================================== -->
 <!-- Title page -->
 <xsl:template match="autor_utworu" mode="header">
-    <span class="author"><xsl:apply-templates mode="inline" /></span>
+    <span class="author editable"><xsl:apply-templates mode="inline" /></span>
 </xsl:template>
 
 <xsl:template match="nazwa_utworu" mode="header">
 </xsl:template>
 
 <xsl:template match="nazwa_utworu" mode="header">
-    <span class="title"><xsl:apply-templates mode="inline" /></span>
+    <span class="title editable"><xsl:apply-templates mode="inline" /></span>
 </xsl:template>
 
 <xsl:template match="dzielo_nadrzedne" mode="header">
 </xsl:template>
 
 <xsl:template match="dzielo_nadrzedne" mode="header">
 </xsl:template>
 
 <xsl:template match="akap|akap_dialog|akap_cd">
 </xsl:template>
 
 <xsl:template match="akap|akap_dialog|akap_cd">
-    <p class="paragraph"><xsl:apply-templates mode="inline" /></p>
+    <p class="paragraph editable"><xsl:apply-templates mode="inline" /></p>
 </xsl:template>
 
 <xsl:template match="strofa">
 </xsl:template>
 
 <xsl:template match="strofa">
-    <div class="stanza">
+    <div class="stanza editable">
         <xsl:choose>
             <xsl:when test="count(br) > 0">     
                 <xsl:call-template name="verse">
         <xsl:choose>
             <xsl:when test="count(br) > 0">     
                 <xsl:call-template name="verse">
 </xsl:template>
 
 
 </xsl:template>
 
 
-</xsl:stylesheet>
-
+</xsl:stylesheet>
\ No newline at end of file
diff --git a/librarian/config.xml b/librarian/config.xml
new file mode 100644 (file)
index 0000000..e1f4b6f
--- /dev/null
@@ -0,0 +1,125 @@
+<config>
+    <block-elements>              
+        <!-- tagi główne -->
+        <utwor />
+        <opowiadanie />
+        <liryka_l />
+        <liryka_lp />
+        <powiesc />
+        <dramat_wierszowany_l />
+        <dramat_wierszowany_lp />
+        <dramat_wspolczesny />
+
+        <!-- inne tagi -->
+        <nota />
+        <dedykacja />
+        <kwestia />
+        <motto />
+        <didaskalia />
+    </block-elements>
+
+    <inline-elements>       
+        <!-- with emphasis -->
+        <mat />
+        <didask_tekst />
+        <slowo_obce />
+        <wyroznienie />
+        <osoba />
+        <tytul_dziela />
+    </inline-elements>
+
+    <paragraph-elements>
+        <!-- akapity -->
+        <akap />
+        <akap_cd />
+        <akap_dialog />
+        <miejsce_czas />
+        <motto_podpis />
+        <wers_cd />
+        <wers_akap />
+        <wers_wciety />
+    </paragraph-elements>        
+        
+    <header-1-elements>
+        <!-- placeholder -->        
+        <dzielo_nadrzedne />
+    </header-1-elements>
+    
+    <header-2-elements>
+        <naglowek_akt />        
+        <naglowek_czesc />
+        <autor_utworu />
+        <nazwa_utworu />
+        <srodtytul />
+    </header-2-elements>
+    
+    <header-3-elements>
+        <naglowek_scena />
+        <naglowek_rozdzial />
+        <podtytul />
+    </header-3-elements>
+    
+    <header-4-elements>
+        <naglowek_osoba />
+        <naglowek_podrozdzial />
+    </header-4-elements>
+
+    <special-tags>
+        <strofa />
+        <lista_osob />
+        <lista_osoba />
+        <sekcja_swiatlo />
+        <sekcja_asterysk />
+        <separator_linia />
+        <zastepnik_wersu />
+        <dlugi_cytat />
+    </special-tags>
+
+    <annotations>
+        <pa />
+        <pe />
+        <pr />
+        <pt />
+    </annotations>
+    
+    <no-show-elements>
+        <begin />
+        <end />
+        <extra />
+        <uwaga />
+        <motyw />        
+        <br />
+        <pa />
+        <pe />
+        <pr />
+        <pt />
+    </no-show-elements>
+
+    <editable>
+        <strofa />
+        <akap />
+        <akap_cd />
+        <akap_dialog />
+        <dzielo_nadrzedne />
+
+        <naglowek_akt />
+        <naglowek_czesc />
+        <autor_utworu />
+        <nazwa_utworu />
+        <srodtytul />
+
+        <naglowek_scena />
+        <naglowek_rozdzial />
+        <podtytul />
+
+        <naglowek_osoba />
+        <naglowek_podrozdzial />
+
+        <lista_osoba />
+
+        <dlugi_cytat />
+        <poezja_cyt />
+
+        <didaskalia />
+    </editable>
+</config>
\ No newline at end of file
index 830b089..80d6247 100644 (file)
@@ -3,7 +3,7 @@ from xml.parsers.expat import ExpatError
 from datetime import date
 import time
 
 from datetime import date
 import time
 
-from librarian import ValidationError, NoDublinCore
+from librarian import ValidationError, NoDublinCore, ParseError, DCNS, RDFNS
 
 import lxml.etree as etree # ElementTree API using libxml2
 from lxml.etree import XMLSyntaxError
 
 import lxml.etree as etree # ElementTree API using libxml2
 from lxml.etree import XMLSyntaxError
@@ -105,54 +105,31 @@ class Field(object):
 
         return self.validate_value(f)
 
 
         return self.validate_value(f)
 
-# ==========
-# = Parser =
-# ==========
 
 
-class XMLNamespace(object):
-    '''Represents XML namespace.'''
-    
-    def __init__(self, uri):
-        self.uri = uri
-
-    def __call__(self, tag):
-        return '{%s}%s' % (self.uri, tag)
 
 
-    def __contains__(self, tag):
-        return tag.startswith(str(self))
 
 
-    def __repr__(self):
-        return 'XMLNamespace(%r)' % self.uri
-    
-    def __str__(self):
-        return '%s' % self.uri
-
-
-class BookInfo(object):
-    RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
-    DC = XMLNamespace('http://purl.org/dc/elements/1.1/')
-    
+class BookInfo(object):    
     FIELDS = (
     FIELDS = (
-        Field( DC('creator'), 'author', as_person),
-        Field( DC('title'), 'title'),
-        Field( DC('subject.period'), 'epoches', salias='epoch', multiple=True),
-        Field( DC('subject.type'), 'kinds', salias='kind', multiple=True),
-        Field( DC('subject.genre'), 'genres', salias='genre', multiple=True),
-        Field( DC('date'), 'created_at', as_date),
-        Field( DC('date.pd'), 'released_to_public_domain_at', as_date, required=False),
-        Field( DC('contributor.editor'), 'editors', \
+        Field( DCNS('creator'), 'author', as_person),
+        Field( DCNS('title'), 'title'),
+        Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
+        Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
+        Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True),
+        Field( DCNS('date'), 'created_at', as_date),
+        Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
+        Field( DCNS('contributor.editor'), 'editors', \
             as_person, salias='editor', multiple=True, default=[]),
             as_person, salias='editor', multiple=True, default=[]),
-        Field( DC('contributor.translator'), 'translators', \
+        Field( DCNS('contributor.translator'), 'translators', \
             as_person,  salias='translator', multiple=True, default=[]),
             as_person,  salias='translator', multiple=True, default=[]),
-        Field( DC('contributor.technical_editor'), 'technical_editors',
+        Field( DCNS('contributor.technical_editor'), 'technical_editors',
             as_person, salias='technical_editor', multiple=True, default=[]),
             as_person, salias='technical_editor', multiple=True, default=[]),
-        Field( DC('publisher'), 'publisher'),
-        Field( DC('source'), 'source_name', required=False),
-        Field( DC('source.URL'), 'source_url', required=False),
-        Field( DC('identifier.url'), 'url'),
-        Field( DC('relation.hasPart'), 'parts', multiple=True, required=False),
-        Field( DC('rights.license'), 'license', required=False),
-        Field( DC('rights'), 'license_description'), 
+        Field( DCNS('publisher'), 'publisher'),
+        Field( DCNS('source'), 'source_name', required=False),
+        Field( DCNS('source.URL'), 'source_url', required=False),
+        Field( DCNS('identifier.url'), 'url'),
+        Field( DCNS('relation.hasPart'), 'parts', multiple=True, required=False),
+        Field( DCNS('rights.license'), 'license', required=False),
+        Field( DCNS('rights'), 'license_description'),
     )
 
     @classmethod
     )
 
     @classmethod
@@ -166,7 +143,7 @@ class BookInfo(object):
         try:
             iter = etree.iterparse(xmlfile, ['start', 'end'])            
             for (event, element) in iter:
         try:
             iter = etree.iterparse(xmlfile, ['start', 'end'])            
             for (event, element) in iter:
-                if element.tag == cls.RDF('RDF') and event == 'start':
+                if element.tag == RDFNS('RDF') and event == 'start':
                     desc_tag = element
                     break
 
                     desc_tag = element
                     break
 
@@ -176,7 +153,7 @@ class BookInfo(object):
 
             # continue 'till the end of RDF section
             for (event, element) in iter:
 
             # continue 'till the end of RDF section
             for (event, element) in iter:
-                if element.tag == cls.RDF('RDF') and event == 'end':
+                if element.tag == RDFNS('RDF') and event == 'end':
                     break
 
             # if there is no end, Expat should yell at us with an ExpatError
                     break
 
             # if there is no end, Expat should yell at us with an ExpatError
@@ -192,7 +169,7 @@ class BookInfo(object):
     def from_element(cls, rdf_tag):
         # the tree is already parsed, so we don't need to worry about Expat errors
         field_dict = {}
     def from_element(cls, rdf_tag):
         # the tree is already parsed, so we don't need to worry about Expat errors
         field_dict = {}
-        desc = rdf_tag.find(".//" + cls.RDF('Description') )        
+        desc = rdf_tag.find(".//" + RDFNS('Description') )
         
         if desc is None:
             raise NoDublinCore("No DublinCore section found.")
         
         if desc is None:
             raise NoDublinCore("No DublinCore section found.")
@@ -202,14 +179,14 @@ class BookInfo(object):
             fv.append(e.text)
             field_dict[e.tag] = fv
                 
             fv.append(e.text)
             field_dict[e.tag] = fv
                 
-        return cls( desc.attrib, field_dict )        
+        return cls( desc.attrib, field_dict )
 
     def __init__(self, rdf_attrs, dc_fields):
         """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
         dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the 
         given field. """
 
 
     def __init__(self, rdf_attrs, dc_fields):
         """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
         dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the 
         given field. """
 
-        self.about = rdf_attrs.get(self.RDF('about'))
+        self.about = rdf_attrs.get(RDFNS('about'))
         self.fmap = {}
 
         for field in self.FIELDS:
         self.fmap = {}
 
         for field in self.FIELDS:
@@ -258,14 +235,14 @@ class BookInfo(object):
         #etree._namespace_map[str(self.DC)] = 'dc'
         
         if parent is None:
         #etree._namespace_map[str(self.DC)] = 'dc'
         
         if parent is None:
-            root = etree.Element(self.RDF('RDF'))
+            root = etree.Element(RDFNS('RDF'))
         else:
         else:
-            root = parent.makeelement(self.RDF('RDF'))
+            root = parent.makeelement(RDFNS('RDF'))
 
 
-        description = etree.SubElement(root, self.RDF('Description'))
+        description = etree.SubElement(root, RDFNS('Description'))
         
         if self.about:
         
         if self.about:
-            description.set(self.RDF('about'), self.about)
+            description.set(RDFNS('about'), self.about)
         
         for field in self.FIELDS:
             v = getattr(self, field.name, None)
         
         for field in self.FIELDS:
             v = getattr(self, field.name, None)
@@ -283,6 +260,25 @@ class BookInfo(object):
         
         return root
 
         
         return root
 
+
+    def serialize(self):
+        rdf = {}
+        rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about }
+
+        dc = {}
+        for field in self.FIELDS:
+            v = getattr(self, field.name, None)
+            if v is not None:
+                if field.multiple:
+                    if len(v) == 0: continue
+                    v = [ unicode(x) for x in v if v is not None ]
+                else:
+                    v = unicode(v)
+                    
+                dc[field.name] = {'uri': field.uri, 'value': v}
+        rdf['fields'] = dc
+        return rdf
+
     def to_dict(self):
         result = {'about': self.about}
         for field in self.FIELDS:
     def to_dict(self):
         result = {'about': self.about}
         for field in self.FIELDS:
@@ -291,14 +287,14 @@ class BookInfo(object):
             if v is not None:
                 if field.multiple:
                     if len(v) == 0: continue
             if v is not None:
                 if field.multiple:
                     if len(v) == 0: continue
-                    v = [ unicode(x) for x in v ]
+                    v = [ unicode(x) for x in v if v is not None ]
                 else:
                     v = unicode(v)
                 result[field.name] = v
 
             if field.salias:
                 v = getattr(self, field.salias)
                 else:
                     v = unicode(v)
                 result[field.name] = v
 
             if field.salias:
                 v = getattr(self, field.salias)
-                if v is not None: result[field.salias] = v
+                if v is not None: result[field.salias] = unicode(v)
         
         return result
 
         
         return result
 
index 4edbf33..6551995 100644 (file)
@@ -6,7 +6,9 @@ import copy
 
 from lxml import etree
 from librarian.parser import WLDocument
 
 from lxml import etree
 from librarian.parser import WLDocument
+from librarian import XHTMLNS, ParseError
 
 
+from lxml.etree import XMLSyntaxError, XSLTApplyError
 
 ENTITY_SUBSTITUTIONS = [
     (u'---', u'—'),
 
 ENTITY_SUBSTITUTIONS = [
     (u'---', u'—'),
@@ -16,6 +18,14 @@ ENTITY_SUBSTITUTIONS = [
     (u'"', u'”'),
 ]
 
     (u'"', u'”'),
 ]
 
+# Maps the stylesheet names accepted by get_stylesheet() to XSLT file names.
+STYLESHEETS = {
+    'legacy': 'book2html.xslt',
+    'full': 'wl2html_full.xslt',
+    'partial': 'wl2html_partial.xslt'
+}
+
+def get_stylesheet(name):
+    """Return the path of the stylesheet registered under `name`.
+
+    The file name is looked up in STYLESHEETS and joined with the directory
+    of this module; an unknown `name` raises KeyError.
+    """
+    return os.path.join(os.path.dirname(__file__), STYLESHEETS[name])
 
 def substitute_entities(context, text):
     """XPath extension function converting all entites in passed text."""
 
 def substitute_entities(context, text):
     """XPath extension function converting all entites in passed text."""
@@ -25,38 +35,44 @@ def substitute_entities(context, text):
         text = text.replace(entity, substitutution)
     return text
 
         text = text.replace(entity, substitutution)
     return text
 
-
 # Register substitute_entities function with lxml
 ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
 ns['substitute_entities'] = substitute_entities
 
 # Register substitute_entities function with lxml
 ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
 ns['substitute_entities'] = substitute_entities
 
-
-def transform(input, output_filename=None, is_file=True):
+def transform(input, output_filename=None, is_file=True, \
+    parse_dublincore=True, stylesheet='legacy', options={}):
     """Transforms file input_filename in XML to output_filename in XHTML."""
     # Parse XSLT
     """Transforms file input_filename in XML to output_filename in XHTML."""
     # Parse XSLT
-    style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
-    style = etree.parse(style_filename)
+    try:
+        style_filename = get_stylesheet(stylesheet)
+        style = etree.parse(style_filename)
 
 
-    if is_file:
-        document = WLDocument.from_file(input, True)
-    else:
-        document = WLDocument.from_string(input, True)
-
-    result = document.transform(style)
-    del document # no longer needed large object :)
+        if is_file:
+            document = WLDocument.from_file(input, True, \
+                parse_dublincore=parse_dublincore)
+        else:
+            document = WLDocument.from_string(input, True, \
+                parse_dublincore=parse_dublincore)
 
 
-    if result.find('//p') is not None:
-        add_anchors(result.getroot())
-        add_table_of_contents(result.getroot())
+        result = document.transform(style, **options)
+        del document # no longer needed large object :)        
         
         
-        if output_filename is not None:
-            result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8')
+        if etree.ETXPath('//p|//{%s}p' % str(XHTMLNS))(result) is not None:
+            add_anchors(result.getroot())
+            add_table_of_contents(result.getroot())
+        
+            if output_filename is not None:
+                result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8')
+            else:
+                return result
+            return True
         else:
         else:
-            return result
-        return True
-    else:
-        return False
-
+            print "[Librarian] didn't find any paragraphs"
+            return "<empty />"
+    except KeyError:
+        raise ValueError("'%s' is not a valid stylesheet.")
+    except (XMLSyntaxError, XSLTApplyError), e:
+        raise ParseError(e)
 
 class Fragment(object):
     def __init__(self, id, themes):
 
 class Fragment(object):
     def __init__(self, id, themes):
index 595dd97..55b4e4b 100644 (file)
@@ -1,8 +1,11 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-from librarian import ValidationError, NoDublinCore, dcparser, ParseError
+from librarian import ValidationError, NoDublinCore,  ParseError
+from librarian import RDFNS, DCNS
+from librarian import dcparser
+
 from xml.parsers.expat import ExpatError
 from lxml import etree
 from xml.parsers.expat import ExpatError
 from lxml import etree
-from lxml.etree import XMLSyntaxError
+from lxml.etree import XMLSyntaxError, XSLTApplyError
 
 import re
 from StringIO import StringIO
 
 import re
 from StringIO import StringIO
@@ -10,29 +13,32 @@ from StringIO import StringIO
 class WLDocument(object):
     LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
 
 class WLDocument(object):
     LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
 
-    def __init__(self, edoc):
+    def __init__(self, edoc, parse_dublincore=True):
         self.edoc = edoc
 
         root_elem = edoc.getroot()
         self.edoc = edoc
 
         root_elem = edoc.getroot()
-        rdf_ns = dcparser.BookInfo.RDF
-        dc_path = './/' + rdf_ns('RDF')
+       
+        dc_path = './/' + RDFNS('RDF')
         
         if root_elem.tag != 'utwor':
             raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
 
         
         if root_elem.tag != 'utwor':
             raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
 
-        self.rdf_elem = root_elem.find(dc_path)
-
-        if self.rdf_elem is None:
-            raise NoDublinCore('Document has no DublinCore - which is required.')
-
-        self.book_info = dcparser.BookInfo.from_element(self.rdf_elem)
+        if parse_dublincore:
+            self.rdf_elem = root_elem.find(dc_path)
 
 
+            if self.rdf_elem is None:
+                raise NoDublinCore('Document has no DublinCore - which is required.')
+            
+            self.book_info = dcparser.BookInfo.from_element(self.rdf_elem)
+        else:
+            self.book_info = None
+    
     @classmethod
     @classmethod
-    def from_string(cls, xml, swap_endlines=False):
-        return cls.from_file(StringIO(xml), swap_endlines)
+    def from_string(cls, xml, swap_endlines=False, parse_dublincore=True):
+        return cls.from_file(StringIO(xml), swap_endlines, parse_dublincore=parse_dublincore)
 
     @classmethod
 
     @classmethod
-    def from_file(cls, xmlfile, swap_endlines=False):
+    def from_file(cls, xmlfile, swap_endlines=False, parse_dublincore=True):
 
         # first, prepare for parsing
         if isinstance(xmlfile, basestring):
 
         # first, prepare for parsing
         if isinstance(xmlfile, basestring):
@@ -52,11 +58,38 @@ class WLDocument(object):
     
         try:
             parser = etree.XMLParser(remove_blank_text=True)
     
         try:
             parser = etree.XMLParser(remove_blank_text=True)
-            return cls( etree.parse(StringIO(data), parser) )
-        except XMLSyntaxError, e:
-             raise ParseError(e.message)            
-        except ExpatError, e:
-            raise ParseError(e.message)            
+            return cls(etree.parse(StringIO(data), parser), parse_dublincore=parse_dublincore)
+        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+            raise ParseError(e)                  
+
+    def part_as_text(self, path):
+        # convert the path to XPath        
+        print "[L] Retrieving part:", path
+
+        elems = self.edoc.xpath(self.path_to_xpath(path))
+        print "[L] xpath", elems
+        
+        if len(elems) == 0:
+            return None        
+
+        return etree.tostring(elems[0], encoding=unicode, pretty_print=True)
+
+
+    def path_to_xpath(self, path):
+        parts = []
+
+        for part in path.split('/'):
+            match = re.match(r'([^\[]+)\[(\d+)\]', part)
+            if not match:
+                parts.append(part)
+            else:
+                tag, n = match.groups()
+                parts.append("node()[position() = %d and name() = '%s']" % (int(n), tag) )
+
+        if parts[0] == '.':
+            parts[0] = ''
+
+        return '/'.join(parts)
 
     def transform(self, stylesheet, **options):
         return self.edoc.xslt(stylesheet, **options)
 
     def transform(self, stylesheet, **options):
         return self.edoc.xslt(stylesheet, **options)
@@ -68,3 +101,17 @@ class WLDocument(object):
     def serialize(self):
         self.update_dc()
         return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
     def serialize(self):
         self.update_dc()
         return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
+
+    def merge_chunks(self, chunk_dict):
+        unmerged = []
+
+        for key, data in chunk_dict.iteritems():
+            try:
+                xpath = self.path_to_xpath(key)
+                node = self.edoc.xpath(xpath)[0]                
+                repl = etree.fromstring(data)
+                node.getparent().replace(node, repl);
+            except Exception, e:
+                unmerged.append( repr( (key, xpath, e) ) )
+
+        return unmerged
\ No newline at end of file
index 0754a99..972dd61 100644 (file)
@@ -1,12 +1,10 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-import os
+from librarian import dcparser, parser
+from lxml import etree
 import cStringIO
 import cStringIO
-import re
 import codecs
 import codecs
-
-from lxml import etree
-
-from librarian import dcparser
+import os
+import re
 
 
 ENTITY_SUBSTITUTIONS = [
 
 
 ENTITY_SUBSTITUTIONS = [
@@ -78,22 +76,27 @@ ns['substitute_entities'] = substitute_entities
 ns['wrap_words'] = wrap_words
 
 
 ns['wrap_words'] = wrap_words
 
 
-def transform(input_filename, output_filename, **options):
+def transform(input_filename, output_filename, is_file=True, parse_dublincore=True, **options):
     """Transforms file input_filename in XML to output_filename in TXT."""
     # Parse XSLT
     style_filename = os.path.join(os.path.dirname(__file__), 'book2txt.xslt')
     style = etree.parse(style_filename)
 
     if is_file:
     """Transforms file input_filename in XML to output_filename in TXT."""
     # Parse XSLT
     style_filename = os.path.join(os.path.dirname(__file__), 'book2txt.xslt')
     style = etree.parse(style_filename)
 
     if is_file:
-        document = WLDocument.from_file(input, True)
+        document = parser.WLDocument.from_file(input_filename, True, parse_dublincore=parse_dublincore)
     else:
     else:
-        document = WLDocument.from_string(input, True)
+        document = parser.WLDocument.from_string(input_filename, True, parse_dublincore=parse_dublincore)
 
     result = document.transform(style, **options)
 
     output_file = codecs.open(output_filename, 'wb', encoding='utf-8')
 
     result = document.transform(style, **options)
 
     output_file = codecs.open(output_filename, 'wb', encoding='utf-8')
+    
+    if parse_dublincore:
+        url = dcparser.parse(input_filename).url
+    else:
+        url = '*' * 10
     output_file.write(TEMPLATE % {
     output_file.write(TEMPLATE % {
-        'url': dcparser.parse(input_filename).url,
+        'url': url,
         'text': unicode(result),
     })
 
         'text': unicode(result),
     })
 
diff --git a/librarian/wl2html_base.xslt b/librarian/wl2html_base.xslt
new file mode 100644 (file)
index 0000000..cd31ef1
--- /dev/null
@@ -0,0 +1,376 @@
+
+<xsl:stylesheet
+    version="1.0"
+    
+    xmlns="http://www.w3.org/1999/xhtml"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"   
+    xmlns:wl2o="http://nowoczesnapolska.org.pl/WL/2.0/Overlay"   
+    xmlns:wl="http://wolnelektury.pl/functions"
+
+    exclude-result-prefixes="wl" >
+
+    <xsl:variable name="config" select="document('config.xml')" />
+
+    <xsl:output method="xml"
+        encoding="utf-8"
+        indent="yes"
+        omit-xml-declaration = "yes" />
+
+    <xsl:strip-space elements = "strofa utwor kwestia liryka_l liryka_lp powiesc opowiadanie dramat_wierszowany_lp" />
+    <!--     
+        Dokument ten opisuje podstawowe przekształcenia potrzebne
+     do zamiany dokumentu WLML 1.0 na poprawnie sformatowany
+     dokument XHTML.
+
+    -->
+
+    <xsl:template name="generic-attributes">
+        <xsl:param name="element" />
+        <xsl:param name="mypath" />
+        <xsl:variable name="tag" select="name($element)" />
+
+        <xsl:if test="$with-paths">
+        <xsl:attribute name="wl2o:path">
+            <xsl:value-of select="$mypath" />
+        </xsl:attribute>
+        </xsl:if>
+
+        <xsl:if test="$config//editable/*[name() = $tag]">
+            <xsl:attribute name="wl2o:editable">editable</xsl:attribute>
+        </xsl:if>
+
+        <xsl:attribute name="class">
+            <xsl:value-of select="$tag"/>
+        </xsl:attribute>
+    </xsl:template>
+
+    <xsl:template name="generic-descent">
+        <xsl:param name="element" />
+        <xsl:param name="mypath" />
+        
+        <xsl:for-each select="child::node()">            
+            <xsl:apply-templates select="." mode="element-tag">
+                <xsl:with-param name="offset" select="position()" />
+                <xsl:with-param name="parent-path" select="$mypath" />
+            </xsl:apply-templates>
+        </xsl:for-each>
+    </xsl:template>
+    
+    <xsl:template name="generic-content">
+        <xsl:param name="element" />
+        <xsl:param name="mypath" />
+
+        <xsl:call-template name="generic-attributes">
+            <xsl:with-param name="element" select="$element" />
+            <xsl:with-param name="mypath" select="$mypath" />
+        </xsl:call-template>
+
+        <xsl:call-template name="generic-descent">
+            <xsl:with-param name="element" select="$element" />
+            <xsl:with-param name="mypath" select="$mypath" />
+        </xsl:call-template>
+    </xsl:template>
+    
+    <!-- Generyczne szablony -->
+    <xsl:template name="generic" >
+        <xsl:param name="element" />
+        <xsl:param name="mypath" />
+        <xsl:param name="offset" />
+
+        <!-- <xsl:param name="parent-type" select="'block'" /> -->
+
+        <xsl:variable name="tag" select="name($element)" />        
+            
+        <xsl:choose>            
+            <!-- ignore namespaced elements -->
+            <xsl:when test="namespace-uri()" />
+
+            <xsl:when test="$config//block-elements/*[local-name() = $tag]">
+                <xsl:element name="div" namespace="http://www.w3.org/1999/xhtml">
+                    <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+
+            <xsl:when test="$config//paragraph-elements/*[local-name() = $tag]">
+                <xsl:element name="p" namespace="http://www.w3.org/1999/xhtml">                    
+                        <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+            
+            <xsl:when test="$config//inline-elements/*[local-name() = $tag]">
+                <xsl:element name="span" namespace="http://www.w3.org/1999/xhtml">
+                    <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+
+            <xsl:when test="$config//header-1-elements/*[local-name() = $tag]">
+                <xsl:element name="h1" namespace="http://www.w3.org/1999/xhtml">
+                    <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+
+            <xsl:when test="$config//header-2-elements/*[local-name() = $tag]">
+                <xsl:element name="h2" namespace="http://www.w3.org/1999/xhtml">
+                    <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+
+            <xsl:when test="$config//header-3-elements/*[local-name() = $tag]">
+                <xsl:element name="h3" namespace="http://www.w3.org/1999/xhtml">
+                    <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+
+            <xsl:when test="$config//header-4-elements/*[local-name() = $tag]">
+                <xsl:element name="h4" namespace="http://www.w3.org/1999/xhtml">
+                    <xsl:apply-templates select="$element" mode="element-content" >
+                        <xsl:with-param name="mypath" select="$mypath"/>
+                    </xsl:apply-templates>
+                </xsl:element>
+            </xsl:when>
+
+            <xsl:when test="$config//no-show-elements/*[local-name() = $tag]" />
+
+            <xsl:otherwise>
+                <xsl:message terminate="yes">
+                    Nieznany tag '<xsl:value-of select="$tag" />' :(.
+                </xsl:message>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+    
+
+    <!--
+    <special-tags>
+        <strofa />
+        <lista_osob />
+        <sekcja_swiatlo />
+        <sekcja_asterysk />
+        <separator_linia />
+    </special-tags>
+    -->    
+
+    <xsl:template match="dlugi_cytat|poezja_cyt" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="blockquote" >
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+            <xsl:call-template name="generic-descent">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+        </xsl:element>
+    </xsl:template>
+
+
+    <xsl:template match="lista_osob" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="div" >
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+
+            <xsl:apply-templates select="./naglowek-listy" mode="element-tag" />
+            <ul>
+                <xsl:for-each select="./lista_osoba">
+                <xsl:apply-templates select="." mode="element-tag">
+                    <xsl:with-param name="offset" select="position()" />
+                    <xsl:with-param name="parent-path" select="$mypath" />
+                </xsl:apply-templates>
+                </xsl:for-each>
+            </ul>
+        </xsl:element>
+    </xsl:template>
+
+    <xsl:template match="lista_osoba" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="li" >
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+            <xsl:call-template name="generic-descent">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+        </xsl:element>
+    </xsl:template>
+
+    <xsl:template match="separator_linia" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="hr" >
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>            
+        </xsl:element>
+    </xsl:template>
+
+    <xsl:template match="sekcja_swiatlo" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="br" >
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+        </xsl:element>
+    </xsl:template>
+
+    <xsl:template match="sekcja_asterysk" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="p" >
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+            *
+        </xsl:element>
+    </xsl:template>
+
+    <xsl:template match="zastepnik_wersu|wers_akap|wers_cd|wers_wciety" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[',string($offset),']')" />
+       
+        <xsl:call-template name="generic-descent">
+            <xsl:with-param name="element" select="current()" />
+            <xsl:with-param name="mypath" select="$mypath" />
+        </xsl:call-template>        
+    </xsl:template>
+
+    <!-- strofy -->
+    <xsl:template match="strofa" mode="element-tag">
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:element name="div" >            
+            <xsl:call-template name="generic-attributes">
+                <xsl:with-param name="element" select="current()" />
+                <xsl:with-param name="mypath" select="$mypath" />
+            </xsl:call-template>
+
+            <xsl:choose>
+                <xsl:when test="count(br) > 0">
+                    <xsl:call-template name="verse">
+                        <xsl:with-param name="verse-content" select="br[1]/preceding-sibling::text() | br[1]/preceding-sibling::node()" />
+                        <xsl:with-param name="verse-type" select="br[1]/preceding-sibling::*[name() = 'wers_wciety' or name() = 'wers_akap' or name() = 'wers_cd'][1]" />
+                        <xsl:with-param name="mypath" select="$mypath" />
+                    </xsl:call-template>
+                    <xsl:for-each select="br">
+                               <!-- Each BR tag "consumes" text after it -->
+                        <xsl:variable name="lnum" select="count(preceding-sibling::br)" />
+                        <xsl:call-template name="verse">
+                            <xsl:with-param name="verse-content"
+                                select="following-sibling::text()[count(preceding-sibling::br) = $lnum+1] | following-sibling::node()[count(preceding-sibling::br) = $lnum+1]" />
+                            <xsl:with-param name="verse-type" select="following-sibling::*[count(preceding-sibling::br) = $lnum+1 and (name() = 'wers_wciety' or name() = 'wers_akap' or name() = 'wers_cd')][1]" />
+                            <xsl:with-param name="mypath" select="$mypath" />
+                        </xsl:call-template>
+                    </xsl:for-each>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:call-template name="verse">
+                        <xsl:with-param name="verse-content" select="child::node()" />
+                        <xsl:with-param name="verse-type" select="wers_wciety|wers_akap|wers_cd[1]" />
+                        <xsl:with-param name="mypath" select="$mypath" />
+                    </xsl:call-template>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:element>
+    </xsl:template>
+
+    <xsl:template name="verse">
+        <xsl:param name="verse-content" />
+        <xsl:param name="verse-type" />
+        <xsl:param name="mypath" />
+
+        <xsl:element name="p">
+            <xsl:attribute name="class">
+                <xsl:value-of select="name($verse-type)" />
+            </xsl:attribute>
+            <xsl:for-each select="$verse-content">
+                <xsl:apply-templates select="." mode="element-tag">
+                    <xsl:with-param name="offset" select="position()" />
+                    <xsl:with-param name="parent-path" select="$mypath" />
+                </xsl:apply-templates>
+            </xsl:for-each>
+        </xsl:element>
+    </xsl:template>
+
+
+<!-- default content processing -->
+    <xsl:template match="*" mode="element-content">
+        <xsl:param name="mypath" />
+        <xsl:call-template name="generic-content">
+            <xsl:with-param name="element" select="current()"/>
+            <xsl:with-param name="mypath" select="$mypath"/>
+        </xsl:call-template>
+    </xsl:template>
+
+    <xsl:template match="*" mode="element-tag" >
+        <xsl:param name="offset" />
+        <xsl:param name="parent-path" />
+
+        <xsl:variable name="mypath"
+            select="concat($parent-path, '/', name(), '[', string($offset),']')" />
+
+        <xsl:call-template name="generic">
+            <xsl:with-param name="element" select="current()" />
+            <xsl:with-param name="offset" select="$offset" />
+            <xsl:with-param name="mypath" select="$mypath" />
+        </xsl:call-template>
+    </xsl:template>
+
+    <xsl:template match="text()" mode="element-tag">
+        
+        <xsl:value-of select="wl:substitute_entities(.)" />
+        
+        <!--<xsl:value-of select="." /> -->
+    </xsl:template>
+
+    <xsl:template match="node()" />
+    
+</xsl:stylesheet>
diff --git a/librarian/wl2html_full.xslt b/librarian/wl2html_full.xslt
new file mode 100644 (file)
index 0000000..deaf0c5
--- /dev/null
@@ -0,0 +1,27 @@
+
+<xsl:stylesheet version="1.0"    
+    xmlns="http://www.w3.org/1999/xhtml"    
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+    <xsl:param name="with-paths" select="boolean(0)" />
+    <xsl:param name="with-annotations" select="boolean(1)" />
+    
+    <xsl:include href="wl2html_base.xslt" />
+    <xsl:output encoding="utf-8" indent="yes" omit-xml-declaration = "yes" />
+
+    <xsl:template match="/">
+        <div class="document">
+
+            <xsl:if test="with-toc" />
+
+            <xsl:call-template name="generic">
+                <xsl:with-param name="element" select="/utwor" />
+                <xsl:with-param name="mypath" select="'.'" />
+                <xsl:with-param name="offset" select="position()" />
+            </xsl:call-template>       
+
+            <xsl:if test="with-annotations" />
+        </div>
+    </xsl:template>
+
+</xsl:stylesheet>
\ No newline at end of file
diff --git a/librarian/wl2html_partial.xslt b/librarian/wl2html_partial.xslt
new file mode 100644 (file)
index 0000000..0fdca74
--- /dev/null
@@ -0,0 +1,20 @@
+<xsl:stylesheet version="1.0"    
+    xmlns="http://www.w3.org/1999/xhtml"    
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+    <xsl:param name="with-paths" select="boolean(0)" />
+    <xsl:param name="base-path" select="'.'"/>
+    <xsl:param name="base-offset" select="1" />    
+    
+    <xsl:include href="wl2html_base.xslt" />    
+    <xsl:output encoding="utf-8" indent="yes" omit-xml-declaration = "yes" /> 
+
+    <xsl:template match="/">
+        <xsl:message>Processing...</xsl:message>
+        <xsl:apply-templates select="/*" mode="element-tag">
+            <xsl:with-param name="offset" select="$base-offset" />
+            <xsl:with-param name="parent-path" select="$base-path" />
+        </xsl:apply-templates>
+    </xsl:template>   
+
+</xsl:stylesheet>
\ No newline at end of file
index 02f2fa7..5594223 100755 (executable)
@@ -14,7 +14,9 @@ if __name__ == '__main__':
 
     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
         help='print status messages to stdout')
 
     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
         help='print status messages to stdout')
-
+    parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True,
+        help='don\'t try to parse dublin core metadata')
+            
     options, input_filenames = parser.parse_args()
 
     if len(input_filenames) < 1:
     options, input_filenames = parser.parse_args()
 
     if len(input_filenames) < 1:
@@ -28,7 +30,7 @@ if __name__ == '__main__':
         
         output_filename = os.path.splitext(input_filename)[0] + '.html'
         try:
         
         output_filename = os.path.splitext(input_filename)[0] + '.html'
         try:
-            html.transform(input_filename, output_filename)
+            html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore)
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
diff --git a/scripts/book2ihtml b/scripts/book2ihtml
new file mode 100755 (executable)
index 0000000..2f94be9
--- /dev/null
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+import os
+import optparse
+
+from librarian import html, ParseError
+
+
+if __name__ == '__main__':
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Convert SOURCE files to HTML format."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+    parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True,
+        help='don\'t try to parse dublin core metadata')
+            
+    options, input_filenames = parser.parse_args()
+
+    if len(input_filenames) < 1:
+        parser.print_help()
+        exit(1)
+
+    # Do some real work
+    for input_filename in input_filenames:
+        if options.verbose:
+            print input_filename
+        
+        output_filename = os.path.splitext(input_filename)[0] + '.html'
+        try:
+            html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore,\
+                stylesheet='partial')
+        except ParseError, e:
+            print '%(file)s:%(name)s:%(message)s' % {
+                'file': input_filename,
+                'name': e.__class__.__name__,
+                'message': e.message.encode('utf-8')
+            }
+        except IOError, e:
+            print '%(file)s:%(name)s:%(message)s' % {
+                'file': input_filename,
+                'name': e.__class__.__name__,
+                'message': e.strerror,
+            }
+        except BaseException, e:
+            print '%(file)s:%(etype)s:%(message)s' % {
+                'file': input_filename,
+                'etype': e.__class__.__name__,
+                'message': e.message.encode('utf-8'),
+            }
+            raise e
+
index 41a3978..55482a6 100755 (executable)
@@ -17,7 +17,9 @@ if __name__ == '__main__':
         help='print status messages to stdout')
     parser.add_option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0,
         help='set line wrap column')
         help='print status messages to stdout')
     parser.add_option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0,
         help='set line wrap column')
-    
+    parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True,
+        help='don\'t try to parse dublin core metadata')
+            
     options, input_filenames = parser.parse_args()
     
     if len(input_filenames) < 1:
     options, input_filenames = parser.parse_args()
     
     if len(input_filenames) < 1:
@@ -31,7 +33,8 @@ if __name__ == '__main__':
         
         output_filename = os.path.splitext(input_filename)[0] + '.txt'
         try:
         
         output_filename = os.path.splitext(input_filename)[0] + '.txt'
         try:
-            text.transform(input_filename, output_filename, wrapping=str(options.wrapping))
+            text.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore,
+                wrapping=str(options.wrapping))
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
diff --git a/setup.cfg b/setup.cfg
new file mode 100644 (file)
index 0000000..f2f658c
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[aliases]
+test = nosetests --detailed-errors --with-doctest --with-coverage --cover-package=librarian
index 09bb42b..34d016e 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -1,21 +1,21 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-from distutils.core import setup
-from tests.utils import TestCommand
+from ez_setup import use_setuptools
+use_setuptools()
+
+from setuptools import setup, find_packages
+
 
 setup(
     name='librarian',
 
 setup(
     name='librarian',
-    version='1.2.1',
+    version='1.2.5',
     description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
     author='Marek Stępniowski',
     author_email='marek@stepniowski.com',
     url='http://redmine.nowoczesnapolska.org.pl/',
     description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
     author='Marek Stępniowski',
     author_email='marek@stepniowski.com',
     url='http://redmine.nowoczesnapolska.org.pl/',
-    packages=['librarian', 'tests'],
-    package_dir={'librarian': 'librarian', 'tests': 'tests'},
-    package_data={
-        'librarian': ['*.xslt'],
-        'tests': ['files/dcparser/*.xml', 'files/erroneous/*.xml'],
-    },
+    packages=find_packages(exclude=['tests']),
+    include_package_data=True,
+    install_requires=['lxml>=2.2'],
     scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/bookfragments', 'scripts/genslugs'],
     scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/bookfragments', 'scripts/genslugs'],
-    cmdclass={'test': TestCommand},
+    tests_require=['nose>=0.11', 'coverage>=3.0.1'],
 )
 )
diff --git a/tests/files/text/asnyk_miedzy_nami.txt b/tests/files/text/asnyk_miedzy_nami.txt
deleted file mode 100644 (file)
index e69de29..0000000
old mode 100644 (file)
new mode 100755 (executable)
index 5716a28..d7ab4fc
@@ -1,25 +1,64 @@
 <?xml version='1.0' encoding='utf-8'?>
 <?xml version='1.0' encoding='utf-8'?>
-<utwor><liryka_lp>
-    <autor_utworu>Adam Asnyk</autor_utworu>
-    <nazwa_utworu>Między nami nic nie było</nazwa_utworu>
-
-    <strofa>Między nami nic nie było!/
-    Żadnych zwierzeń, wyznań żadnych!/
-    Nic nas z sobą nie łączyło ---/
-    Prócz wiosennych marzeń zdradnych;</strofa>
-
-    <strofa>Prócz tych woni, barw i blasków,/
-    Unoszących się w przestrzeni;/
-    Prócz szumiących śpiewem lasków/
-    I tej świeżej łąk zieleni;</strofa>
-
-    <strofa>Prócz tych kaskad i potoków,/
-    Zraszających każdy parów,/
-    Prócz girlandy tęcz, obłoków,/
-    Prócz natury słodkich czarów;</strofa>
-
-    <strofa>Prócz tych wspólnych, jasnych zdrojów,/
-    Z których serce zachwyt piło;/
-    Prócz pierwiosnków i powojów,---/
-    Między nami nic nie było!</strofa>
-</liryka_lp></utwor>
+<utwor>
+  <liryka_lp>
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+<rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Asnyk/Między_nami_nic_nie_było">
+<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
+<dc:title xml:lang="pl">Między nami nic nie było</dc:title>
+<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
+<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
+<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
+<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:identifier.url>
+<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
+<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
+<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
+<dc:date.pd xml:lang="pl">1897</dc:date.pd>
+<dc:format xml:lang="pl">xml</dc:format>
+<dc:type xml:lang="pl">text</dc:type>
+<dc:type xml:lang="en">text</dc:type>
+<dc:date xml:lang="pl">2007-09-06</dc:date>
+<dc:audience xml:lang="pl">L</dc:audience>
+<dc:language xml:lang="pl">pol</dc:language>
+</rdf:Description>
+</rdf:RDF>
+
+
+<autor_utworu>Adam Asnyk</autor_utworu>
+
+<nazwa_utworu><begin id="b1189062500041"/><motyw id="m1189062500041">Miłość platoniczna</motyw>Między nami nic nie było</nazwa_utworu>
+
+
+
+<strofa>Między nami nic nie było!/
+Żadnych zwierzeń, wyznań żadnych!/
+Nic nas z sobą nie łączyło ---/
+Prócz wiosennych marzeń zdradnych;</strofa>
+
+
+
+<strofa><begin id="b1189062528872"/><motyw id="m1189062528872">Natura</motyw>Prócz tych woni, barw i blasków,/
+Unoszących się w przestrzeni;/
+Prócz szumiących śpiewem lasków/
+I tej świeżej łąk zieleni;</strofa>
+
+
+
+<strofa>Prócz tych kaskad i potoków,/
+Zraszających każdy parów,/
+Prócz girlandy tęcz, obłoków,/
+Prócz natury słodkich czarów;</strofa>
+
+
+
+<strofa>Prócz tych wspólnych, jasnych zdrojów,/
+Z których serce zachwyt piło;/
+Prócz pierwiosnków i powojów,---/
+Między nami nic nie było!<end id="e1189062528872"/><end id="e1189062500041"/></strofa>
+
+</liryka_lp>
+</utwor>
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.html b/tests/files/text/asnyk_miedzy_nami_expected.html
new file mode 100644 (file)
index 0000000..49a2691
--- /dev/null
@@ -0,0 +1,47 @@
+<div xmlns:wl="http://wolnelektury.pl/functions" id="book-text">
+  <div id="toc">
+    <h2>Spis treści</h2>
+    <ol/>
+  </div>
+  <h1>
+    <span class="author">Adam Asnyk</span>
+    <span class="title"><a name="m1189062500041" class="theme-begin" fid="1189062500041">Miłość platoniczna</a>Między nami nic nie było</span>
+  </h1>
+  <a name="m1189062500041" class="theme-begin" fid="1189062500041">Miłość platoniczna</a>
+  <div class="stanza">
+    <p class="verse"><a name="f1" class="target"> </a><a href="#f1" class="anchor">1</a>Między nami nic nie było!</p>
+    <p class="verse">
+Żadnych zwierzeń, wyznań żadnych!</p>
+    <p class="verse">
+Nic nas z sobą nie łączyło —</p>
+    <p class="verse">
+Prócz wiosennych marzeń zdradnych;</p>
+  </div>
+  <div class="stanza">
+    <p class="verse"><a name="f5" class="target"> </a><a href="#f5" class="anchor">5</a><a name="m1189062528872" class="theme-begin" fid="1189062528872">Natura</a>Prócz tych woni, barw i blasków,</p>
+    <p class="verse">
+Unoszących się w przestrzeni;</p>
+    <p class="verse">
+Prócz szumiących śpiewem lasków</p>
+    <p class="verse">
+I tej świeżej łąk zieleni;</p>
+  </div>
+  <div class="stanza">
+    <p class="verse">Prócz tych kaskad i potoków,</p>
+    <p class="verse"><a name="f10" class="target"> </a><a href="#f10" class="anchor">10</a>
+Zraszających każdy parów,</p>
+    <p class="verse">
+Prócz girlandy tęcz, obłoków,</p>
+    <p class="verse">
+Prócz natury słodkich czarów;</p>
+  </div>
+  <div class="stanza">
+    <p class="verse">Prócz tych wspólnych, jasnych zdrojów,</p>
+    <p class="verse">
+Z których serce zachwyt piło;</p>
+    <p class="verse"><a name="f15" class="target"> </a><a href="#f15" class="anchor">15</a>
+Prócz pierwiosnków i powojów,—</p>
+    <p class="verse">
+Między nami nic nie było!<span class="theme-end" fid="1189062528872"/><span class="theme-end" fid="1189062500041"/></p>
+  </div>
+</div>
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt
new file mode 100644 (file)
index 0000000..6e54969
--- /dev/null
@@ -0,0 +1,38 @@
+Kodowanie znaków w dokumencie: UTF-8.
+-----
+Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl/). Reprodukcja cyfrowa wykonana przez
+Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. Ten utwór nie jest chroniony prawem autorskim i znajduje
+się w domenie publicznej, co oznacza, że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać.
+
+Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo.
+-----
+
+
+
+
+
+Adam Asnyk
+
+Między nami nic nie było
+
+
+
+Między nami nic nie było!
+Żadnych zwierzeń, wyznań żadnych!
+Nic nas z sobą nie łączyło —
+Prócz wiosennych marzeń zdradnych;
+
+Prócz tych woni, barw i blasków,
+Unoszących się w przestrzeni;
+Prócz szumiących śpiewem lasków
+I tej świeżej łąk zieleni;
+
+Prócz tych kaskad i potoków,
+Zraszających każdy parów,
+Prócz girlandy tęcz, obłoków,
+Prócz natury słodkich czarów;
+
+Prócz tych wspólnych, jasnych zdrojów,
+Z których serce zachwyt piło;
+Prócz pierwiosnków i powojów,—
+Między nami nic nie było!
diff --git a/tests/files/text/asnyk_miedzy_nami_nodc.txt b/tests/files/text/asnyk_miedzy_nami_nodc.txt
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tests/files/text/asnyk_miedzy_nami_nodc.xml b/tests/files/text/asnyk_miedzy_nami_nodc.xml
new file mode 100644 (file)
index 0000000..5716a28
--- /dev/null
@@ -0,0 +1,25 @@
+<?xml version='1.0' encoding='utf-8'?>
+<utwor><liryka_lp>
+    <autor_utworu>Adam Asnyk</autor_utworu>
+    <nazwa_utworu>Między nami nic nie było</nazwa_utworu>
+
+    <strofa>Między nami nic nie było!/
+    Żadnych zwierzeń, wyznań żadnych!/
+    Nic nas z sobą nie łączyło ---/
+    Prócz wiosennych marzeń zdradnych;</strofa>
+
+    <strofa>Prócz tych woni, barw i blasków,/
+    Unoszących się w przestrzeni;/
+    Prócz szumiących śpiewem lasków/
+    I tej świeżej łąk zieleni;</strofa>
+
+    <strofa>Prócz tych kaskad i potoków,/
+    Zraszających każdy parów,/
+    Prócz girlandy tęcz, obłoków,/
+    Prócz natury słodkich czarów;</strofa>
+
+    <strofa>Prócz tych wspólnych, jasnych zdrojów,/
+    Z których serce zachwyt piło;/
+    Prócz pierwiosnków i powojów,---/
+    Między nami nic nie było!</strofa>
+</liryka_lp></utwor>
old mode 100755 (executable)
new mode 100644 (file)
index 62e664c..fcbc363
@@ -1,56 +1,44 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-
-import unittest
-
+from librarian import dcparser
 from lxml import etree
 from lxml import etree
-from utils import get_file_path
-from librarian import dcparser, html, ParseError
-from utils import AutoTestMetaclass
-
-class TestDCParser(unittest.TestCase):
-    __metaclass__ = AutoTestMetaclass
+from nose.tools import *
+from os.path import splitext
+from tests.utils import get_all_fixtures
+import codecs
 
 
-    TEST_DIR = 'dcparser'
 
 
-    def run_auto_test(self, in_data, out_data):
-        info = dcparser.BookInfo.from_string(in_data).to_dict()
-        should_be = eval(out_data)
-        for key in should_be:
-            self.assertEqual( info[key], should_be[key] )
+def check_dcparser(xml_file, result_file):
+    # Parses the Dublin Core metadata found in xml_file and compares it with
+    # the expected values stored in result_file (read as UTF-8).
+    xml = file(xml_file).read()
+    result = codecs.open(result_file, encoding='utf-8').read()
+    info = dcparser.BookInfo.from_string(xml).to_dict()
+    # NOTE(review): eval() executes the fixture contents as Python code
+    # (presumably a dict literal -- confirm against the .out files).  That is
+    # acceptable for files shipped with the test suite, but this must never
+    # be pointed at untrusted input.
+    should_be = eval(result)
+    # Only keys present in the expected data are checked; extra keys in the
+    # parsed info are ignored.
+    for key in should_be:
+        assert_equals(info[key], should_be[key])
 
 
-class TestDCSerialize(unittest.TestCase):
-    __metaclass__ = AutoTestMetaclass
 
 
-    TEST_DIR = 'dcserialize'
+def test_dcparser():
+    # Test generator: yields one check_dcparser case for every *.xml fixture
+    # in the 'dcparser' fixture directory, paired with the '.out' file that
+    # shares its base name.
+    for fixture in get_all_fixtures('dcparser', '*.xml'):
+        base_name = splitext(fixture)[0]
+        yield check_dcparser, fixture, base_name + '.out'
 
 
-    def run_auto_test(self, in_data, out_data):
-        import lxml.etree
-        # first parse the input
-        info = dcparser.BookInfo.from_string(in_data)
 
 
-        # serialize
-        serialized = lxml.etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8')
+def check_serialize(xml_file):
+    xml = file(xml_file).read()
+    info = dcparser.BookInfo.from_string(xml)
 
 
-        # then parse again
-        info_bis = dcparser.BookInfo.from_string(serialized)
+    # serialize
+    serialized = etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8')
+    # then parse again
+    info_bis = dcparser.BookInfo.from_string(serialized)
 
 
-        # check if they are the same
-        for key in vars(info):
-            self.assertEqual( getattr(info, key), getattr(info_bis, key))
+    # check if they are the same
+    for key in vars(info):
+        assert_equals(getattr(info, key), getattr(info_bis, key))
+    for key in vars(info_bis):
+        assert_equals(getattr(info, key), getattr(info_bis, key))
 
 
-        for key in vars(info_bis):
-            self.assertEqual( getattr(info, key), getattr(info_bis, key))
 
 
-class TestParserErrors(unittest.TestCase):
-    def test_error(self):
-        try:
-            html.transform(get_file_path('erroneous', 'asnyk_miedzy_nami.xml'),
-                           get_file_path('erroneous', 'asnyk_miedzy_nami.html'))
-            self.fail()
-        except ParseError:
-            pass
-            #self.assertEqual(e.position, (25, 13))    
+def test_serialize():
+    # Test generator: yields one check_serialize case for every *.xml fixture
+    # in the 'dcparser' fixture directory.
+    for fixture in get_all_fixtures('dcparser', '*.xml'):
+        yield check_serialize, fixture
 
 
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/test_html.py b/tests/test_html.py
new file mode 100644 (file)
index 0000000..86fcfac
--- /dev/null
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+from librarian import html, NoDublinCore
+from nose.tools import *
+from utils import get_fixture, remove_output_file
+
+
+def teardown_transform():
+    # Remove every output path the tests in this module hand to
+    # html.transform(): the regular output and the one used by
+    # test_no_dublincore.  remove_output_file() ignores files that were
+    # never written, so calling it for both is safe.
+    remove_output_file('text', 'asnyk_miedzy_nami.html')
+    remove_output_file('text', 'asnyk_miedzy_nami_nodc.html')
+
+
+# Transforms the Asnyk fixture to HTML and requires the written file to match
+# the stored expected output byte for byte.  The output file is removed
+# afterwards by teardown_transform.
+@with_setup(None, teardown_transform)
+def test_transform():
+    output_file_path = get_fixture('text', 'asnyk_miedzy_nami.html')
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')
+    
+    html.transform(
+        get_fixture('text', 'asnyk_miedzy_nami.xml'),
+        output_file_path,
+    )
+    
+    assert_equal(file(output_file_path).read(), file(expected_output_file_path).read())
+
+
+# The '_nodc' fixture contains no Dublin Core (rdf:RDF) block, so a transform
+# with default arguments is expected to raise NoDublinCore.
+@with_setup(None, teardown_transform)
+@raises(NoDublinCore)
+def test_no_dublincore():
+    html.transform(
+        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+        get_fixture('text', 'asnyk_miedzy_nami_nodc.html'),
+    )
+
+
+@with_setup(None, teardown_transform)
+def test_passing_parse_dublincore_to_transform():
+    """Passing parse_dublincore=False to transform omits DublinCore parsing."""
+    html.transform(
+        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+        get_fixture('text', 'asnyk_miedzy_nami.html'),
+        parse_dublincore=False,
+    )
old mode 100755 (executable)
new mode 100644 (file)
index 00fd787..020c571
@@ -1,22 +1,41 @@
-#!/usr/bin/env python
-# encoding: utf-8
+# -*- coding: utf-8 -*-
+from librarian import text, NoDublinCore
+from nose.tools import *
+from utils import get_fixture, remove_output_file
 
 
-import unittest
 
 
-from utils import get_file_path
-from librarian import dcparser
-from librarian import text, NoDublinCore
+def teardown_transform():
+    remove_output_file('text', 'asnyk_miedzy_nami.txt')
+
+
+@with_setup(None, teardown_transform)
+def test_transform():
+    output_file_path = get_fixture('text', 'asnyk_miedzy_nami.txt')
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
+    
+    text.transform(
+        get_fixture('text', 'asnyk_miedzy_nami.xml'),
+        output_file_path,
+    )
+    
+    assert_equal(file(output_file_path).read(), file(expected_output_file_path).read())
 
 
 
 
-class TestXML(unittest.TestCase):
-    def test_no_dublincore(self):
-        try:
-            text.transform(get_file_path('text', 'asnyk_miedzy_nami.xml'),
-                           get_file_path('text', 'asnyk_miedzy_nami.txt'))
-            self.fail()
-        except NoDublinCore, e:
-            pass
+@with_setup(None, teardown_transform)
+@raises(NoDublinCore)
+def test_no_dublincore():
+    text.transform(
+        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+        get_fixture('text', 'asnyk_miedzy_nami_nodc.txt'),
+    )
 
 
 
 
-if __name__ == '__main__':
-    unittest.main()
+@with_setup(None, teardown_transform)
+def test_passing_parse_dublincore_to_transform():
+    """Passing parse_dublincore=False to transform omits DublinCore parsing."""
+    text.transform(
+        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+        get_fixture('text', 'asnyk_miedzy_nami.txt'),
+        parse_dublincore=False,
+    )
+    
\ No newline at end of file
index 1870a07..bea2038 100644 (file)
@@ -1,62 +1,25 @@
-from __future__ import with_statement
-
+from os.path import realpath, join, dirname
+import glob
 import os
 import os
-from distutils.core import Command
-from unittest import TextTestRunner, TestLoader
-from glob import glob
-from os.path import dirname, join, realpath, splitext, basename, walk
-from os import listdir
-import codecs
-
-class AutoTestMetaclass(type):
-
-    def __new__(cls, name, bases, class_dict):        
-        test_dir = class_dict.pop('TEST_DIR')
-        path = realpath( join(dirname(__file__), 'files', test_dir) )
 
 
-        for file in listdir(path):
-            base, ext = splitext(file)
-            if ext != '.xml':
-                continue
 
 
-            class_dict['test_'+base] = cls.make_test_runner(base, \
-                    join(path, base +'.xml'), join(path, base + '.out') )
+def get_fixture_dir(dir_name):
+    """Returns path to fixtures directory dir_name."""
+    return realpath(join(dirname(__file__), 'files', dir_name))
 
 
-        return type.__new__(cls, name, bases, class_dict)
-    
-    @staticmethod
-    def make_test_runner(name, inputf, outputf):
-        def runner(self):
-            with open(inputf, 'rb') as ifd:
-                with codecs.open(outputf, 'rb', encoding='utf-8') as ofd:
-                    self.run_auto_test(ifd.read(), ofd.read())            
-        return runner
 
 
+def get_fixture(dir_name, file_name):
+    """Returns path to fixture file_name in directory dir_name."""
+    return join(get_fixture_dir(dir_name), file_name)
 
 
-def get_file_path(dir_name, file_name):
-    return realpath(join(dirname(__file__), 'files', dir_name, file_name))
 
 
-class TestCommand(Command):
-    user_options = []
+def get_all_fixtures(dir_name, glob_pattern='*'):
+    """Returns sorted list of paths for fixtures in directory dir_name matching the glob_pattern."""
+    # The pattern is rooted in the (absolute) fixture directory, so glob.glob()
+    # already returns complete paths; joining them onto the directory again is
+    # unnecessary.  glob returns matches in arbitrary order, so sort them to
+    # keep the order of generated tests deterministic.
+    return sorted(glob.glob(join(get_fixture_dir(dir_name), glob_pattern)))
 
 
-    def initialize_options(self):
-        self._dir = os.getcwd()
 
 
-    def finalize_options(self):
+def remove_output_file(dir_name, file_name):
+    """Removes fixture file_name from directory dir_name; failure to delete it is ignored."""
+    try:
+        os.remove(get_fixture(dir_name, file_name))
+    except OSError:
+        # Best-effort cleanup: the file may never have been created.  Only
+        # OSError is swallowed so that KeyboardInterrupt and genuine bugs
+        # still propagate.
         pass
         pass
-
-    def run(self):
-        '''
-        Finds all the tests modules in tests/, and runs them.
-        '''
-        testfiles = []
-        for t in glob(join(self._dir, 'tests', '*.py')):
-            module_name = splitext(basename(t))[0]
-            if module_name.startswith('test'):
-                testfiles.append('.'.join(['tests', module_name])
-                )
-
-        tests = TestLoader().loadTestsFromNames(testfiles)
-        t = TextTestRunner(verbosity=2)
-        t.run(tests)
-