converters interface changed: WLDocument in, OutputFile out

author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Mon, 5 Dec 2011 16:06:51 +0000 (17:06 +0100)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Mon, 5 Dec 2011 16:11:03 +0000 (17:11 +0100)
diff --git a/librarian/__init__.py b/librarian/__init__.py

index 8f5cf1a..fdd6b55 100644 (file)
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -4,6 +4,8 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  import os
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  import os
+import re
+import shutil
  
  class ParseError(Exception):
      def __str__(self):
  
  class ParseError(Exception):
      def __str__(self):
@@ -18,6 +20,11 @@ class ValidationError(Exception):
      pass
  
  class NoDublinCore(ValidationError):
      pass
  
  class NoDublinCore(ValidationError):
+    """There's no DublinCore section, and it's required."""
+    pass
+
+class NoProvider(Exception):
+    """There's no DocProvider specified, and it's needed."""
      pass
  
  class XMLNamespace(object):
      pass
  
  class XMLNamespace(object):
@@ -56,37 +63,61 @@ OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
  WLNS = EmptyNamespace()
  
  
  WLNS = EmptyNamespace()
  
  
+class WLURI(object):
+    """Represents a WL URI. Extracts slug and language from it."""
+
+    slug = None
+    language = None
+
+    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/'
+            '(?P<slug>[-a-z]+)(/(?P<lang>[a-z]{3})/?)?')
+
+    def __init__(self, uri):
+        self.uri = uri
+        match = self._re_wl_uri.match(uri)
+        assert match
+        self.slug = match.group('slug')
+        self.language = match.group('lang')
+
+
  class DocProvider(object):
  class DocProvider(object):
-    """ Base class for a repository of XML files.
-        Used for generating joined files, like EPUBs
+    """Base class for a repository of XML files.
+
+    Used for generating joined files, like EPUBs.
      """
  
      """
  
-    def by_slug(self, slug):
-        raise NotImplemented
+    def by_slug_and_lang(self, slug, lang=None):
+        """Should return a file-like object with a WL document XML."""
+        raise NotImplementedError
  
  
-    def __getitem__(self, slug):
-        return self.by_slug(slug)
+    def by_slug(self, slug):
+        """Should return a file-like object with a WL document XML."""
+        return self.by_slug_and_lang(slug)
  
      def by_uri(self, uri):
  
      def by_uri(self, uri):
-        return self.by_slug(uri.rsplit('/', 1)[1])
+        """Should return a file-like object with a WL document XML."""
+        wluri = WLURI(uri)
+        return self.by_slug_and_lang(wluri.slug, wluri.language)
  
  
  class DirDocProvider(DocProvider):
      """ Serve docs from a directory of files in form <slug>.xml """
  
  
  
  class DirDocProvider(DocProvider):
      """ Serve docs from a directory of files in form <slug>.xml """
  
-    def __init__(self, dir):
-        self.dir = dir
+    def __init__(self, dir_):
+        self.dir = dir_
          self.files = {}
          self.files = {}
+        return super(DirDocProvider, self).__init__()
  
  
-    def by_slug(self, slug):
-        return open(os.path.join(self.dir, '%s.xml' % slug))
+    def by_slug_and_lang(self, slug, lang=None):
+        fname = "%s%s.xml" % (slug, ".%s" % lang if lang else "")
+        return open(os.path.join(self.dir, fname))
  
  
  import lxml.etree as etree
  import dcparser
  
  DEFAULT_BOOKINFO = dcparser.BookInfo(
  
  
  import lxml.etree as etree
  import dcparser
  
  DEFAULT_BOOKINFO = dcparser.BookInfo(
-        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'}, \
+        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
          { DCNS('creator'): [u'Some, Author'],
            DCNS('title'): [u'Some Title'],
            DCNS('subject.period'): [u'Unknown'],
          { DCNS('creator'): [u'Some, Author'],
            DCNS('title'): [u'Some Title'],
            DCNS('subject.period'): [u'Unknown'],
@@ -119,14 +150,15 @@ def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
          method='xml', encoding=unicode, pretty_print=True)
  
      return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
          method='xml', encoding=unicode, pretty_print=True)
  
      return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
-        u'\n</plain-text>\n</utwor>';
+        u'\n</plain-text>\n</utwor>'
  
  
  def serialize_raw(element):
      b = u'' + (element.text or '')
  
      for child in element.iterchildren():
  
  
  def serialize_raw(element):
      b = u'' + (element.text or '')
  
      for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding=unicode, pretty_print=True)
+        e = etree.tostring(child, method='xml', encoding=unicode,
+                pretty_print=True)
          b += e
  
      return b
          b += e
  
      return b
@@ -141,3 +173,73 @@ def serialize_children(element, format='raw'):
  def get_resource(path):
      return os.path.join(os.path.dirname(__file__), path)
  
  def get_resource(path):
      return os.path.join(os.path.dirname(__file__), path)
  
+
+class OutputFile(object):
+    """Represents a file returned by one of the converters."""
+
+    _string = None
+    _filename = None
+
+    def __del__(self):
+        if self._filename:
+            os.unlink(self._filename)
+
+    def __nonzero__(self):
+        return self._string is not None or self._filename is not None
+
+    @classmethod
+    def from_string(cls, string):
+        """Converter returns contents of a file as a string."""
+
+        instance = cls()
+        instance._string = string
+        return instance
+
+    @classmethod
+    def from_filename(cls, filename):
+        """Converter returns contents of a file as a named file."""
+
+        instance = cls()
+        instance._filename = filename
+        return instance
+
+    def get_string(self):
+        """Get file's contents as a string."""
+
+        if self._filename is not None:
+            with open(self._filename) as f:
+                return f.read()
+        else:
+            return self._string
+
+    def get_file(self):
+        """Get file as a file-like object."""
+
+        if self._string is not None:
+            from StringIO import StringIO
+            return StringIO(self._string)
+        elif self._filename is not None:
+            return open(self._filename)
+
+    def get_filename(self):
+        """Get file as a fs path."""
+
+        if self._filename is not None:
+            return self._filename
+        elif self._string is not None:
+            from tempfile import NamedTemporaryFile
+            temp = NamedTemporaryFile(prefix='librarian-', delete=False)
+            temp.write(self._string)
+            temp.close()
+            self._filename = temp.name
+            return self._filename
+        else:
+            return None
+
+    def save_as(self, path):
+        """Save file to a path. Create directories, if necessary."""
+
+        dirname = os.path.dirname(os.path.abspath(path))
+        if not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        shutil.copy(self.get_filename(), path)
diff --git a/librarian/dcparser.py b/librarian/dcparser.py

index aa8f50d..5492f7a 100644 (file)
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -7,7 +7,8 @@ from xml.parsers.expat import ExpatError
  from datetime import date
  import time
  
  from datetime import date
  import time
  
-from librarian import ValidationError, NoDublinCore, ParseError, DCNS, RDFNS
+from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
+                       WLURI)
  
  import lxml.etree as etree # ElementTree API using libxml2
  from lxml.etree import XMLSyntaxError
  
  import lxml.etree as etree # ElementTree API using libxml2
  from lxml.etree import XMLSyntaxError
@@ -150,7 +151,7 @@ class BookInfo(object):
  
      @property
      def slug(self):
  
      @property
      def slug(self):
-        return self.url.rsplit('/', 1)[1]
+        return WLURI(self.url).slug
  
      @classmethod
      def from_string(cls, xml):
  
      @classmethod
      def from_string(cls, xml):
diff --git a/librarian/epub.py b/librarian/epub.py

index bb3123d..b063380 100644 (file)
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -12,13 +12,10 @@ from StringIO import StringIO
  from copy import deepcopy
  from lxml import etree
  import zipfile
  from copy import deepcopy
  from lxml import etree
  import zipfile
-from tempfile import mkdtemp
+from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
  from shutil import rmtree
  
-import sys
-
-from librarian import XMLNamespace, RDFNS, DCNS, WLNS, NCXNS, OPFNS, XHTMLNS, NoDublinCore
-from librarian.dcparser import BookInfo
+from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile
  
  from librarian import functions, get_resource
  
  
  from librarian import functions, get_resource
  
@@ -287,47 +284,40 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
      return output_html, toc, chars
  
  
      return output_html, toc, chars
  
  
-def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False,
+def transform(wldoc, verbose=False,
                style=None, html_toc=False,
                sample=None, cover=None, flags=None):
      """ produces a EPUB file
  
                style=None, html_toc=False,
                sample=None, cover=None, flags=None):
      """ produces a EPUB file
  
-    provider: a DocProvider
-    slug: slug of file to process, available by provider
-    output_file: file-like object or path to output file
-    output_dir: path to directory to save output file to; either this or output_file must be present
-    make_dir: writes output to <output_dir>/<author>/<slug>.epub instead of <output_dir>/<slug>.epub
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover object
      flags: less-advertising, without-fonts
      """
  
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover object
      flags: less-advertising, without-fonts
      """
  
-    def transform_file(input_xml, chunk_counter=1, first=True, sample=None):
+    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
          """ processes one input file and proceeds to its children """
  
          """ processes one input file and proceeds to its children """
  
-        replace_characters(input_xml.getroot())
-
-        children = [child.text for child in input_xml.findall('.//'+DCNS('relation.hasPart'))]
+        replace_characters(wldoc.edoc.getroot())
  
          # every input file will have a TOC entry,
          # pointing to starting chunk
  
          # every input file will have a TOC entry,
          # pointing to starting chunk
-        toc = TOC(node_name(input_xml.find('.//'+DCNS('title'))), "part%d.html" % chunk_counter)
+        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
          chars = set()
          if first:
              # write book title page
          chars = set()
          if first:
              # write book title page
-            html_tree = xslt(input_xml, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
              chars = used_chars(html_tree.getroot())
              zip.writestr('OPS/title.html',
                   etree.tostring(html_tree, method="html", pretty_print=True))
              # add a title page TOC entry
              toc.add(u"Strona tytułowa", "title.html")
              chars = used_chars(html_tree.getroot())
              zip.writestr('OPS/title.html',
                   etree.tostring(html_tree, method="html", pretty_print=True))
              # add a title page TOC entry
              toc.add(u"Strona tytułowa", "title.html")
-        elif children:
+        elif wldoc.book_info.parts:
              # write title page for every parent
              if sample is not None and sample <= 0:
                  chars = set()
                  html_string = open(get_resource('epub/emptyChunk.html')).read()
              else:
              # write title page for every parent
              if sample is not None and sample <= 0:
                  chars = set()
                  html_string = open(get_resource('epub/emptyChunk.html')).read()
              else:
-                html_tree = xslt(input_xml, get_resource('epub/xsltChunkTitle.xsl'))
+                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                  chars = used_chars(html_tree.getroot())
                  html_string = etree.tostring(html_tree, method="html", pretty_print=True)
              zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
                  chars = used_chars(html_tree.getroot())
                  html_string = etree.tostring(html_tree, method="html", pretty_print=True)
              zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
@@ -335,12 +325,12 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
              add_to_spine(spine, chunk_counter)
              chunk_counter += 1
  
              add_to_spine(spine, chunk_counter)
              chunk_counter += 1
  
-        if len(input_xml.getroot()) > 1:
+        if len(wldoc.edoc.getroot()) > 1:
              # rdf before style master
              # rdf before style master
-            main_text = input_xml.getroot()[1]
+            main_text = wldoc.edoc.getroot()[1]
          else:
              # rdf in style master
          else:
              # rdf in style master
-            main_text = input_xml.getroot()[0]
+            main_text = wldoc.edoc.getroot()[0]
              if main_text.tag == RDFNS('RDF'):
                  main_text = None
  
              if main_text.tag == RDFNS('RDF'):
                  main_text = None
  
@@ -361,51 +351,28 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
                  add_to_spine(spine, chunk_counter)
                  chunk_counter += 1
  
                  add_to_spine(spine, chunk_counter)
                  chunk_counter += 1
  
-        if children:
-            for child in children:
-                child_xml = etree.parse(provider.by_uri(child))
-                child_toc, chunk_counter, chunk_chars, sample = transform_file(child_xml, chunk_counter, first=False, sample=sample)
-                toc.append(child_toc)
-                chars = chars.union(chunk_chars)
+        for child in wldoc.parts():
+            child_toc, chunk_counter, chunk_chars, sample = transform_file(
+                child, chunk_counter, first=False, sample=sample)
+            toc.append(child_toc)
+            chars = chars.union(chunk_chars)
  
          return toc, chunk_counter, chars, sample
  
  
          return toc, chunk_counter, chars, sample
  
-    # read metadata from the first file
-    if file_path:
-        if slug:
-            raise ValueError('slug or file_path should be specified, not both')
-        f = open(file_path, 'r')
-        input_xml = etree.parse(f)
-        f.close()
-    else:
-        if not slug:
-            raise ValueError('either slug or file_path should be specified')
-        input_xml = etree.parse(provider[slug])
+
+    document = deepcopy(wldoc)
+    del wldoc
  
      if flags:
          for flag in flags:
  
      if flags:
          for flag in flags:
-            input_xml.getroot().set(flag, 'yes')
-
-    metadata = input_xml.find('.//'+RDFNS('Description'))
-    if metadata is None:
-        raise NoDublinCore('Document has no DublinCore - which is required.')
-    book_info = BookInfo.from_element(input_xml)
-    metadata = etree.ElementTree(metadata)
-
-    # if output to dir, create the file
-    if output_dir is not None:
-        if make_dir:
-            author = unicode(book_info.author)
-            output_dir = os.path.join(output_dir, author)
-            try:
-                os.makedirs(output_dir)
-            except OSError:
-                pass
-        if slug:
-            output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w')
-        else:
-            output_file = open(os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.epub'), 'w')
+            document.edoc.getroot().set(flag, 'yes')
+
+    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
+    manifest = opf.find('.//' + OPFNS('manifest'))
+    guide = opf.find('.//' + OPFNS('guide'))
+    spine = opf.find('.//' + OPFNS('spine'))
  
  
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
      zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
  
      # write static elements
      zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
  
      # write static elements
@@ -425,14 +392,10 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
          style = get_resource('epub/style.css')
      zip.write(style, os.path.join('OPS', 'style.css'))
  
          style = get_resource('epub/style.css')
      zip.write(style, os.path.join('OPS', 'style.css'))
  
-    opf = xslt(metadata, get_resource('epub/xsltContent.xsl'))
-    manifest = opf.find('.//' + OPFNS('manifest'))
-    guide = opf.find('.//' + OPFNS('guide'))
-    spine = opf.find('.//' + OPFNS('spine'))
  
      if cover:
          cover_file = StringIO()
  
      if cover:
          cover_file = StringIO()
-        c = cover(book_info.author.readable(), book_info.title)
+        c = cover(document.book_info.author.readable(), document.book_info.title)
          c.save(cover_file)
          c_name = 'cover.%s' % c.ext()
          zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue())
          c.save(cover_file)
          c_name = 'cover.%s' % c.ext()
          zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue())
@@ -468,7 +431,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
              '<itemref idref="html_toc" />'))
          guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
  
              '<itemref idref="html_toc" />'))
          guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
  
-    toc, chunk_counter, chars, sample = transform_file(input_xml, sample=sample)
+    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
  
      if len(toc.children) < 2:
          toc.add(u"Początek utworu", "part1.html")
  
      if len(toc.children) < 2:
          toc.add(u"Początek utworu", "part1.html")
@@ -491,7 +454,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
          '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="last" />'))
          '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="last" />'))
-    html_tree = xslt(input_xml, get_resource('epub/xsltLast.xsl'))
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
      chars.update(used_chars(html_tree.getroot()))
      zip.writestr('OPS/last.html', etree.tostring(
                          html_tree, method="html", pretty_print=True))
      chars.update(used_chars(html_tree.getroot()))
      zip.writestr('OPS/last.html', etree.tostring(
                          html_tree, method="html", pretty_print=True))
@@ -517,8 +480,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
          os.chdir(cwd)
  
      zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
          os.chdir(cwd)
  
      zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
-    contents = []
-    title = node_name(etree.ETXPath('.//'+DCNS('title'))(input_xml)[0])
+    title = document.book_info.title
      attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
      for st in attributes:
          meta = toc_file.makeelement(NCXNS('meta'))
      attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
      for st in attributes:
          meta = toc_file.makeelement(NCXNS('meta'))
@@ -536,3 +498,5 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
      toc.write_to_xml(nav_map)
      zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
      zip.close()
      toc.write_to_xml(nav_map)
      zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
      zip.close()
+
+    return OutputFile.from_filename(output_file.name)
diff --git a/librarian/html.py b/librarian/html.py

index 5974d93..997f904 100644 (file)
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -5,12 +5,10 @@
  #
  import os
  import cStringIO
  #
  import os
  import cStringIO
-import re
  import copy
  
  from lxml import etree
  import copy
  
  from lxml import etree
-from librarian.parser import WLDocument
-from librarian import XHTMLNS, ParseError
+from librarian import XHTMLNS, ParseError, OutputFile
  from librarian import functions
  
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  from librarian import functions
  
  from lxml.etree import XMLSyntaxError, XSLTApplyError
@@ -30,9 +28,8 @@ def get_stylesheet(name):
  def html_has_content(text):
      return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
  
  def html_has_content(text):
      return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
  
-def transform(input, output_filename=None, is_file=True, \
-    parse_dublincore=True, stylesheet='legacy', options={}, flags=None):
-    """Transforms file input_filename in XML to output_filename in XHTML.
+def transform(wldoc, stylesheet='legacy', options=None, flags=None):
+    """Transforms the WL document to XHTML.
  
      If output_filename is None, returns an XML,
      otherwise returns True if file has been written,False if it hasn't.
  
      If output_filename is None, returns an XML,
      otherwise returns True if file has been written,False if it hasn't.
@@ -43,12 +40,9 @@ def transform(input, output_filename=None, is_file=True, \
          style_filename = get_stylesheet(stylesheet)
          style = etree.parse(style_filename)
  
          style_filename = get_stylesheet(stylesheet)
          style = etree.parse(style_filename)
  
-        if is_file:
-            document = WLDocument.from_file(input, True, \
-                parse_dublincore=parse_dublincore)
-        else:
-            document = WLDocument.from_string(input, True, \
-                parse_dublincore=parse_dublincore)
+        document = copy.deepcopy(wldoc)
+        del wldoc
+        document.swap_endlines()
  
          if flags:
              for flag in flags:
  
          if flags:
              for flag in flags:
@@ -56,6 +50,8 @@ def transform(input, output_filename=None, is_file=True, \
  
          document.clean_ed_note()
  
  
          document.clean_ed_note()
  
+        if not options:
+            options = {}
          result = document.transform(style, **options)
          del document # no longer needed large object :)
  
          result = document.transform(style, **options)
          del document # no longer needed large object :)
  
@@ -63,16 +59,10 @@ def transform(input, output_filename=None, is_file=True, \
              add_anchors(result.getroot())
              add_table_of_contents(result.getroot())
  
              add_anchors(result.getroot())
              add_table_of_contents(result.getroot())
  
-            if output_filename is not None:
-                result.write(output_filename, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8')
-            else:
-                return result
-            return True
+            return OutputFile.from_string(etree.tostring(result, method='html',
+                xml_declaration=False, pretty_print=True, encoding='utf-8'))
          else:
          else:
-            if output_filename is not None:
-                return False
-            else:
-                return "<empty />"
+            return None
      except KeyError:
          raise ValueError("'%s' is not a valid stylesheet.")
      except (XMLSyntaxError, XSLTApplyError), e:
      except KeyError:
          raise ValueError("'%s' is not a valid stylesheet.")
      except (XMLSyntaxError, XSLTApplyError), e:
diff --git a/librarian/mobi.py b/librarian/mobi.py

index cd894fe..a93315e 100755 (executable)
--- a/librarian/mobi.py
+++ b/librarian/mobi.py
@@ -4,60 +4,25 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  import os
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  import os
-import os.path
  import subprocess
  from tempfile import NamedTemporaryFile
  import subprocess
  from tempfile import NamedTemporaryFile
-from lxml import etree
  
  
+from librarian import OutputFile
  from librarian.cover import WLCover
  from librarian.cover import WLCover
-from librarian import epub, get_resource, NoDublinCore, RDFNS
-from librarian.dcparser import BookInfo
+from librarian import get_resource
  
  
  
  
-def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False,
+def transform(wldoc, verbose=False,
                sample=None, cover=None, flags=None):
      """ produces a MOBI file
  
                sample=None, cover=None, flags=None):
      """ produces a MOBI file
  
-    provider: a DocProvider
-    slug: slug of file to process, available by provider
-    output_file: path to output file
-    output_dir: path to directory to save output file to; either this or output_file must be present
-    make_dir: writes output to <output_dir>/<author>/<slug>.mobi instead of <output_dir>/<slug>.mobi
+    wldoc: a WLDocument
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover object
      flags: less-advertising,
      """
  
      sample=n: generate sample e-book (with at least n paragraphs)
      cover: a cover.Cover object
      flags: less-advertising,
      """
  
-    # read metadata from the first file
-    if file_path:
-        if slug:
-            raise ValueError('slug or file_path should be specified, not both')
-        f = open(file_path, 'r')
-        input_xml = etree.parse(f)
-        f.close()
-    else:
-        if not slug:
-            raise ValueError('either slug or file_path should be specified')
-        input_xml = etree.parse(provider[slug])
-
-    metadata = input_xml.find('.//'+RDFNS('Description'))
-    if metadata is None:
-        raise NoDublinCore('Document has no DublinCore - which is required.')
-    book_info = BookInfo.from_element(input_xml)
-
-    # if output to dir, create the file
-    if output_dir is not None:
-        if make_dir:
-            author = unicode(book_info.author)
-            output_dir = os.path.join(output_dir, author)
-            try:
-                os.makedirs(output_dir)
-            except OSError:
-                pass
-        if slug:
-            output_file = os.path.join(output_dir, '%s.mobi' % slug)
-        else:
-            output_file = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.mobi')
+    book_info = wldoc.book_info
  
      # provide a cover by default
      if not cover:
  
      # provide a cover by default
      if not cover:
@@ -66,19 +31,21 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
      c = cover(book_info.author.readable(), book_info.title)
      c.save(cover_file)
  
      c = cover(book_info.author.readable(), book_info.title)
      c.save(cover_file)
  
-    epub_file = NamedTemporaryFile(suffix='.epub', delete=False)
      if not flags:
          flags = []
      flags = list(flags) + ['without-fonts']
      if not flags:
          flags = []
      flags = list(flags) + ['without-fonts']
-    epub.transform(provider, file_path=file_path, output_file=epub_file, verbose=verbose,
-              sample=sample, html_toc=True, flags=flags, style=get_resource('mobi/style.css'))
+    epub = wldoc.as_epub(verbose=verbose, sample=sample, html_toc=True,
+            flags=flags, style=get_resource('mobi/style.css'))
  
      if verbose:
          kwargs = {}
      else:
          devnull = open("/dev/null", 'w')
          kwargs = {"stdout": devnull, "stderr": devnull}
  
      if verbose:
          kwargs = {}
      else:
          devnull = open("/dev/null", 'w')
          kwargs = {"stdout": devnull, "stderr": devnull}
-    subprocess.check_call(['ebook-convert', epub_file.name, output_file,
+
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False)
+    output_file.close()
+    subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name,
              '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs)
              '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs)
-    os.unlink(epub_file.name)
      os.unlink(cover_file.name)
      os.unlink(cover_file.name)
+    return OutputFile.from_filename(output_file.name)
+\ No newline at end of file
diff --git a/librarian/packagers.py b/librarian/packagers.py

index 054f068..ebeb5b3 100644 (file)
--- a/librarian/packagers.py
+++ b/librarian/packagers.py
@@ -6,8 +6,8 @@
  import os
  from copy import deepcopy
  from lxml import etree
  import os
  from copy import deepcopy
  from lxml import etree
-from librarian import epub, pdf, DirDocProvider, ParseError, cover
-from librarian.dcparser import BookInfo
+from librarian import pdf, epub, DirDocProvider, ParseError, cover
+from librarian.parser import WLDocument
  
  
  class Packager(object):
  
  
  class Packager(object):
@@ -26,8 +26,11 @@ class Packager(object):
              except:
                  pass
          outfile = os.path.join(output_dir, slug + '.' + cls.ext)
              except:
                  pass
          outfile = os.path.join(output_dir, slug + '.' + cls.ext)
-        cls.converter.transform(provider, file_path=main_input, output_file=outfile,
+
+        doc = WLDocument.from_file(main_input, provider=provider)
+        output_file = cls.converter.transform(doc,
                  cover=cls.cover, flags=cls.flags)
                  cover=cls.cover, flags=cls.flags)
+        doc.save_output_file(output_file, output_path=outfile)
  
  
      @classmethod
  
  
      @classmethod
@@ -78,7 +81,6 @@ class VirtualoEpubPackager(Packager):
          """ truncates text to at most `limit' bytes in utf-8 """
          if text is None:
              return text
          """ truncates text to at most `limit' bytes in utf-8 """
          if text is None:
              return text
-        orig_text = text
          if len(text.encode('utf-8')) > limit:
              newlimit = limit - 3
              while len(text.encode('utf-8')) > newlimit:
          if len(text.encode('utf-8')) > limit:
              newlimit = limit - 3
              while len(text.encode('utf-8')) > newlimit:
@@ -116,7 +118,8 @@ class VirtualoEpubPackager(Packager):
                  outfile_dir = os.path.join(output_dir, slug)
                  os.makedirs(os.path.join(output_dir, slug))
  
                  outfile_dir = os.path.join(output_dir, slug)
                  os.makedirs(os.path.join(output_dir, slug))
  
-                info = BookInfo.from_file(main_input)
+                doc = WLDocument.from_file(main_input, provider=provider)
+                info = doc.book_info
  
                  product_elem = deepcopy(product)
                  product_elem[0].text = cls.utf_trunc(slug, 100)
  
                  product_elem = deepcopy(product)
                  product_elem[0].text = cls.utf_trunc(slug, 100)
@@ -133,8 +136,10 @@ class VirtualoEpubPackager(Packager):
                      ).save(os.path.join(outfile_dir, slug+'.jpg'))
                  outfile = os.path.join(outfile_dir, '1.epub')
                  outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
                      ).save(os.path.join(outfile_dir, slug+'.jpg'))
                  outfile = os.path.join(outfile_dir, '1.epub')
                  outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
-                epub.transform(provider, file_path=main_input, output_file=outfile)
-                epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
+                doc.save_output_file(epub.transform(doc),
+                        output_path=outfile)
+                doc.save_output_file(epub.transform(doc, sample=25), 
+                        output_path=outfile_sample)
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': main_input,
diff --git a/librarian/parser.py b/librarian/parser.py

index afc4f1a..469b7df 100644 (file)
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,7 +3,7 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from librarian import ValidationError, NoDublinCore,  ParseError
+from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
  from librarian import RDFNS
  from librarian import dcparser
  
  from librarian import RDFNS
  from librarian import dcparser
  
@@ -11,14 +11,17 @@ from xml.parsers.expat import ExpatError
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  
+import os
  import re
  from StringIO import StringIO
  
  class WLDocument(object):
  import re
  from StringIO import StringIO
  
  class WLDocument(object):
-    LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
+    LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
+    provider = None
  
  
-    def __init__(self, edoc, parse_dublincore=True):
+    def __init__(self, edoc, parse_dublincore=True, provider=None):
          self.edoc = edoc
          self.edoc = edoc
+        self.provider = provider
  
          root_elem = edoc.getroot()
  
  
          root_elem = edoc.getroot()
  
@@ -42,7 +45,7 @@ class WLDocument(object):
          return cls.from_file(StringIO(xml), *args, **kwargs)
  
      @classmethod
          return cls.from_file(StringIO(xml), *args, **kwargs)
  
      @classmethod
-    def from_file(cls, xmlfile, swap_endlines=False, parse_dublincore=True):
+    def from_file(cls, xmlfile, parse_dublincore=True, provider=None):
  
          # first, prepare for parsing
          if isinstance(xmlfile, basestring):
  
          # first, prepare for parsing
          if isinstance(xmlfile, basestring):
@@ -63,20 +66,17 @@ class WLDocument(object):
              parser = etree.XMLParser(remove_blank_text=False)
              tree = etree.parse(StringIO(data.encode('utf-8')), parser)
  
              parser = etree.XMLParser(remove_blank_text=False)
              tree = etree.parse(StringIO(data.encode('utf-8')), parser)
  
-            if swap_endlines:
-                cls.swap_endlines(tree)
-
-            return cls(tree, parse_dublincore=parse_dublincore)
+            return cls(tree, parse_dublincore=parse_dublincore, provider=provider)
          except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
              raise ParseError(e)
  
          except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
              raise ParseError(e)
  
-    @classmethod
-    def swap_endlines(cls, tree):
+    def swap_endlines(self):
+        """Converts line breaks in stanzas into <br/> tags."""
          # only swap inside stanzas
          # only swap inside stanzas
-        for elem in tree.iter('strofa'):
+        for elem in self.edoc.iter('strofa'):
              for child in list(elem):
                  if child.tail:
              for child in list(elem):
                  if child.tail:
-                    chunks = cls.LINE_SWAP_EXPR.split(child.tail)
+                    chunks = self.LINE_SWAP_EXPR.split(child.tail)
                      ins_index = elem.index(child) + 1
                      while len(chunks) > 1:
                          ins = etree.Element('br')
                      ins_index = elem.index(child) + 1
                      while len(chunks) > 1:
                          ins = etree.Element('br')
@@ -84,13 +84,22 @@ class WLDocument(object):
                          elem.insert(ins_index, ins)
                      child.tail = chunks.pop(0)
              if elem.text:
                          elem.insert(ins_index, ins)
                      child.tail = chunks.pop(0)
              if elem.text:
-                chunks = cls.LINE_SWAP_EXPR.split(elem.text)
+                chunks = self.LINE_SWAP_EXPR.split(elem.text)
                  while len(chunks) > 1:
                      ins = etree.Element('br')
                      ins.tail = chunks.pop()
                      elem.insert(0, ins)
                  elem.text = chunks.pop(0)
  
                  while len(chunks) > 1:
                      ins = etree.Element('br')
                      ins.tail = chunks.pop()
                      elem.insert(0, ins)
                  elem.text = chunks.pop(0)
  
+    def parts(self):
+        if self.provider is None:
+            raise NoProvider('No document provider supplied.')
+        if self.book_info is None:
+            raise NoDublinCore('No Dublin Core in document.')
+        for part_uri in self.book_info.parts:
+            yield self.from_file(self.provider.by_uri(part_uri),
+                    provider=self.provider)
+
      def chunk(self, path):
          # convert the path to XPath
          expr = self.path_to_xpath(path)
      def chunk(self, path):
          # convert the path to XPath
          expr = self.path_to_xpath(path)
@@ -152,3 +161,40 @@ class WLDocument(object):
              node.clear()
              node.tag = 'span'
              node.tail = tail
              node.clear()
              node.tag = 'span'
              node.tail = tail
+
+    # Converters
+
+    def as_html(self, *args, **kwargs):
+        from librarian import html
+        return html.transform(self, *args, **kwargs)
+
+    def as_text(self, *args, **kwargs):
+        from librarian import text
+        return text.transform(self, *args, **kwargs)
+
+    def as_epub(self, *args, **kwargs):
+        from librarian import epub
+        return epub.transform(self, *args, **kwargs)
+
+    def as_pdf(self, *args, **kwargs):
+        from librarian import pdf
+        return pdf.transform(self, *args, **kwargs)
+
+    def as_mobi(self, *args, **kwargs):
+        from librarian import mobi
+        return mobi.transform(self, *args, **kwargs)
+
+    def save_output_file(self, output_file, output_path=None,
+            output_dir_path=None, make_author_dir=False, ext=None):
+        if output_dir_path:
+            save_path = output_dir_path
+            if make_author_dir:
+                save_path = os.path.join(save_path,
+                        unicode(self.book_info.author).encode('utf-8'))
+            save_path = os.path.join(save_path, self.book_info.slug)
+            if ext:
+                save_path += '.%s' % ext
+        else:
+            save_path = output_path
+
+        output_file.save_as(save_path)
diff --git a/librarian/pdf.py b/librarian/pdf.py

index 1bfd949..02438a6 100644 (file)
--- a/librarian/pdf.py
+++ b/librarian/pdf.py
@@ -8,20 +8,18 @@ import os
  import os.path
  import shutil
  from StringIO import StringIO
  import os.path
  import shutil
  from StringIO import StringIO
-from tempfile import mkdtemp
+from tempfile import mkdtemp, NamedTemporaryFile
  import re
  from copy import deepcopy
  from subprocess import call, PIPE
  
  import re
  from copy import deepcopy
  from subprocess import call, PIPE
  
-import sys
-
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource
+from librarian import ParseError, DCNS, get_resource, OutputFile
  from librarian import functions
  
  
  from librarian import functions
  
  
@@ -173,17 +171,11 @@ def package_available(package, args='', verbose=False):
      return p == 0
  
  
      return p == 0
  
  
-def transform(provider, slug=None, file_path=None,
-              output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None, morefloats=None,
+def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
                cover=None, flags=None, customizations=None):
      """ produces a PDF file with XeLaTeX
  
                cover=None, flags=None, customizations=None):
      """ produces a PDF file with XeLaTeX
  
-    provider: a DocProvider
-    slug: slug of file to process, available by provider
-    file_path can be provided instead of a slug
-    output_file: file-like object or path to output file
-    output_dir: path to directory to save output file to; either this or output_file must be present
-    make_dir: writes output to <output_dir>/<author>/<slug>.pdf istead of <output_dir>/<slug>.pdf
+    wldoc: a WLDocument
      verbose: prints all output from LaTeX
      save_tex: path to save the intermediary LaTeX file to
      morefloats (old/new/none): force specific morefloats
      verbose: prints all output from LaTeX
      save_tex: path to save the intermediary LaTeX file to
      morefloats (old/new/none): force specific morefloats
@@ -194,14 +186,7 @@ def transform(provider, slug=None, file_path=None,
  
      # Parse XSLT
      try:
  
      # Parse XSLT
      try:
-        if file_path:
-            if slug:
-                raise ValueError('slug or file_path should be specified, not both')
-            document = load_including_children(provider, file_path=file_path)
-        else:
-            if not slug:
-                raise ValueError('either slug or file_path should be specified')
-            document = load_including_children(provider, slug=slug)
+        document = load_including_children(wldoc)
  
          if cover:
              document.edoc.getroot().set('data-cover-width', str(cover.width))
  
          if cover:
              document.edoc.getroot().set('data-cover-width', str(cover.width))
@@ -227,11 +212,6 @@ def transform(provider, slug=None, file_path=None,
          substitute_hyphens(document.edoc)
          fix_hanging(document.edoc)
  
          substitute_hyphens(document.edoc)
          fix_hanging(document.edoc)
  
-        # find output dir
-        if make_dir and output_dir is not None:
-            author = unicode(document.book_info.author)
-            output_dir = os.path.join(output_dir, author)
-
          # wl -> TeXML
          style_filename = get_stylesheet("wl2tex")
          style = etree.parse(style_filename)
          # wl -> TeXML
          style_filename = get_stylesheet("wl2tex")
          style = etree.parse(style_filename)
@@ -273,56 +253,38 @@ def transform(provider, slug=None, file_path=None,
  
          os.chdir(cwd)
  
  
          os.chdir(cwd)
  
-        # save the PDF
+        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
          pdf_path = os.path.join(temp, 'doc.pdf')
          pdf_path = os.path.join(temp, 'doc.pdf')
-        if output_dir is not None:
-            try:
-                os.makedirs(output_dir)
-            except OSError:
-                pass
-            if slug:
-                output_path = os.path.join(output_dir, '%s.pdf' % slug)
-            else:
-                output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.pdf')
-            shutil.move(pdf_path, output_path)
-        else:
-            if hasattr(output_file, 'write'):
-                # file-like object
-                with open(pdf_path) as f:
-                    output_file.write(f.read())
-                output_file.close()
-            else:
-                # path to output file
-                shutil.copy(pdf_path, output_file)
+        shutil.move(pdf_path, output_file.name)
          shutil.rmtree(temp)
          shutil.rmtree(temp)
+        return OutputFile.from_filename(output_file.name)
  
      except (XMLSyntaxError, XSLTApplyError), e:
          raise ParseError(e)
  
  
  
      except (XMLSyntaxError, XSLTApplyError), e:
          raise ParseError(e)
  
  
-def load_including_children(provider, slug=None, uri=None, file_path=None):
-    """ makes one big xml file with children inserted at end
-    either slug or uri must be provided
+def load_including_children(wldoc=None, provider=None, uri=None):
+    """ Makes one big xml file with children inserted at end.
+    
+    Either wldoc or provider and URI must be provided.
      """
  
      """
  
-    if uri:
+    if uri and provider:
          f = provider.by_uri(uri)
          f = provider.by_uri(uri)
-    elif slug:
-        f = provider[slug]
-    elif file_path:
-        f = open(file_path, 'r')
+        text = f.read().decode('utf-8')
+        f.close()
+    elif wldoc is not None:
+        text = etree.tostring(wldoc.edoc, encoding=unicode)
+        provider = wldoc.provider
      else:
      else:
-        raise ValueError('Neither slug, URI nor file path provided for a book.')
+        raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
  
  
-    text = f.read().decode('utf-8')
      text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
  
      text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
  
-    document = WLDocument.from_string(text, True,
-        parse_dublincore=True)
+    document = WLDocument.from_string(text, parse_dublincore=True)
+    document.swap_endlines()
  
  
-    f.close()
      for child_uri in document.book_info.parts:
      for child_uri in document.book_info.parts:
-        print child_uri
-        child = load_including_children(provider, uri=child_uri)
+        child = load_including_children(provider=provider, uri=child_uri)
          document.edoc.getroot().append(child.edoc.getroot())
      return document
          document.edoc.getroot().append(child.edoc.getroot())
      return document
diff --git a/librarian/text.py b/librarian/text.py

index c23bcd6..d99e7cf 100644 (file)
--- a/librarian/text.py
+++ b/librarian/text.py
@@ -3,7 +3,8 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from librarian import dcparser, parser, functions
+import copy
+from librarian import functions, OutputFile
  from lxml import etree
  import os
  
  from lxml import etree
  import os
  
@@ -28,7 +29,7 @@ Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowocz
  %(description)s%(contributors)s
  """
  
  %(description)s%(contributors)s
  """
  
-def transform(input_file, output_file, parse_dublincore=True, flags=None, **options):
+def transform(wldoc, flags=None, **options):
      """
      Transforms input_file in XML to output_file in TXT.
      possible flags: raw-text,
      """
      Transforms input_file in XML to output_file in TXT.
      possible flags: raw-text,
@@ -37,7 +38,9 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
      style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
      style = etree.parse(style_filename)
  
      style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
      style = etree.parse(style_filename)
  
-    document = parser.WLDocument.from_file(input_file, True, parse_dublincore=parse_dublincore)
+    document = copy.deepcopy(wldoc)
+    del wldoc
+    document.swap_endlines()
  
      if flags:
          for flag in flags:
  
      if flags:
          for flag in flags:
@@ -46,10 +49,10 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
      result = document.transform(style, **options)
  
      if not flags or 'raw-text' not in flags:
      result = document.transform(style, **options)
  
      if not flags or 'raw-text' not in flags:
-        if parse_dublincore:
-            parsed_dc = dcparser.BookInfo.from_element(document.edoc)
+        if document.book_info:
+            parsed_dc = document.book_info
              description = parsed_dc.description
              description = parsed_dc.description
-            url = parsed_dc.url
+            url = document.book_info.url
      
              license_description = parsed_dc.license_description
              license = parsed_dc.license
      
              license_description = parsed_dc.license_description
              license = parsed_dc.license
@@ -75,7 +78,7 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
              license_description = ""
              source = ""
              contributors = ""
              license_description = ""
              source = ""
              contributors = ""
-        output_file.write((TEMPLATE % {
+        return OutputFile.from_string((TEMPLATE % {
              'description': description,
              'url': url,
              'license_description': license_description,
              'description': description,
              'url': url,
              'license_description': license_description,
@@ -84,5 +87,5 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
              'contributors': contributors,
          }).encode('utf-8'))
      else:
              'contributors': contributors,
          }).encode('utf-8'))
      else:
-        output_file.write(unicode(result).encode('utf-8'))
+        return OutputFile.from_string(unicode(result).encode('utf-8'))
  
  
diff --git a/scripts/book2epub b/scripts/book2epub

index 9adf4b4..9af3692 100755 (executable)
--- a/scripts/book2epub
+++ b/scripts/book2epub
@@ -7,7 +7,8 @@
  import os.path
  import optparse
  
  import os.path
  import optparse
  
-from librarian import epub, DirDocProvider, ParseError
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
@@ -37,18 +38,20 @@ if __name__ == '__main__':
          for main_input in input_filenames:
              if options.verbose:
                  print main_input
          for main_input in input_filenames:
              if options.verbose:
                  print main_input
+
              path, fname = os.path.realpath(main_input).rsplit('/', 1)
              provider = DirDocProvider(path)
              path, fname = os.path.realpath(main_input).rsplit('/', 1)
              provider = DirDocProvider(path)
-
-            output_dir = output_file = None
-            if options.output_dir:
-                output_dir = options.output_dir
-            elif options.output_file:
-                output_file = options.output_file
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.epub'
              else:
              else:
-                output_dir = path
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
+            epub = doc.as_epub()
+
+            doc.save_output_file(epub,
+                output_file, options.output_dir, options.make_dir, 'epub')
  
  
-            epub.transform(provider, file_path=main_input, output_dir=output_dir, output_file=output_file, make_dir=options.make_dir)
      except ParseError, e:
          print '%(file)s:%(name)s:%(message)s' % {
              'file': main_input,
      except ParseError, e:
          print '%(file)s:%(name)s:%(message)s' % {
              'file': main_input,
diff --git a/scripts/book2html b/scripts/book2html

index d61b299..1e88823 100755 (executable)
--- a/scripts/book2html
+++ b/scripts/book2html
@@ -7,7 +7,8 @@
  import os
  import optparse
  
  import os
  import optparse
  
-from librarian import html, ParseError
+from librarian import ParseError
+from librarian.parser import WLDocument
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
@@ -35,7 +36,10 @@ if __name__ == '__main__':
  
          output_filename = os.path.splitext(input_filename)[0] + '.html'
          try:
  
          output_filename = os.path.splitext(input_filename)[0] + '.html'
          try:
-            html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore, flags=('full-page',))
+            doc = WLDocument.from_file(input_filename,
+                parse_dublincore=options.parse_dublincore)
+            html = doc.as_html(flags=('full-page',))
+            doc.save_output_file(html, output_path=output_filename)
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': input_filename,
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': input_filename,
diff --git a/scripts/book2ihtml b/scripts/book2ihtml

index 97d8ebd..779f245 100755 (executable)
--- a/scripts/book2ihtml
+++ b/scripts/book2ihtml
@@ -7,7 +7,8 @@
  import os
  import optparse
  
  import os
  import optparse
  
-from librarian import html, ParseError
+from librarian import ParseError
+from librarian.parser import WLDocument
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
@@ -35,8 +36,10 @@ if __name__ == '__main__':
  
          output_filename = os.path.splitext(input_filename)[0] + '.html'
          try:
  
          output_filename = os.path.splitext(input_filename)[0] + '.html'
          try:
-            html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore,\
-                stylesheet='partial')
+            doc = WLDocument.from_file(input_filename,
+                parse_dublincore=options.parse_dublincore)
+            html = doc.as_html(flags=('full-page',), stylesheet='partial')
+            doc.save_output_file(html, output_path=output_filename)
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': input_filename,
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': input_filename,
diff --git a/scripts/book2mobi b/scripts/book2mobi

index 1c00b51..665dcfa 100755 (executable)
--- a/scripts/book2mobi
+++ b/scripts/book2mobi
@@ -7,7 +7,8 @@
  import os.path
  import optparse
  
  import os.path
  import optparse
  
-from librarian import mobi, DirDocProvider, ParseError
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
@@ -35,20 +36,18 @@ if __name__ == '__main__':
      # Do some real work
      try:
          for main_input in input_filenames:
      # Do some real work
      try:
          for main_input in input_filenames:
-            if options.verbose:
-                print main_input
              path, fname = os.path.realpath(main_input).rsplit('/', 1)
              provider = DirDocProvider(path)
              path, fname = os.path.realpath(main_input).rsplit('/', 1)
              provider = DirDocProvider(path)
-
-            output_dir = output_file = None
-            if options.output_dir:
-                output_dir = options.output_dir
-            elif options.output_file:
-                output_file = options.output_file
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.mobi'
              else:
              else:
-                output_dir = path
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
+            mobi = doc.as_mobi()
  
  
-            mobi.transform(provider, file_path=main_input, output_dir=output_dir, output_file=output_file, make_dir=options.make_dir)
+            doc.save_output_file(mobi,
+                output_file, options.output_dir, options.make_dir, 'mobi')
      except ParseError, e:
          print '%(file)s:%(name)s:%(message)s' % {
              'file': main_input,
      except ParseError, e:
          print '%(file)s:%(name)s:%(message)s' % {
              'file': main_input,
diff --git a/scripts/book2pdf b/scripts/book2pdf

index d10f400..171264b 100755 (executable)
--- a/scripts/book2pdf
+++ b/scripts/book2pdf
@@ -6,7 +6,10 @@
  #
  import os.path
  from optparse import OptionParser
  #
  import os.path
  from optparse import OptionParser
-from librarian import pdf, DirDocProvider, ParseError
+
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
+
  
  if __name__ == '__main__':
      usage = """Usage: %prog [options] SOURCE [SOURCE...]
  
  if __name__ == '__main__':
      usage = """Usage: %prog [options] SOURCE [SOURCE...]
@@ -31,33 +34,25 @@ if __name__ == '__main__':
          parser.print_help()
          exit(1)
  
          parser.print_help()
          exit(1)
  
-    try:
-        if options.output_dir and options.output_file:
-            raise ValueError("Either --output-dir or --output file should be specified")
+    if options.output_dir and options.output_file:
+        raise ValueError("Either --output-dir or --output file should be specified")
  
  
+    try:
          for main_input in args:
          for main_input in args:
-            if options.verbose:
-                print main_input
              path, fname = os.path.realpath(main_input).rsplit('/', 1)
              provider = DirDocProvider(path)
              path, fname = os.path.realpath(main_input).rsplit('/', 1)
              provider = DirDocProvider(path)
-
-            output_file = output_dir = None
-            if options.output_dir:
-                output_dir = options.output_dir
-            elif options.output_file:
-                output_file = options.output_file
+            output_file, output_dir = options.output_file, options.output_dir
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.pdf'
              else:
              else:
-                output_dir = path
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
+            pdf = doc.as_pdf(save_tex=options.save_tex,
+                        morefloats=options.morefloats)
  
  
-            pdf.transform(provider,
-                file_path=main_input,
-                output_file=output_file,
-                output_dir=output_dir,
-                verbose=options.verbose,
-                make_dir=options.make_dir,
-                save_tex=options.save_tex,
-                morefloats=options.morefloats
-                )
+            doc.save_output_file(pdf,
+                output_file, options.output_dir, options.make_dir, 'pdf')
      except ParseError, e:
          print '%(file)s:%(name)s:%(message)s; use -v to see more output' % {
              'file': main_input,
      except ParseError, e:
          print '%(file)s:%(name)s:%(message)s; use -v to see more output' % {
              'file': main_input,
diff --git a/scripts/book2txt b/scripts/book2txt

index d56d6ff..9cfdef2 100755 (executable)
--- a/scripts/book2txt
+++ b/scripts/book2txt
@@ -7,8 +7,8 @@
  import os
  import optparse
  
  import os
  import optparse
  
-from librarian import text
-from librarian import dcparser, ParseError
+from librarian import ParseError
+from librarian.parser import WLDocument
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
@@ -38,9 +38,10 @@ if __name__ == '__main__':
  
          output_filename = os.path.splitext(input_filename)[0] + '.txt'
          try:
  
          output_filename = os.path.splitext(input_filename)[0] + '.txt'
          try:
-            output_file = open(output_filename, 'w')
-            text.transform(open(input_filename), output_file, parse_dublincore=options.parse_dublincore,
-                wrapping=str(options.wrapping))
+            doc = WLDocument.from_file(input_filename,
+                parse_dublincore=options.parse_dublincore)
+            html = doc.as_text(wrapping=str(options.wrapping))
+            doc.save_output_file(html, output_path=output_filename)
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': input_filename,
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
                  'file': input_filename,
diff --git a/setup.py b/setup.py

index 1394643..023c943 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ def whole_tree(prefix, path):
  
  setup(
      name='librarian',
  
  setup(
      name='librarian',
-    version='1.3',
+    version='1.4',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
diff --git a/tests/files/text/asnyk_miedzy_nami.xml b/tests/files/text/asnyk_miedzy_nami.xml

deleted file mode 100644 (file)

index 36d8df6..0000000
--- a/tests/files/text/asnyk_miedzy_nami.xml
+++ /dev/null
@@ -1,65 +0,0 @@
-<?xml version='1.0' encoding='utf-8'?>
-<utwor>
-  <liryka_lp>
-
-<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
-<rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Asnyk/Między_nami_nic_nie_było">
-<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
-<dc:title xml:lang="pl">Między nami nic nie było</dc:title>
-<dc:contributor.editor xml:lang="pl" />
-<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
-<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
-<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
-<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
-<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
-<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
-<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
-<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:identifier.url>
-<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
-<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
-<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
-<dc:date.pd xml:lang="pl">1897</dc:date.pd>
-<dc:format xml:lang="pl">xml</dc:format>
-<dc:type xml:lang="pl">text</dc:type>
-<dc:type xml:lang="en">text</dc:type>
-<dc:date xml:lang="pl">2007-09-06</dc:date>
-<dc:audience xml:lang="pl">L</dc:audience>
-<dc:language xml:lang="pl">pol</dc:language>
-</rdf:Description>
-</rdf:RDF>
-
-
-<autor_utworu>Adam Asnyk</autor_utworu>
-
-<nazwa_utworu><begin id="b1189062500041"/><motyw id="m1189062500041">Miłość platoniczna</motyw>Między nami nic nie było</nazwa_utworu>
-
-
-
-<strofa>Między nami nic nie było!/
-Żadnych zwierzeń, wyznań żadnych!/
-Nic nas z sobą nie łączyło ---/
-Prócz wiosennych marzeń zdradnych;</strofa>
-
-
-
-<strofa><begin id="b1189062528872"/><motyw id="m1189062528872">Natura</motyw>Prócz tych woni, barw i blasków,/
-Unoszących się w przestrzeni;/
-Prócz szumiących śpiewem lasków/
-I tej świeżej łąk zieleni;</strofa>
-
-
-
-<strofa>Prócz tych kaskad i potoków,/
-Zraszających każdy parów,/
-Prócz girlandy tęcz, obłoków,/
-Prócz natury słodkich czarów;</strofa>
-
-
-
-<strofa>Prócz tych wspólnych, jasnych zdrojów,/
-Z których serce zachwyt piło;/
-Prócz pierwiosnków i powojów,---/
-Między nami nic nie było!<end id="e1189062528872"/><end id="e1189062500041"/></strofa>
-
-</liryka_lp>
-</utwor>
diff --git a/tests/files/text/asnyk_zbior.xml b/tests/files/text/asnyk_zbior.xml

new file mode 100755 (executable)

index 0000000..c585a8b
--- /dev/null
+++ b/tests/files/text/asnyk_zbior.xml
@@ -0,0 +1,29 @@
+<?xml version='1.0' encoding='utf-8'?>
+<utwor>
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+<rdf:Description rdf:about="http://redakcja.wolnelektury.pl/documents/book/asnyk-poezye/">
+<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
+<dc:title xml:lang="pl">Poezye</dc:title>
+<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
+<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
+<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/poezye</dc:identifier.url>
+<dc:relation.hasPart xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:relation.hasPart>
+<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
+<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
+<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
+<dc:date.pd xml:lang="pl">1897</dc:date.pd>
+<dc:format xml:lang="pl">xml</dc:format>
+<dc:type xml:lang="pl">text</dc:type>
+<dc:type xml:lang="en">text</dc:type>
+<dc:date xml:lang="pl">2007-09-06</dc:date>
+<dc:audience xml:lang="pl">L</dc:audience>
+<dc:language xml:lang="pl">pol</dc:language>
+</rdf:Description>
+</rdf:RDF>
+
+
+</utwor>
diff --git a/tests/files/text/miedzy-nami-nic-nie-bylo.xml b/tests/files/text/miedzy-nami-nic-nie-bylo.xml

new file mode 100644 (file)

index 0000000..124940e
--- /dev/null
+++ b/tests/files/text/miedzy-nami-nic-nie-bylo.xml
@@ -0,0 +1,65 @@
+<?xml version='1.0' encoding='utf-8'?>
+<utwor>
+  <liryka_lp>
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+<rdf:Description rdf:about="http://redakcja.wolnelektury.pl/documents/book/miedzy-nami-nic-nie-bylo/">
+<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
+<dc:title xml:lang="pl">Między nami nic nie było</dc:title>
+<dc:contributor.editor xml:lang="pl" />
+<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
+<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
+<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
+<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:identifier.url>
+<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
+<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
+<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
+<dc:date.pd xml:lang="pl">1897</dc:date.pd>
+<dc:format xml:lang="pl">xml</dc:format>
+<dc:type xml:lang="pl">text</dc:type>
+<dc:type xml:lang="en">text</dc:type>
+<dc:date xml:lang="pl">2007-09-06</dc:date>
+<dc:audience xml:lang="pl">L</dc:audience>
+<dc:language xml:lang="pl">pol</dc:language>
+</rdf:Description>
+</rdf:RDF>
+
+
+<autor_utworu>Adam Asnyk</autor_utworu>
+
+<nazwa_utworu><begin id="b1189062500041"/><motyw id="m1189062500041">Miłość platoniczna</motyw>Między nami nic nie było</nazwa_utworu>
+
+
+
+<strofa>Między nami nic nie było!/
+Żadnych zwierzeń, wyznań żadnych!/
+Nic nas z sobą nie łączyło ---/
+Prócz wiosennych marzeń zdradnych;</strofa>
+
+
+
+<strofa><begin id="b1189062528872"/><motyw id="m1189062528872">Natura</motyw>Prócz tych woni, barw i blasków,/
+Unoszących się w przestrzeni;/
+Prócz szumiących śpiewem lasków/
+I tej świeżej łąk zieleni;</strofa>
+
+
+
+<strofa>Prócz tych kaskad i potoków,/
+Zraszających każdy parów,/
+Prócz girlandy tęcz, obłoków,/
+Prócz natury słodkich czarów;</strofa>
+
+
+
+<strofa>Prócz tych wspólnych, jasnych zdrojów,/
+Z których serce zachwyt piło;/
+Prócz pierwiosnków i powojów,---/
+Między nami nic nie było!<end id="e1189062528872"/><end id="e1189062500041"/></strofa>
+
+</liryka_lp>
+</utwor>
diff --git a/tests/test_epub.py b/tests/test_epub.py

new file mode 100644 (file)

index 0000000..9fc5637
--- /dev/null
+++ b/tests/test_epub.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from librarian import DirDocProvider
+from librarian.parser import WLDocument
+from nose.tools import *
+from utils import get_fixture
+
+
+def test_transform():
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_zbior.xml'),
+            provider=DirDocProvider(get_fixture('text', ''))
+        ).as_epub(flags=['without_fonts'])
diff --git a/tests/test_html.py b/tests/test_html.py

index 5187e06..51d6acd 100644 (file)
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -3,44 +3,38 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from librarian import html, NoDublinCore
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
  from nose.tools import *
  from nose.tools import *
-from utils import get_fixture, remove_output_file
+from utils import get_fixture
  
  
-def teardown_transform():
-    remove_output_file('text', 'asnyk_miedzy_nami.html')
  
  
-
-@with_setup(None, teardown_transform)
  def test_transform():
  def test_transform():
-    output_file_path = get_fixture('text', 'asnyk_miedzy_nami.html')
      expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')
  
      expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')
  
-    html.transform(
-        get_fixture('text', 'asnyk_miedzy_nami.xml'),
-        output_file_path,
-    )
+    html = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_html().get_string()
  
  
-    assert_equal(file(output_file_path).read(), file(expected_output_file_path).read())
+    assert_equal(html, file(expected_output_file_path).read())
  
  
  
  
-@with_setup(None, teardown_transform)
  @raises(NoDublinCore)
  def test_no_dublincore():
  @raises(NoDublinCore)
  def test_no_dublincore():
-    html.transform(
-        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
-        get_fixture('text', 'asnyk_miedzy_nami.html'),
-    )
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
+        ).as_html()
  
  
  
  
-@with_setup(None, teardown_transform)
  def test_passing_parse_dublincore_to_transform():
      """Passing parse_dublincore=False to transform omits DublinCore parsing."""
  def test_passing_parse_dublincore_to_transform():
      """Passing parse_dublincore=False to transform omits DublinCore parsing."""
-    html.transform(
-        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
-        get_fixture('text', 'asnyk_miedzy_nami.html'),
-        parse_dublincore=False,
-    )
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+            parse_dublincore=False,
+        ).as_html()
  
  def test_empty():
  
  def test_empty():
-    assert html.transform('<utwor />', is_file=False, parse_dublincore=False).find('empty')
+    assert not WLDocument.from_string(
+            '<utwor />',
+            parse_dublincore=False,
+        ).as_html()
diff --git a/tests/test_text.py b/tests/test_text.py

index 7ff94ca..70dfb60 100644 (file)
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -3,42 +3,32 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from librarian import text, NoDublinCore
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
  from nose.tools import *
  from nose.tools import *
-from utils import get_fixture, remove_output_file
+from utils import get_fixture
  
  
  
  
-def teardown_transform():
-    remove_output_file('text', 'asnyk_miedzy_nami.txt')
-
-
-@with_setup(None, teardown_transform)
  def test_transform():
  def test_transform():
-    output_file_path = get_fixture('text', 'asnyk_miedzy_nami.txt')
      expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
  
      expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
  
-    text.transform(
-        open(get_fixture('text', 'asnyk_miedzy_nami.xml')),
-        open(output_file_path, 'w'),
-    )
+    text = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_text().get_string()
  
  
-    assert_equal(file(output_file_path).read(), file(expected_output_file_path).read())
+    assert_equal(text, file(expected_output_file_path).read())
  
  
  
  
-@with_setup(None, teardown_transform)
  @raises(NoDublinCore)
  def test_no_dublincore():
  @raises(NoDublinCore)
  def test_no_dublincore():
-    text.transform(
-        open(get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')),
-        open(get_fixture('text', 'asnyk_miedzy_nami.txt'), 'w'),
-    )
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
+        ).as_text()
  
  
  
  
-@with_setup(None, teardown_transform)
  def test_passing_parse_dublincore_to_transform():
  def test_passing_parse_dublincore_to_transform():
-    """Passing parse_dublincore=False to transform omits DublinCore parsing."""
-    text.transform(
-        open(get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')),
-        open(get_fixture('text', 'asnyk_miedzy_nami.txt'), 'w'),
-        parse_dublincore=False,
-    )
+    """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+            parse_dublincore=False,
+        ).as_text()
diff --git a/tests/utils.py b/tests/utils.py

index b112066..3b1f4f5 100644 (file)
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -21,10 +21,3 @@ def get_fixture(dir_name, file_name):
  def get_all_fixtures(dir_name, glob_pattern='*'):
      """Returns list of paths for fixtures in directory dir_name matching the glob_pattern."""
      return [get_fixture(dir_name, file_name) for file_name in glob.glob(join(get_fixture_dir(dir_name), glob_pattern))]
  def get_all_fixtures(dir_name, glob_pattern='*'):
      """Returns list of paths for fixtures in directory dir_name matching the glob_pattern."""
      return [get_fixture(dir_name, file_name) for file_name in glob.glob(join(get_fixture_dir(dir_name), glob_pattern))]
-
-
-def remove_output_file(dir_name, file_name):
-    try:
-        os.remove(get_fixture(dir_name, file_name))
-    except:
-        pass
author	Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
	Mon, 5 Dec 2011 16:06:51 +0000 (17:06 +0100)
committer	Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
	Mon, 5 Dec 2011 16:11:03 +0000 (17:11 +0100)
librarian/__init__.py		patch \| blob \| history
librarian/dcparser.py		patch \| blob \| history
librarian/epub.py		patch \| blob \| history
librarian/html.py		patch \| blob \| history
librarian/mobi.py		patch \| blob \| history
librarian/packagers.py		patch \| blob \| history
librarian/parser.py		patch \| blob \| history
librarian/pdf.py		patch \| blob \| history
librarian/text.py		patch \| blob \| history
scripts/book2epub		patch \| blob \| history
scripts/book2html		patch \| blob \| history
scripts/book2ihtml		patch \| blob \| history
scripts/book2mobi		patch \| blob \| history
scripts/book2pdf		patch \| blob \| history
scripts/book2txt		patch \| blob \| history
setup.py		patch \| blob \| history
tests/files/text/asnyk_miedzy_nami.xml	[deleted file]	patch \| blob \| history
tests/files/text/asnyk_zbior.xml	[new file with mode: 0755]	patch \| blob
tests/files/text/miedzy-nami-nic-nie-bylo.xml	[new file with mode: 0644]	patch \| blob
tests/test_epub.py	[new file with mode: 0644]	patch \| blob
tests/test_html.py		patch \| blob \| history
tests/test_text.py		patch \| blob \| history
tests/utils.py		patch \| blob \| history