Basic biblical tools.

[librarian.git] / src / librarian / __init__.py
diff --git a/src/librarian/__init__.py b/src/librarian/__init__.py

index 119b6b1..4fbed92 100644 (file)
--- a/src/librarian/__init__.py
+++ b/src/librarian/__init__.py
@@ -1,47 +1,50 @@
-# -*- coding: utf-8 -*-
-#
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
  #
  #
-from __future__ import print_function, unicode_literals
-
+import io
  import os
  import re
  import shutil
  from tempfile import NamedTemporaryFile
  import urllib
  from lxml import etree
  import os
  import re
  import shutil
  from tempfile import NamedTemporaryFile
  import urllib
  from lxml import etree
-import six
-from six.moves.urllib.request import FancyURLopener
+from urllib.request import FancyURLopener
  from .util import makedirs
  
  from .util import makedirs
  
+# Compatibility imports.
+from .meta.types.wluri import WLURI
+
  
  
-@six.python_2_unicode_compatible
  class UnicodeException(Exception):
      def __str__(self):
          """ Dirty workaround for Python Unicode handling problems. """
          args = self.args[0] if len(self.args) == 1 else self.args
          try:
  class UnicodeException(Exception):
      def __str__(self):
          """ Dirty workaround for Python Unicode handling problems. """
          args = self.args[0] if len(self.args) == 1 else self.args
          try:
-            message = six.text_type(args)
+            message = str(args)
          except UnicodeDecodeError:
          except UnicodeDecodeError:
-            message = six.text_type(args, encoding='utf-8', errors='ignore')
+            message = str(args, encoding='utf-8', errors='ignore')
          return message
  
          return message
  
+
  class ParseError(UnicodeException):
      pass
  
  class ParseError(UnicodeException):
      pass
  
+
  class ValidationError(UnicodeException):
      pass
  
  class ValidationError(UnicodeException):
      pass
  
+
  class NoDublinCore(ValidationError):
      """There's no DublinCore section, and it's required."""
      pass
  
  class NoDublinCore(ValidationError):
      """There's no DublinCore section, and it's required."""
      pass
  
+
  class NoProvider(UnicodeException):
      """There's no DocProvider specified, and it's needed."""
      pass
  
  class NoProvider(UnicodeException):
      """There's no DocProvider specified, and it's needed."""
      pass
  
-class XMLNamespace(object):
+
+class XMLNamespace:
      '''A handy structure to represent names in an XML namespace.'''
  
      def __init__(self, uri):
      '''A handy structure to represent names in an XML namespace.'''
  
      def __init__(self, uri):
@@ -59,6 +62,7 @@ class XMLNamespace(object):
      def __str__(self):
          return '%s' % self.uri
  
      def __str__(self):
          return '%s' % self.uri
  
+
  class EmptyNamespace(XMLNamespace):
      def __init__(self):
          super(EmptyNamespace, self).__init__('')
  class EmptyNamespace(XMLNamespace):
      def __init__(self):
          super(EmptyNamespace, self).__init__('')
@@ -66,60 +70,19 @@ class EmptyNamespace(XMLNamespace):
      def __call__(self, tag):
          return tag
  
      def __call__(self, tag):
          return tag
  
+
  # some common namespaces we use
  XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace')
  RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
  DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
  # some common namespaces we use
  XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace')
  RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
  DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
-XINS = XMLNamespace("http://www.w3.org/2001/XInclude")
  XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
  XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
-NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/")
-OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
  PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")
  PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")
-
+FB2NS = XMLNamespace("http://www.gribuser.ru/xml/fictionbook/2.0")
+XLINKNS = XMLNamespace("http://www.w3.org/1999/xlink")
  WLNS = EmptyNamespace()
  
  
  WLNS = EmptyNamespace()
  
  
-@six.python_2_unicode_compatible
-class WLURI(object):
-    """Represents a WL URI. Extracts slug from it."""
-    slug = None
-
-    example = 'http://wolnelektury.pl/katalog/lektura/template/'
-    _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektur[ay]/'
-            '(?P<slug>[-a-z0-9]+)/?$')
-
-    def __init__(self, uri):
-        uri = six.text_type(uri)
-        self.uri = uri
-        self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
-
-    @classmethod
-    def strict(cls, uri):
-        match = cls._re_wl_uri.match(uri)
-        if not match:
-            raise ValidationError(u'Invalid URI (%s). Should match: %s' % (
-                        uri, cls._re_wl_uri.pattern))
-        return cls(uri)
-
-    @classmethod
-    def from_slug(cls, slug):
-        """Contructs an URI from slug.
-
-        >>> print(WLURI.from_slug('a-slug').uri)
-        http://wolnelektury.pl/katalog/lektura/a-slug/
-
-        """
-        uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
-        return cls(uri)
-
-    def __str__(self):
-        return self.uri
-
-    def __eq__(self, other):
-        return self.slug == other.slug
-
-
-class DocProvider(object):
+class DocProvider:
      """Base class for a repository of XML files.
  
      Used for generating joined files, like EPUBs.
      """Base class for a repository of XML files.
  
      Used for generating joined files, like EPUBs.
@@ -129,11 +92,6 @@ class DocProvider(object):
          """Should return a file-like object with a WL document XML."""
          raise NotImplementedError
  
          """Should return a file-like object with a WL document XML."""
          raise NotImplementedError
  
-    def by_uri(self, uri, wluri=WLURI):
-        """Should return a file-like object with a WL document XML."""
-        wluri = wluri(uri)
-        return self.by_slug(wluri.slug)
-
  
  class DirDocProvider(DocProvider):
      """ Serve docs from a directory of files in form <slug>.xml """
  
  class DirDocProvider(DocProvider):
      """ Serve docs from a directory of files in form <slug>.xml """
@@ -147,66 +105,11 @@ class DirDocProvider(DocProvider):
          return open(os.path.join(self.dir, fname), 'rb')
  
  
          return open(os.path.join(self.dir, fname), 'rb')
  
  
-from . import dcparser
-
-DEFAULT_BOOKINFO = dcparser.BookInfo(
-        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
-        { DCNS('creator'): [u'Some, Author'],
-          DCNS('title'): [u'Some Title'],
-          DCNS('subject.period'): [u'Unknown'],
-          DCNS('subject.type'): [u'Unknown'],
-          DCNS('subject.genre'): [u'Unknown'],
-          DCNS('date'): ['1970-01-01'],
-          DCNS('language'): [u'pol'],
-          # DCNS('date'): [creation_date],
-          DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
-          DCNS('description'):
-          [u"""Publikacja zrealizowana w ramach projektu
-             Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
-             wykonana przez Bibliotekę Narodową z egzemplarza
-             pochodzącego ze zbiorów BN."""],
-          DCNS('identifier.url'): [WLURI.example],
-          DCNS('rights'):
-            [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
-
-def xinclude_forURI(uri):
-    e = etree.Element(XINS("include"))
-    e.set("href", uri)
-    return etree.tostring(e, encoding='unicode')
-
-def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
-    """Wrap the text within the minimal XML structure with a DC template."""
-    bookinfo.created_at = creation_date
-
-    dcstring = etree.tostring(bookinfo.to_etree(), \
-        method='xml', encoding='unicode', pretty_print=True)
-
-    return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
-        u'\n</plain-text>\n</utwor>'
-
-
-def serialize_raw(element):
-    b = u'' + (element.text or '')
-
-    for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding='unicode',
-                pretty_print=True)
-        b += e
-
-    return b
-
-SERIALIZERS = {
-    'raw': serialize_raw,
-}
-
-def serialize_children(element, format='raw'):
-    return SERIALIZERS[format](element)
-
  def get_resource(path):
      return os.path.join(os.path.dirname(__file__), path)
  
  
  def get_resource(path):
      return os.path.join(os.path.dirname(__file__), path)
  
  
-class OutputFile(object):
+class OutputFile:
      """Represents a file returned by one of the converters."""
  
      _bytes = None
      """Represents a file returned by one of the converters."""
  
      _bytes = None
@@ -248,7 +151,7 @@ class OutputFile(object):
          """Get file as a file-like object."""
  
          if self._bytes is not None:
          """Get file as a file-like object."""
  
          if self._bytes is not None:
-            return six.BytesIO(self._bytes)
+            return io.BytesIO(self._bytes)
          elif self._filename is not None:
              return open(self._filename, 'rb')
  
          elif self._filename is not None:
              return open(self._filename, 'rb')
  
@@ -275,5 +178,7 @@ class OutputFile(object):
  
  
  class URLOpener(FancyURLopener):
  
  
  class URLOpener(FancyURLopener):
-    version = 'FNP Librarian (http://github.com/fnp/librarian)'
+    version = 'WL Librarian (http://github.com/fnp/librarian)'
+
+
  urllib._urlopener = URLOpener()
  urllib._urlopener = URLOpener()