New mobi builder.
author Radek Czajka <rczajka@rczajka.pl>
Mon, 20 Dec 2021 10:50:38 +0000 (11:50 +0100)
committer Radek Czajka <rczajka@rczajka.pl>
Mon, 20 Dec 2021 10:50:38 +0000 (11:50 +0100)
src/librarian/builders/__init__.py
src/librarian/builders/epub.py
src/librarian/builders/mobi.py [new file with mode: 0644]
src/librarian/elements/__init__.py
src/librarian/elements/base.py
src/librarian/elements/styles/tytul_dziela.py
src/librarian/hyphenator.py

diff --git a/src/librarian/builders/__init__.py b/src/librarian/builders/__init__.py
index e359cd6..d8acb82 100644 (file)
@@ -4,6 +4,7 @@ from .html import HtmlBuilder, StandaloneHtmlBuilder, DaisyHtmlBuilder
 from .sanitize import Sanitizer
 from .daisy import DaisyBuilder
 from .epub import EpubBuilder
 from .sanitize import Sanitizer
 from .daisy import DaisyBuilder
 from .epub import EpubBuilder
+from .mobi import MobiBuilder
 from .pdf import PdfBuilder
 
 
 from .pdf import PdfBuilder
 
 
@@ -16,5 +17,6 @@ builders = OrderedDict([
     ("sanitizer", Sanitizer),
 
     ("epub", EpubBuilder),
     ("sanitizer", Sanitizer),
 
     ("epub", EpubBuilder),
+    ("mobi", MobiBuilder),
     ("pdf", PdfBuilder),
 ])
     ("pdf", PdfBuilder),
 ])
diff --git a/src/librarian/builders/epub.py b/src/librarian/builders/epub.py
index 4471e30..401136f 100644 (file)
@@ -29,10 +29,11 @@ class Xhtml:
 class Builder:
     file_extension = None
 
 class Builder:
     file_extension = None
 
-    def __init__(self, base_url=None, fundraising=None):
+    def __init__(self, base_url=None, fundraising=None, cover=None):
         self._base_url = base_url or 'file:///home/rczajka/for/fnp/librarian/temp~/maly/img/'
         self.fundraising = fundraising
         self.footnotes = etree.Element('div', id='footnotes')
         self._base_url = base_url or 'file:///home/rczajka/for/fnp/librarian/temp~/maly/img/'
         self.fundraising = fundraising
         self.footnotes = etree.Element('div', id='footnotes')
+        self.make_cover = cover or make_cover
 
         self.cursors = {
 #            None: None,
 
         self.cursors = {
 #            None: None,
@@ -78,6 +79,7 @@ class Builder:
 
 class EpubBuilder(Builder):
     file_extension = 'epub'
 
 class EpubBuilder(Builder):
     file_extension = 'epub'
+    isbn_field = 'isbn_epub'
 
     def __init__(self, *args, **kwargs):
         self.chars = set()
 
     def __init__(self, *args, **kwargs):
         self.chars = set()
@@ -109,7 +111,6 @@ class EpubBuilder(Builder):
 
         self.set_metadata()
         
 
         self.set_metadata()
         
-
         self.add_cover()
         
         self.add_title_page()
         self.add_cover()
         
         self.add_title_page()
@@ -212,10 +213,10 @@ class EpubBuilder(Builder):
 
         e = self.document.tree.find('//autor_utworu')
         if e is not None:
 
         e = self.document.tree.find('//autor_utworu')
         if e is not None:
-            etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text()
+            etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text(self)
         e = self.document.tree.find('//nazwa_utworu')
         if e is not None:
         e = self.document.tree.find('//nazwa_utworu')
         if e is not None:
-            etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text()
+            etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text(self)
 
         if not len(tp):
             for author in self.document.meta.authors:
 
         if not len(tp):
             for author in self.document.meta.authors:
@@ -251,8 +252,8 @@ class EpubBuilder(Builder):
           </p>
         """))
 
           </p>
         """))
 
-        if self.document.meta.isbn_epub:
-            etree.SubElement(tp, 'p', **{"class": "info"}).text = self.document.meta.isbn_epub
+        if getattr(self.document.meta, self.isbn_field):
+            etree.SubElement(tp, 'p', **{"class": "info"}).text = getattr(self.document.meta, self.isbn_field)
 
         tp.append(etree.XML("""<p class="footer info">
             <a href="http://www.wolnelektury.pl/"><img src="logo_wolnelektury.png" alt="WolneLektury.pl" /></a>
 
         tp.append(etree.XML("""<p class="footer info">
             <a href="http://www.wolnelektury.pl/"><img src="logo_wolnelektury.png" alt="WolneLektury.pl" /></a>
@@ -589,8 +590,8 @@ class EpubBuilder(Builder):
             else:
                 p.text += m.cover_by
             
             else:
                 p.text += m.cover_by
             
-        if m.isbn_epub:
-            newp().text = m.isbn_epub
+        if getattr(m, self.isbn_field):
+            newp().text = getattr(m, self.isbn_field)
 
         newp().text = '\u00a0'
 
 
         newp().text = '\u00a0'
 
@@ -644,10 +645,10 @@ class EpubBuilder(Builder):
     def add_cover(self):
         # TODO: allow other covers
 
     def add_cover(self):
         # TODO: allow other covers
 
-        cover_maker = make_cover
+        cover_maker = self.make_cover
 
         cover_file = six.BytesIO()
 
         cover_file = six.BytesIO()
-        cover = cover_maker(self.document.meta)
+        cover = cover_maker(self.document.meta, width=600)
         cover.save(cover_file)
         cover_name = 'cover.%s' % cover.ext()
 
         cover.save(cover_file)
         cover_name = 'cover.%s' % cover.ext()
 
diff --git a/src/librarian/builders/mobi.py b/src/librarian/builders/mobi.py
new file mode 100644 (file)
index 0000000..19b5036
--- /dev/null
@@ -0,0 +1,45 @@
+import os
+import six
+import subprocess
+from tempfile import NamedTemporaryFile
+from librarian import functions, get_resource, OutputFile
+from librarian.hyphenator import Hyphenator
+from .epub import EpubBuilder
+
+
+class MobiBuilder(EpubBuilder):
+    file_extension = 'mobi'
+    isbn_field = 'isbn_mobi'
+
+    def build(self, document, use_kindlegen=False, converter_path=None, **kwargs):
+        bibl_lng = document.meta.language
+        short_lng = functions.lang_code_3to2(bibl_lng)
+        try:
+            self.hyphenator = Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
+                                       short_lng + '.dic'))
+        except:
+            pass
+
+        epub = super().build(document, **kwargs)
+
+        devnull = open("/dev/null", 'w')
+        gen_kwargs = {"stdout": devnull, "stderr": devnull}
+
+        output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi',
+                                     delete=False)
+        output_file.close()
+
+        if use_kindlegen:
+            output_file_basename = os.path.basename(output_file.name)
+            subprocess.check_call([converter_path or 'kindlegen',
+                               '-c2', epub.get_filename(),
+                               '-o', output_file_basename], **gen_kwargs)
+        else:
+            subprocess.check_call([converter_path or 'ebook-convert',
+                               epub.get_filename(),
+                               output_file.name, '--no-inline-toc',
+                               '--mobi-file-type=both',
+                               '--mobi-ignore-margins',
+                               ], **gen_kwargs)
+        return OutputFile.from_filename(output_file.name)
+
diff --git a/src/librarian/elements/__init__.py b/src/librarian/elements/__init__.py
index 6ec0ba7..c934299 100644 (file)
@@ -115,7 +115,6 @@ WL_ELEMENTS = {
     "animacja": figures.Animacja,
     "ilustr": figures.Ilustr,
 
     "animacja": figures.Animacja,
     "ilustr": figures.Ilustr,
 
-    "ref": etree.ElementBase,
     # Section
     "wywiad_pyt": blocks.WywiadPyt,
     "wywiad_odp": blocks.WywiadOdp,
     # Section
     "wywiad_pyt": blocks.WywiadPyt,
     "wywiad_odp": blocks.WywiadOdp,
diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py
index 3e0f898..46ae29f 100644 (file)
@@ -3,7 +3,6 @@
 import re
 from lxml import etree
 from librarian import dcparser, RDFNS
 import re
 from lxml import etree
 from librarian import dcparser, RDFNS
-from librarian.html import raw_printable_text
 from librarian.util import get_translation
 
 
 from librarian.util import get_translation
 
 
@@ -76,25 +75,35 @@ class WLElement(etree.ElementBase):
         if parent is not None:
             parent.signal(signal)
     
         if parent is not None:
             parent.signal(signal)
     
-    def raw_printable_text(self):
+    def raw_printable_text(self, builder):
+        from librarian.html import raw_printable_text
+
         # TODO: podtagi, wyroznienia, etc
         t = ''
         # TODO: podtagi, wyroznienia, etc
         t = ''
-        t += self.normalize_text(self.text)
+        t += self.normalize_text(self.text, builder)
         for c in self:
             if not isinstance(c, WLElement):
                 continue
             if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
         for c in self:
             if not isinstance(c, WLElement):
                 continue
             if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
-                t += c.raw_printable_text()
-            t += self.normalize_text(c.tail)
+                t += c.raw_printable_text(builder)
+            t += self.normalize_text(c.tail, builder)
         return t
     
         return t
     
-    def normalize_text(self, text):
+    def normalize_text(self, text, builder):
         text = text or ''
         for e, s in self.text_substitutions:
             text = text.replace(e, s)
             # FIXME: TEmporary turnoff
 #        text = re.sub(r'\s+', ' ', text)
 ### TODO: Added now for epub
         text = text or ''
         for e, s in self.text_substitutions:
             text = text.replace(e, s)
             # FIXME: TEmporary turnoff
 #        text = re.sub(r'\s+', ' ', text)
 ### TODO: Added now for epub
+
+        if getattr(builder, 'hyphenator', None) is not None:
+            newt = ''
+            wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
+            for w in wlist:
+                newt += builder.hyphenator.inserted(w, u'\u00AD')
+            text = newt
+
         text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
 
         return text
         text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
 
         return text
@@ -102,7 +111,7 @@ class WLElement(etree.ElementBase):
     def _build_inner(self, builder, build_method):
         child_count = len(self)
         if self.CAN_HAVE_TEXT and self.text:
     def _build_inner(self, builder, build_method):
         child_count = len(self)
         if self.CAN_HAVE_TEXT and self.text:
-            text = self.normalize_text(self.text)
+            text = self.normalize_text(self.text, builder)
             if self.STRIP:
                 text = text.lstrip()
                 if not child_count:
             if self.STRIP:
                 text = text.lstrip()
                 if not child_count:
@@ -112,7 +121,7 @@ class WLElement(etree.ElementBase):
             if isinstance(child, WLElement):
                 getattr(child, build_method)(builder)
             if self.CAN_HAVE_TEXT and child.tail:
             if isinstance(child, WLElement):
                 getattr(child, build_method)(builder)
             if self.CAN_HAVE_TEXT and child.tail:
-                text = self.normalize_text(child.tail)
+                text = self.normalize_text(child.tail, builder)
                 if self.STRIP and i == child_count - 1:
                     text = text.rstrip()
                 builder.push_text(text)
                 if self.STRIP and i == child_count - 1:
                     text = text.rstrip()
                 builder.push_text(text)
@@ -187,7 +196,7 @@ class WLElement(etree.ElementBase):
 
             builder.add_toc_entry(
                 fragment,
 
             builder.add_toc_entry(
                 fragment,
-                self.raw_printable_text(),
+                self.raw_printable_text(builder),
                 self.SECTION_PRECEDENCE
             )
             
                 self.SECTION_PRECEDENCE
             )
             
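diff --git a/src/librarian/elements/styles/tytul_dziela.py b/src/librarian/elements/styles/tytul_dziela.py
index 906d98c..77852d5 100644 (file)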
@@ -6,8 +6,8 @@ class TytulDziela(WLElement):
     EPUB_TAG = HTML_TAG = 'em'
     EPUB_CLASS = HTML_CLASS = 'book-title'
 
     EPUB_TAG = HTML_TAG = 'em'
     EPUB_CLASS = HTML_CLASS = 'book-title'
 
-    def normalize_text(self, text):
-        txt = super(TytulDziela, self).normalize_text(text)
+    def normalize_text(self, text, builder):
+        txt = super(TytulDziela, self).normalize_text(text, builder)
         if self.attrib.get('typ') == '1':
             txt = '„{txt}”'.format(txt=txt)
         return txt
         if self.attrib.get('typ') == '1':
             txt = '„{txt}”'.format(txt=txt)
         return txt
diff --git a/src/librarian/hyphenator.py b/src/librarian/hyphenator.py
index aa5b4c3..fdd50d4 100644 (file)
@@ -81,13 +81,13 @@ class Hyph_dict(object):
     """
     def __init__(self, filename):
         self.patterns = {}
     """
     def __init__(self, filename):
         self.patterns = {}
-        f = open(filename)
+        f = open(filename, 'rb')
         charset = f.readline().strip()
         charset = f.readline().strip()
-        if charset.startswith('charset '):
+        if charset.startswith(b'charset '):
             charset = charset[8:].strip()
 
         for pat in f:
             charset = charset[8:].strip()
 
         for pat in f:
-            pat = pat.decode(charset).strip()
+            pat = pat.decode(charset.decode('latin1')).strip()
             if not pat or pat[0] == '%': continue
             # replace ^^hh with the real character
             pat = parse_hex(hexrepl, pat)
             if not pat or pat[0] == '%': continue
             # replace ^^hh with the real character
             pat = parse_hex(hexrepl, pat)
@@ -211,7 +211,7 @@ class Hyphenator(object):
         the string 'let-ter-gre-pen'. The hyphen string to use can be
         given as the second parameter, that defaults to '-'.
         """
         the string 'let-ter-gre-pen'. The hyphen string to use can be
         given as the second parameter, that defaults to '-'.
         """
-        if isinstance(word, str):
+        if isinstance(word, bytes):
             word = word.decode('latin1')
         l = list(word)
         for p in reversed(self.positions(word)):
             word = word.decode('latin1')
         l = list(word)
         for p in reversed(self.positions(word)):