from .sanitize import Sanitizer
from .daisy import DaisyBuilder
from .epub import EpubBuilder
+from .mobi import MobiBuilder
from .pdf import PdfBuilder
("sanitizer", Sanitizer),
("epub", EpubBuilder),
+ ("mobi", MobiBuilder),
("pdf", PdfBuilder),
])
class Builder:
file_extension = None
- def __init__(self, base_url=None, fundraising=None):
+ def __init__(self, base_url=None, fundraising=None, cover=None):
self._base_url = base_url or 'file:///home/rczajka/for/fnp/librarian/temp~/maly/img/'
self.fundraising = fundraising
self.footnotes = etree.Element('div', id='footnotes')
+ self.make_cover = cover or make_cover
self.cursors = {
# None: None,
class EpubBuilder(Builder):
file_extension = 'epub'
+ isbn_field = 'isbn_epub'
def __init__(self, *args, **kwargs):
self.chars = set()
self.set_metadata()
-
self.add_cover()
self.add_title_page()
e = self.document.tree.find('//autor_utworu')
if e is not None:
- etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text()
+ etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text(self)
e = self.document.tree.find('//nazwa_utworu')
if e is not None:
- etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text()
+ etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text(self)
if not len(tp):
for author in self.document.meta.authors:
</p>
"""))
- if self.document.meta.isbn_epub:
- etree.SubElement(tp, 'p', **{"class": "info"}).text = self.document.meta.isbn_epub
+ if getattr(self.document.meta, self.isbn_field):
+ etree.SubElement(tp, 'p', **{"class": "info"}).text = getattr(self.document.meta, self.isbn_field)
tp.append(etree.XML("""<p class="footer info">
<a href="http://www.wolnelektury.pl/"><img src="logo_wolnelektury.png" alt="WolneLektury.pl" /></a>
else:
p.text += m.cover_by
- if m.isbn_epub:
- newp().text = m.isbn_epub
+ if getattr(m, self.isbn_field):
+ newp().text = getattr(m, self.isbn_field)
newp().text = '\u00a0'
def add_cover(self):
# TODO: allow other covers
- cover_maker = make_cover
+ cover_maker = self.make_cover
cover_file = six.BytesIO()
- cover = cover_maker(self.document.meta)
+ cover = cover_maker(self.document.meta, width=600)
cover.save(cover_file)
cover_name = 'cover.%s' % cover.ext()
--- /dev/null
+import os
+import six
+import subprocess
+from tempfile import NamedTemporaryFile
+from librarian import functions, get_resource, OutputFile
+from librarian.hyphenator import Hyphenator
+from .epub import EpubBuilder
+
+
+class MobiBuilder(EpubBuilder):
+ file_extension = 'mobi'
+ isbn_field = 'isbn_mobi'
+
+ def build(self, document, use_kindlegen=False, converter_path=None, **kwargs):
+ bibl_lng = document.meta.language
+ short_lng = functions.lang_code_3to2(bibl_lng)
+ try:
+ self.hyphenator = Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
+ short_lng + '.dic'))
+ except:
+ pass
+
+ epub = super().build(document, **kwargs)
+
+ devnull = open("/dev/null", 'w')
+ gen_kwargs = {"stdout": devnull, "stderr": devnull}
+
+ output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi',
+ delete=False)
+ output_file.close()
+
+ if use_kindlegen:
+ output_file_basename = os.path.basename(output_file.name)
+ subprocess.check_call([converter_path or 'kindlegen',
+ '-c2', epub.get_filename(),
+ '-o', output_file_basename], **gen_kwargs)
+ else:
+ subprocess.check_call([converter_path or 'ebook-convert',
+ epub.get_filename(),
+ output_file.name, '--no-inline-toc',
+ '--mobi-file-type=both',
+ '--mobi-ignore-margins',
+ ], **gen_kwargs)
+ return OutputFile.from_filename(output_file.name)
+
"animacja": figures.Animacja,
"ilustr": figures.Ilustr,
- "ref": etree.ElementBase,
# Section
"wywiad_pyt": blocks.WywiadPyt,
"wywiad_odp": blocks.WywiadOdp,
import re
from lxml import etree
from librarian import dcparser, RDFNS
-from librarian.html import raw_printable_text
from librarian.util import get_translation
if parent is not None:
parent.signal(signal)
- def raw_printable_text(self):
+ def raw_printable_text(self, builder):
# Return the concatenated, normalized text of this element and of its
# WLElement children. `builder` is forwarded to every normalize_text()
# call and to the recursive calls on the children.
# NOTE(review): the name imported below is not used in the lines visible
# here -- confirm whether the function-local import is still needed.
+ from librarian.html import raw_printable_text
+
# TODO: subtags, emphasis, etc.
t = ''
- t += self.normalize_text(self.text)
+ t += self.normalize_text(self.text, builder)
for c in self:
# Children that are not WLElement instances are skipped.
if not isinstance(c, WLElement):
continue
# Children with the tags listed below do not contribute their own text.
# NOTE(review): indentation is lost in this view; the tail text is
# presumably appended for every WLElement child, including the excluded
# tags -- confirm against the source file.
if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
- t += c.raw_printable_text()
- t += self.normalize_text(c.tail)
+ t += c.raw_printable_text(builder)
+ t += self.normalize_text(c.tail, builder)
return t
- def normalize_text(self, text):
+ def normalize_text(self, text, builder):
# Normalize a text fragment for output: apply the (old, new) replacement
# pairs from self.text_substitutions, optionally insert soft hyphens, and
# turn the whitespace after one-character words into a no-break space.
# A None `text` is treated as the empty string.
text = text or ''
for e, s in self.text_substitutions:
text = text.replace(e, s)
# FIXME: Temporarily turned off
# text = re.sub(r'\s+', ' ', text)
### TODO: Added now for epub
+
# Hyphenate only when the builder exposes a non-None `hyphenator`.
# The text is split into runs of word characters and single non-word
# characters; each piece goes through hyphenator.inserted() with U+00AD
# (soft hyphen) as the hyphen string, and the results are joined back.
+ if getattr(builder, 'hyphenator', None) is not None:
+ newt = ''
+ wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
+ for w in wlist:
+ newt += builder.hyphenator.inserted(w, u'\u00AD')
+ text = newt
+
# A whitespace run directly after a single word character that itself
# follows whitespace is replaced with U+00A0 (no-break space).
text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
return text
def _build_inner(self, builder, build_method):
child_count = len(self)
if self.CAN_HAVE_TEXT and self.text:
- text = self.normalize_text(self.text)
+ text = self.normalize_text(self.text, builder)
if self.STRIP:
text = text.lstrip()
if not child_count:
if isinstance(child, WLElement):
getattr(child, build_method)(builder)
if self.CAN_HAVE_TEXT and child.tail:
- text = self.normalize_text(child.tail)
+ text = self.normalize_text(child.tail, builder)
if self.STRIP and i == child_count - 1:
text = text.rstrip()
builder.push_text(text)
builder.add_toc_entry(
fragment,
- self.raw_printable_text(),
+ self.raw_printable_text(builder),
self.SECTION_PRECEDENCE
)
EPUB_TAG = HTML_TAG = 'em'
EPUB_CLASS = HTML_CLASS = 'book-title'
- def normalize_text(self, text):
- txt = super(TytulDziela, self).normalize_text(text)
+ def normalize_text(self, text, builder):
# Normalize the text as the parent class does, then wrap it in the „…”
# quotation marks when this element's `typ` attribute equals '1'.
+ txt = super(TytulDziela, self).normalize_text(text, builder)
if self.attrib.get('typ') == '1':
txt = '„{txt}”'.format(txt=txt)
return txt
"""
def __init__(self, filename):
self.patterns = {}
- f = open(filename)
+ f = open(filename, 'rb')
charset = f.readline().strip()
- if charset.startswith('charset '):
+ if charset.startswith(b'charset '):
charset = charset[8:].strip()
for pat in f:
- pat = pat.decode(charset).strip()
+ pat = pat.decode(charset.decode('latin1')).strip()
if not pat or pat[0] == '%': continue
# replace ^^hh with the real character
pat = parse_hex(hexrepl, pat)
the string 'let-ter-gre-pen'. The hyphen string to use can be
given as the second parameter, that defaults to '-'.
"""
- if isinstance(word, str):
+ if isinstance(word, bytes):
word = word.decode('latin1')
l = list(word)
for p in reversed(self.positions(word)):