fnp
/
librarian.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
fix font
[librarian.git]
/
src
/
librarian
/
html.py
diff --git
a/src/librarian/html.py
b/src/librarian/html.py
index
f0f11db
..
d6482dc
100644
(file)
--- a/
src/librarian/html.py
+++ b/
src/librarian/html.py
@@
-1,21
+1,19
@@
-# -*- coding: utf-8 -*-
-#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja
Nowoczesna Polska
. See NOTICE for more information.
+# Copyright © Fundacja
Wolne Lektury
. See NOTICE for more information.
#
#
-from __future__ import print_function, unicode_literals
-
+import io
import os
import re
import copy
import os
import re
import copy
+import urllib.parse
+import urllib.request
from lxml import etree
from lxml import etree
-from librarian import XHTMLNS, ParseError, OutputFile
+from librarian import XHTMLNS,
DCNS,
ParseError, OutputFile
from librarian import functions
from PIL import Image
from lxml.etree import XMLSyntaxError, XSLTApplyError
from librarian import functions
from PIL import Image
from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
functions.reg_substitute_entities()
functions.reg_substitute_entities()
@@
-23,8
+21,6
@@
functions.reg_person_name()
STYLESHEETS = {
'legacy': 'xslt/book2html.xslt',
STYLESHEETS = {
'legacy': 'xslt/book2html.xslt',
- 'full': 'xslt/wl2html_full.xslt',
- 'partial': 'xslt/wl2html_partial.xslt'
}
}
@@
-42,8
+38,8
@@
def transform_abstrakt(abstrakt_element):
style_filename = get_stylesheet('legacy')
style = etree.parse(style_filename)
xml = etree.tostring(abstrakt_element, encoding='unicode')
style_filename = get_stylesheet('legacy')
style = etree.parse(style_filename)
xml = etree.tostring(abstrakt_element, encoding='unicode')
- document = etree.parse(
six
.StringIO(
- xml.replace('
abstrakt', '
dlugi_cytat')
+ document = etree.parse(
io
.StringIO(
+ xml.replace('
<abstrakt', '<dlugi_cytat').replace('</abstrakt', '</
dlugi_cytat')
)) # HACK
result = document.xslt(style)
html = re.sub('<a name="sec[0-9]*"/>', '',
)) # HACK
result = document.xslt(style)
html = re.sub('<a name="sec[0-9]*"/>', '',
@@
-53,12
+49,16
@@
def transform_abstrakt(abstrakt_element):
def add_image_sizes(tree, gallery_path, gallery_url, base_url):
widths = [360, 600, 1200, 1800, 2400]
def add_image_sizes(tree, gallery_path, gallery_url, base_url):
widths = [360, 600, 1200, 1800, 2400]
+ try:
+ os.makedirs(gallery_path)
+ except:
+ pass
for i, ilustr in enumerate(tree.findall('//ilustr')):
rel_path = ilustr.attrib['src']
for i, ilustr in enumerate(tree.findall('//ilustr')):
rel_path = ilustr.attrib['src']
- img_url =
six.moves.
urllib.parse.urljoin(base_url, rel_path)
+ img_url = urllib.parse.urljoin(base_url, rel_path)
- f =
six.moves.
urllib.request.urlopen(img_url)
+ f = urllib.request.urlopen(img_url)
img = Image.open(f)
ext = {'GIF': 'gif', 'PNG': 'png'}.get(img.format, 'jpg')
img = Image.open(f)
ext = {'GIF': 'gif', 'PNG': 'png'}.get(img.format, 'jpg')
@@
-112,9
+112,15
@@
def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None, ga
for flag in flags:
document.edoc.getroot().set(flag, 'yes')
for flag in flags:
document.edoc.getroot().set(flag, 'yes')
+ ltag = document.edoc.find('//' + DCNS('language'))
+ lang = functions.lang_code_3to2(ltag.text) or 'pl'
+ document.edoc.getroot().set('lang', lang)
+
document.clean_ed_note()
document.clean_ed_note('abstrakt')
document.clean_ed_note()
document.clean_ed_note('abstrakt')
-
+ document.fix_pa_akap()
+ document.hebr_protect()
+
if not options:
options = {}
if not options:
options = {}
@@
-150,8
+156,7
@@
def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None, ga
raise ParseError(e)
raise ParseError(e)
-@six.python_2_unicode_compatible
-class Fragment(object):
+class Fragment:
def __init__(self, id, themes):
super(Fragment, self).__init__()
self.id = id
def __init__(self, id, themes):
super(Fragment, self).__init__()
self.id = id
@@
-179,7
+184,7
@@
class Fragment(object):
result = []
for event, element in self.closed_events():
if event == 'start':
result = []
for event, element in self.closed_events():
if event == 'start':
- result.append(
u
'<%s %s>' % (
+ result.append('<%s %s>' % (
element.tag,
' '.join(
'%s="%s"' % (k, v)
element.tag,
' '.join(
'%s="%s"' % (k, v)
@@
-189,7
+194,7
@@
class Fragment(object):
if element.text:
result.append(element.text)
elif event == 'end':
if element.text:
result.append(element.text)
elif event == 'end':
- result.append(
u
'</%s>' % element.tag)
+ result.append('</%s>' % element.tag)
if element.tail:
result.append(element.tail)
else:
if element.tail:
result.append(element.tail)
else:
@@
-208,7
+213,7
@@
def extract_fragments(input_filename):
# iterparse would die on a HTML document
parser = etree.HTMLParser(encoding='utf-8')
# iterparse would die on a HTML document
parser = etree.HTMLParser(encoding='utf-8')
- buf =
six
.BytesIO()
+ buf =
io
.BytesIO()
buf.write(etree.tostring(
etree.parse(input_filename, parser).getroot()[0][0],
encoding='utf-8'
buf.write(etree.tostring(
etree.parse(input_filename, parser).getroot()[0][0],
encoding='utf-8'
@@
-240,7
+245,8
@@
def extract_fragments(input_filename):
for parent in parents:
fragment.append('start', parent)
for parent in parents:
fragment.append('start', parent)
- open_fragments[fragment.id] = fragment
+ if fragment.id not in open_fragments:
+ open_fragments[fragment.id] = fragment
# Close existing fragment
else:
# Close existing fragment
else:
@@
-292,13
+298,13
@@
def add_anchor(element, prefix, with_link=True, with_target=True,
link_text = prefix
anchor = etree.Element('a', href='#%s' % prefix)
anchor.set('class', 'anchor')
link_text = prefix
anchor = etree.Element('a', href='#%s' % prefix)
anchor.set('class', 'anchor')
- anchor.text = s
ix.text_type
(link_text)
+ anchor.text = s
tr
(link_text)
parent.insert(index, anchor)
if with_target:
anchor_target = etree.Element('a', name='%s' % prefix)
anchor_target.set('class', 'target')
parent.insert(index, anchor)
if with_target:
anchor_target = etree.Element('a', name='%s' % prefix)
anchor_target.set('class', 'target')
- anchor_target.text =
u
' '
+ anchor_target.text = ' '
parent.insert(index, anchor_target)
parent.insert(index, anchor_target)
@@
-310,7
+316,9
@@
def any_ancestor(element, test):
def add_anchors(root):
def add_anchors(root):
- counter = 1
+ link_prefix = "f"
+ counter = {"f": 1}
+ visible_counter = 1
for element in root.iterdescendants():
def f(e):
return (
for element in root.iterdescendants():
def f(e):
return (
@@
-319,17
+327,30
@@
def add_anchors(root):
)
or e.get('id') == 'nota_red'
or e.tag == 'blockquote'
)
or e.get('id') == 'nota_red'
or e.tag == 'blockquote'
+ or e.get('id') == 'footnotes'
)
)
+
+ if element.get('class') == 'numeracja':
+ try:
+ visible_counter = int(element.get('data-start'))
+ except ValueError:
+ visible_counter = 1
+ if element.get("data-link"):
+ link_prefix = element.get("data-link")
+ counter[link_prefix] = 1
+
if any_ancestor(element, f):
continue
if element.tag == 'div' and 'verse' in element.get('class', ''):
if any_ancestor(element, f):
continue
if element.tag == 'div' and 'verse' in element.get('class', ''):
- if counter == 1 or counter % 5 == 0:
- add_anchor(element, "f%d" % counter, link_text=counter)
- counter += 1
+ if visible_counter == 1 or visible_counter % 5 == 0:
+ add_anchor(element, "%s%d" % (link_prefix, counter[link_prefix]), link_text=visible_counter)
+ counter[link_prefix] += 1
+ visible_counter += 1
elif 'paragraph' in element.get('class', ''):
elif 'paragraph' in element.get('class', ''):
- add_anchor(element, "f%d" % counter, link_text=counter)
- counter += 1
+ add_anchor(element, "%s%d" % (link_prefix, counter[link_prefix]), link_text=visible_counter)
+ counter[link_prefix] += 1
+ visible_counter += 1
def raw_printable_text(element):
def raw_printable_text(element):
@@
-366,7
+387,7
@@
def add_table_of_contents(root):
toc = etree.Element('div')
toc.set('id', 'toc')
toc_header = etree.SubElement(toc, 'h2')
toc = etree.Element('div')
toc.set('id', 'toc')
toc_header = etree.SubElement(toc, 'h2')
- toc_header.text =
u
'Spis treści'
+ toc_header.text = 'Spis treści'
toc_list = etree.SubElement(toc, 'ol')
for n, section, text, subsections in sections:
toc_list = etree.SubElement(toc, 'ol')
for n, section, text, subsections in sections: