fixes for edge cases
[librarian.git] / src / librarian / builders / html.py
index 40d7777..7504a66 100644 (file)
@@ -1,6 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+try:
+    from urllib.request import urlopen
+except ImportError:
+    from urllib2 import urlopen
 from lxml import etree
 from librarian.html import add_anchors, add_table_of_contents, add_table_of_themes
 from librarian import OutputFile
 from lxml import etree
 from librarian.html import add_anchors, add_table_of_contents, add_table_of_themes
 from librarian import OutputFile
@@ -8,13 +12,19 @@ from librarian import OutputFile
 
 class HtmlBuilder:
     file_extension = "html"
 
 class HtmlBuilder:
     file_extension = "html"
-    identifier = "html"
+    with_anchors = True
+    with_themes = True
+    with_toc = True
+    with_footnotes = True
+    with_nota_red = True
+    no_externalities = False
+    orphans = True
 
 
-    def __init__(self, image_location='https://wolnelektury.pl/media/book/pictures/marcos-historia-kolorow/'):
-        self.image_location = image_location
+    def __init__(self, base_url=None):
+        self._base_url = base_url
 
         self.tree = text = etree.Element('div', **{'id': 'book-text'})
 
         self.tree = text = etree.Element('div', **{'id': 'book-text'})
-        self.header = etree.SubElement(text, 'h1')
+        self.header = etree.Element('h1')
 
         self.footnotes = etree.Element('div', id='footnotes')
         self.footnote_counter = 0
 
         self.footnotes = etree.Element('div', id='footnotes')
         self.footnote_counter = 0
@@ -27,18 +37,22 @@ class HtmlBuilder:
             'footnotes': self.footnotes,
             'nota_red': self.nota_red,
         }
             'footnotes': self.footnotes,
             'nota_red': self.nota_red,
         }
-        self.current_cursors = [None]
+        self.current_cursors = [text]
 
     @property
 
     @property
-    def cursor(self):
-        return self.cursors[self.current_cursors[-1]]
+    def base_url(self):
+        if self._base_url is not None:
+            return self._base_url
+        else:
+            return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)
 
 
-    @cursor.setter
-    def cursor(self, value):
-        self.cursors[self.current_cursors[-1]] = value
+    @property
+    def cursor(self):
+        return self.current_cursors[-1]
 
     def enter_fragment(self, fragment):
 
     def enter_fragment(self, fragment):
-        self.current_cursors.append(fragment)
+        cursor = self.cursors.get(fragment, self.cursor)
+        self.current_cursors.append(cursor)
 
     def exit_fragment(self):
         self.current_cursors.pop()
 
     def exit_fragment(self):
         self.current_cursors.pop()
@@ -54,11 +68,15 @@ class HtmlBuilder:
         document._compat_assign_ordered_ids()
         document._compat_assign_section_ids()
 
         document._compat_assign_ordered_ids()
         document._compat_assign_section_ids()
 
-    def build(self, document):
+    def build(self, document, **kwargs):
+        self.document = document
+
         self.preprocess(document)
         document.tree.getroot().html_build(self)
         self.postprocess(document)
         self.preprocess(document)
         document.tree.getroot().html_build(self)
         self.postprocess(document)
+        return self.output()
 
 
+    def output(self):
         return OutputFile.from_bytes(
             etree.tostring(
                 self.tree,
         return OutputFile.from_bytes(
             etree.tostring(
                 self.tree,
@@ -83,12 +101,18 @@ class HtmlBuilder:
             )
             self.exit_fragment()
 
             )
             self.exit_fragment()
 
-        add_anchors(self.tree)
-        if len(self.nota_red):
+        if self.with_anchors:
+            add_anchors(self.tree)
+        if self.with_nota_red and len(self.nota_red):
             self.tree.append(self.nota_red)
             self.tree.append(self.nota_red)
-        add_table_of_themes(self.tree)
-        add_table_of_contents(self.tree)
-
+        if self.with_themes:
+            add_table_of_themes(self.tree)
+        if self.with_toc:
+            add_table_of_contents(self.tree)
+
+        if len(self.header):
+            self.tree.insert(0, self.header)
+            
         if self.footnote_counter:
             fnheader = etree.Element("h3")
             fnheader.text = _("Footnotes")
         if self.footnote_counter:
             fnheader = etree.Element("h3")
             fnheader.text = _("Footnotes")
@@ -96,19 +120,16 @@ class HtmlBuilder:
             self.tree.append(self.footnotes)
 
     def start_element(self, tag, attrib=None):
             self.tree.append(self.footnotes)
 
     def start_element(self, tag, attrib=None):
-        self.cursor = etree.SubElement(
+        self.current_cursors.append(etree.SubElement(
             self.cursor,
             tag,
             **(attrib or {})
             self.cursor,
             tag,
             **(attrib or {})
-        )
+        ))
 
     def end_element(self):
 
     def end_element(self):
-        self.cursor = self.cursor.getparent()
+        self.current_cursors.pop()
 
     def push_text(self, text):
 
     def push_text(self, text):
-        if text == 'Między nami nic nie było':
-            print(self.cursors)
-            print(self.current_cursors)
         cursor = self.cursor
         if len(cursor):
             cursor[-1].tail = (cursor[-1].tail or '') + text
         cursor = self.cursor
         if len(cursor):
             cursor[-1].tail = (cursor[-1].tail or '') + text
@@ -117,6 +138,8 @@ class HtmlBuilder:
 
 
 class StandaloneHtmlBuilder(HtmlBuilder):
 
 
 class StandaloneHtmlBuilder(HtmlBuilder):
+    css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css"
+
     def postprocess(self, document):
         super(StandaloneHtmlBuilder, self).postprocess(document)
 
     def postprocess(self, document):
         super(StandaloneHtmlBuilder, self).postprocess(document)
 
@@ -139,21 +162,60 @@ class StandaloneHtmlBuilder(HtmlBuilder):
             content="width=device-width, initial-scale=1, maximum-scale=1"
         )
 
             content="width=device-width, initial-scale=1, maximum-scale=1"
         )
 
-        etree.SubElement(
-            head,
-            'link',
-            href="https://static.wolnelektury.pl/css/compressed/book_text.css",
-            rel="stylesheet",
-            type="text/css",
-        )
+        if self.no_externalities:
+            etree.SubElement(
+                head, 'style',
+            ).text = urlopen(self.css_url).read().decode('utf-8')
+        else:
+            etree.SubElement(
+                head,
+                'link',
+                href=self.css_url,
+                rel="stylesheet",
+                type="text/css",
+            )
 
 
-        etree.SubElement(
-            body, 'script',
-            src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
-        )
+            etree.SubElement(
+                body, 'script',
+                src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
+            )
 
 
-        etree.SubElement(
-            body,
-            "script",
-            src="http://malsup.github.io/min/jquery.cycle2.min.js"
+            etree.SubElement(
+                body,
+                "script",
+                src="http://malsup.github.io/min/jquery.cycle2.min.js"
+            )
+
+
+class SnippetHtmlBuilder(HtmlBuilder):
+    with_anchors = False
+    with_themes = False
+    with_toc = False
+    with_footnotes = False
+    with_nota_red = False
+    with_refs = False
+
+            
+class DaisyHtmlBuilder(StandaloneHtmlBuilder):
+    file_extension = 'xhtml'
+    with_anchors = False
+    with_themes = False
+    with_toc = False
+    with_footnotes = False
+    with_nota_red = False
+    with_deep_identifiers = False
+    no_externalities = True
+
+    def output(self):
+        tree = etree.ElementTree(self.tree)
+        tree.docinfo.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN'
+        tree.docinfo.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
+        return OutputFile.from_bytes(
+            etree.tostring(
+                tree,
+                encoding='utf-8',
+                pretty_print=True,
+                xml_declaration=True
+            )
         )
         )
+