Allow no editors info in pdf, epub.
[librarian.git] / librarian / html.py
index 8703150..0eeb76b 100644 (file)
@@ -134,14 +134,17 @@ def extract_fragments(input_filename):
                 fragment = Fragment(id=element.get('fid'), themes=element.text)
 
                 # Append parents
-                if element.getparent().get('id', None) != 'book-text':
-                    parents = [element.getparent()]
-                    while parents[-1].getparent().get('id', None) != 'book-text':
-                        parents.append(parents[-1].getparent())
-
-                    parents.reverse()
-                    for parent in parents:
-                        fragment.append('start', parent)
+                parent = element.getparent()
+                parents = []
+                while parent.get('id', None) != 'book-text':
+                    cparent = copy.deepcopy(parent)
+                    cparent.text = None
+                    parents.append(cparent)
+                    parent = parent.getparent()
+
+                parents.reverse()
+                for parent in parents:
+                    fragment.append('start', parent)
 
                 open_fragments[fragment.id] = fragment
 
@@ -222,7 +225,7 @@ def add_anchors(root):
 def raw_printable_text(element):
     working = copy.deepcopy(element)
     for e in working.findall('a'):
-        if e.get('class') == 'annotation':
+        if e.get('class') in ('annotation', 'theme-begin'):
             e.text = ''
     return etree.tostring(working, method='text', encoding=unicode).strip()