add some links and rights info in header,
[librarian.git] / librarian / parser.py
index b470957..341eaf8 100644 (file)
@@ -57,6 +57,8 @@ class WLDocument(object):
         if not isinstance(data, unicode):
             data = data.decode('utf-8')
 
+        data = data.replace(u'\ufeff', '')
+
         if swap_endlines:
             sub = u'<br/>'
             if preserve_lines:
@@ -119,4 +121,14 @@ class WLDocument(object):
             except Exception, e:
                 unmerged.append( repr( (key, xpath, e) ) )
 
-        return unmerged
\ No newline at end of file
+        return unmerged
+
+    def clean_ed_note(self):
+        """Clear every forbidden tag found under a nota_red and retag it as span, keeping its tail text."""
+
+        for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in  # one union XPath covering all forbidden tag names
+                    ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
+            tail = node.tail  # saved so the text following the element can be restored after the reset below
+            node.clear()  # presumably lxml's Element.clear(), which would also wipe tail -- TODO confirm edoc is an lxml tree
+            node.tag = 'span'
+            node.tail = tail