X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/223fd8f247b4a588d263afaf798dca4cb9ffa639..5329a22cd6643da657dd24546b382ada9e048b68:/librarian/parser.py?ds=sidebyside

diff --git a/librarian/parser.py b/librarian/parser.py
index b470957..341eaf8 100644
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -57,6 +57,8 @@ class WLDocument(object):
         if not isinstance(data, unicode):
             data = data.decode('utf-8')
 
+        data = data.replace(u'\ufeff', '')
+
         if swap_endlines:
             sub = u'
'
             if preserve_lines:
@@ -119,4 +121,14 @@ class WLDocument(object):
             except Exception, e:
                 unmerged.append( repr( (key, xpath, e) ) )
 
-        return unmerged
\ No newline at end of file
+        return unmerged
+
+    def clean_ed_note(self):
+        """ deletes forbidden tags from nota_red """
+
+        for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in
+                ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
+            tail = node.tail
+            node.clear()
+            node.tag = 'span'
+            node.tail = tail