From f33381c4b0468544123ff05295033da7cc4bf61f Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Thu, 2 Dec 2010 14:43:03 +0100 Subject: [PATCH 1/1] strip BOM --- librarian/epub.py | 3 ++- librarian/parser.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/librarian/epub.py b/librarian/epub.py index e25ce8f..3bdd1cd 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -82,7 +82,8 @@ def replace_characters(node): def replace_chars(text): if text is None: return None - return text.replace("---", u"\u2014")\ + return text.replace(u"\ufeff", u"")\ + .replace("---", u"\u2014")\ .replace("--", u"\u2013")\ .replace(",,", u"\u201E")\ .replace('"', u"\u201D")\ diff --git a/librarian/parser.py b/librarian/parser.py index 3f9f710..341eaf8 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -57,6 +57,8 @@ class WLDocument(object): if not isinstance(data, unicode): data = data.decode('utf-8') + data = data.replace(u'\ufeff', '') + if swap_endlines: sub = u'
' if preserve_lines: -- 2.20.1