strip BOM
author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 2 Dec 2010 13:43:03 +0000 (14:43 +0100)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 2 Dec 2010 13:43:03 +0000 (14:43 +0100)
librarian/epub.py
librarian/parser.py

index e25ce8f..3bdd1cd 100644 (file)
@@ -82,7 +82,8 @@ def replace_characters(node):
     def replace_chars(text):
         if text is None:
             return None
-        return text.replace("---", u"\u2014")\
+        return text.replace(u"\ufeff", u"")\
+                   .replace("---", u"\u2014")\
                    .replace("--", u"\u2013")\
                    .replace(",,", u"\u201E")\
                    .replace('"', u"\u201D")\
index 3f9f710..341eaf8 100644 (file)
@@ -57,6 +57,8 @@ class WLDocument(object):
         if not isinstance(data, unicode):
             data = data.decode('utf-8')
 
+        data = data.replace(u'\ufeff', '')
+
         if swap_endlines:
             sub = u'<br/>'
             if preserve_lines: