encode unicode for lxml
author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Tue, 8 Mar 2011 12:07:08 +0000 (13:07 +0100)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Tue, 8 Mar 2011 12:07:08 +0000 (13:07 +0100)
librarian/parser.py

index 4cdaa79..afc4f1a 100644 (file)
@@ -61,7 +61,7 @@ class WLDocument(object):
 
         try:
             parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data), parser)
+            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
 
             if swap_endlines:
                 cls.swap_endlines(tree)