extract annotations: return anchor to annotation, not the word in text
[librarian.git] / librarian / parser.py
index 4cdaa79..afc4f1a 100644 (file)
@@ -61,7 +61,7 @@ class WLDocument(object):
 
         try:
             parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data), parser)
+            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
 
             if swap_endlines:
                 cls.swap_endlines(tree)