- for event, element in etree.iterparse(input_filename, events=('start', 'end')):
+ # iterparse would die on a HTML document
+ parser = etree.HTMLParser(encoding='utf-8')
+ buf = cStringIO.StringIO()
+ buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
+ buf.seek(0)
+
+ for event, element in etree.iterparse(buf, events=('start', 'end')):