# -*- encoding: utf-8 -*-
"""Check that lxml turns cp1251-encoded HTML input into decoded text."""
from lxml import etree
from lxml.html import HTMLParser


def test():
    """Parse cp1251-encoded Cyrillic input and verify the extracted text.

    The sample text is encoded to cp1251 bytes and parsed with an
    ``HTMLParser`` that is told about that encoding.  Both the document's
    ``text_content()`` and the ``.text`` of ``doc[0][0]`` are then checked
    to be decoded text equal to the sample, not the raw cp1251 bytes.

    Raises:
        AssertionError: if either value is a byte string or does not match
            the sample text.
    """
    encoding = 'cp1251'
    content = u"абвгд"
    # NOTE(review): this literal arrived split across physical lines with
    # nothing but line breaks around ``content``; it is written with explicit
    # escapes so the module parses.  If markup was meant to wrap the sample,
    # restore it here -- TODO confirm.
    string = u"\n" + content + u"\n"
    doc = etree.HTML(string.encode(encoding),
                     parser=HTMLParser(encoding=encoding))

    # The original asserted each value equal to both ``content`` and
    # ``content.encode('cp1251')``; text and its encoded bytes never compare
    # equal, so the test could not pass.  Check the type and the decoded
    # value instead.
    extracted = (
        ("doc.text_content()", doc.text_content()),
        ("doc[0][0].text", doc[0][0].text),
    )
    for label, text in extracted:
        assert not isinstance(text, bytes), \
            "%s returned undecoded bytes: %r" % (label, text)
        # Surrounding whitespace comes from the newlines in the input and is
        # not part of the payload under test.
        assert text is not None and text.strip() == content, \
            "%s did not match the sample text: %r" % (label, text)


if __name__ == '__main__':
    test()