[Lxml-checkins] r45025 - lxml/trunk/src/lxml
scoder at codespeak.net
scoder at codespeak.net
Fri Jul 13 15:42:56 CEST 2007
Author: scoder
Date: Fri Jul 13 15:42:56 2007
New Revision: 45025
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
work around libxml2 not being able to detect BOM-less UTF-16LE
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri Jul 13 15:42:56 2007
@@ -156,6 +156,15 @@
l = python.PyUnicode_GET_DATA_SIZE(utext)
buffer = python.PyUnicode_AS_DATA(utext)
enc = _findEncodingName(buffer, l)
+ if enc == NULL:
+ # apparently, libxml2 can't detect UTF16LE on some systems
+ if l >= 4 and \
+ buffer[0] == c'<' and buffer[1] == c'\0' and \
+ buffer[2] == c't' and buffer[3] == c'\0':
+ enc = "UTF16LE"
+ else:
+ # not my fault, it's YOUR broken system :)
+ return
enchandler = tree.xmlFindCharEncodingHandler(enc)
if enchandler is not NULL:
global _UNICODE_ENCODING
@@ -174,6 +183,8 @@
return "UCS-4LE"
elif enc == tree.XML_CHAR_ENCODING_UCS4BE:
return "UCS-4BE"
+ elif enc == tree.XML_CHAR_ENCODING_NONE:
+ return NULL
else:
return tree.xmlGetCharEncodingName(enc)
More information about the lxml-checkins mailing list