[Lxml-checkins] r45112 - lxml/trunk/src/lxml

scoder at codespeak.net scoder at codespeak.net
Sun Jul 15 23:08:08 CEST 2007


Author: scoder
Date: Sun Jul 15 23:08:07 2007
New Revision: 45112

Modified:
   lxml/trunk/src/lxml/parser.pxi
Log:
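Use the UTF-16 encoding names that libxml2 understands natively ("UTF-16LE"/"UTF-16BE" instead of "UTF16LE"/"UTF16BE"), and drop the manual UTF16LE detection workaround.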

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Sun Jul 15 23:08:07 2007
@@ -157,14 +157,8 @@
     buffer = python.PyUnicode_AS_DATA(utext)
     enc = _findEncodingName(buffer, l)
     if enc == NULL:
-        # apparently, libxml2 can't detect UTF16LE on some systems
-        if l >= 4 and \
-               buffer[0] == c'<' and buffer[1] == c'\0' and \
-               buffer[2] == c't' and buffer[3] == c'\0':
-            enc = "UTF16LE"
-        else:
-            # not my fault, it's YOUR broken system :)
-            return
+        # not my fault, it's YOUR broken system :)
+        return
     enchandler = tree.xmlFindCharEncodingHandler(enc)
     if enchandler is not NULL:
         global _UNICODE_ENCODING
@@ -176,9 +170,9 @@
     cdef tree.xmlCharEncoding enc
     enc = tree.xmlDetectCharEncoding(buffer, size)
     if enc == tree.XML_CHAR_ENCODING_UTF16LE:
-        return "UTF16LE"
+        return "UTF-16LE"
     elif enc == tree.XML_CHAR_ENCODING_UTF16BE:
-        return "UTF16BE"
+        return "UTF-16BE"
     elif enc == tree.XML_CHAR_ENCODING_UCS4LE:
         return "UCS-4LE"
     elif enc == tree.XML_CHAR_ENCODING_UCS4BE:


More information about the lxml-checkins mailing list