[Lxml-checkins] r50334 - in lxml/trunk: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Fri Jan 4 19:22:01 CET 2008
Author: scoder
Date: Fri Jan 4 19:22:01 2008
New Revision: 50334
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r3205 at delle: sbehnel | 2008-01-04 19:21:48 +0100
check entity/character references in Entity() factory
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Jan 4 19:22:01 2008
@@ -8,6 +8,9 @@
Features added
--------------
+* Invalid entity names and character references will now be rejected
+ by the ``Entity()`` factory.
+
* ``entity.text`` now returns the textual representation of the
entity, e.g. ``&amp;``.
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri Jan 4 19:22:01 2008
@@ -1043,22 +1043,41 @@
c_name = c_name + 1
return 1
+cdef bint _characterReferenceIsValid(char* c_name):
+ cdef bint is_hex
+ if c_name[0] == c'x':
+ c_name += 1
+ is_hex = 1
+ else:
+ is_hex = 0
+ if c_name[0] == c'\0':
+ return 0
+ while c_name[0] != c'\0':
+ if c_name[0] < c'0' or c_name[0] > c'9':
+ if not is_hex:
+ return 0
+ if not (c_name[0] >= c'a' and c_name[0] <= c'f'):
+ if not (c_name[0] >= c'A' and c_name[0] <= c'F'):
+ return 0
+ c_name += 1
+ return 1
+
cdef int _tagValidOrRaise(tag_utf) except -1:
if not _pyXmlNameIsValid(tag_utf):
- raise ValueError, "Invalid tag name %r" % \
- python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict')
+ raise ValueError("Invalid tag name %r" % \
+ python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict'))
return 0
cdef int _htmlTagValidOrRaise(tag_utf) except -1:
if not _pyHtmlNameIsValid(tag_utf):
- raise ValueError, "Invalid HTML tag name %r" % \
- python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict')
+ raise ValueError("Invalid HTML tag name %r" % \
+ python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict'))
return 0
cdef int _attributeValidOrRaise(name_utf) except -1:
if not _pyXmlNameIsValid(name_utf):
- raise ValueError, "Invalid attribute name %r" % \
- python.PyUnicode_FromEncodedObject(name_utf, 'UTF-8', 'strict')
+ raise ValueError("Invalid attribute name %r" % \
+ python.PyUnicode_FromEncodedObject(name_utf, 'UTF-8', 'strict'))
return 0
cdef object _namespacedName(xmlNode* c_node):
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Jan 4 19:22:01 2008
@@ -2110,18 +2110,26 @@
PI = ProcessingInstruction
def Entity(name):
- """Entity factory. This factory function creates a special element that
- will be serialized as an XML entity. Note, however, that the entity will
- not be automatically declared in the document. A document that uses
- entities requires a DTD.
+ """Entity factory. This factory function creates a special element
+ that will be serialized as an XML entity reference or character
+ reference. Note, however, that entities will not be automatically
+ declared in the document. A document that uses entity references
+ requires a DTD to define the entities.
"""
cdef _Document doc
cdef xmlNode* c_node
cdef xmlDoc* c_doc
- name = _utf8(name)
+ cdef char* c_name
+ name_utf = _utf8(name)
+ c_name = _cstr(name_utf)
+ if c_name[0] == c'#':
+ if not _characterReferenceIsValid(c_name + 1):
+ raise ValueError("Invalid character reference: '%s'" % name)
+ elif not _xmlNameIsValid(c_name):
+ raise ValueError("Invalid entity reference: '%s'" % name)
c_doc = _newDoc()
doc = _documentFactory(c_doc, None)
- c_node = _createEntity(c_doc, _cstr(name))
+ c_node = _createEntity(c_doc, c_name)
tree.xmlAddChild(<xmlNode*>c_doc, c_node)
return _elementFactory(doc, c_node)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Jan 4 19:22:01 2008
@@ -605,6 +605,21 @@
self.assertEquals('<root>&test;</root>',
tostring(root))
+ def test_entity_values(self):
+ Entity = self.etree.Entity
+ self.assertEquals(Entity("test").text, '&test;')
+ self.assertEquals(Entity("#17683").text, '&#17683;')
+ self.assertEquals(Entity("#x1768").text, '&#x1768;')
+ self.assertEquals(Entity("#x98AF").text, '&#x98AF;')
+
+ def test_entity_error(self):
+ Entity = self.etree.Entity
+ self.assertRaises(ValueError, Entity, 'a b c')
+ self.assertRaises(ValueError, Entity, 'a,b')
+ self.assertRaises(AssertionError, Entity, 'a\0b')
+ self.assertRaises(ValueError, Entity, '#abc')
+ self.assertRaises(ValueError, Entity, '#xxyz')
+
# TypeError in etree, AssertionError in ElementTree;
def test_setitem_assert(self):
Element = self.etree.Element
More information about the lxml-checkins
mailing list