[wwwsearch-commits] r33200 - wwwsearch/ClientForm/trunk
jjlee at codespeak.net
jjlee at codespeak.net
Thu Oct 12 00:23:36 CEST 2006
Author: jjlee
Date: Thu Oct 12 00:23:34 2006
New Revision: 33200
Modified:
wwwsearch/ClientForm/trunk/ClientForm.py
wwwsearch/ClientForm/trunk/test.py
Log:
Handle line endings in element content the same way browsers do: a single line break immediately after a start tag is ignored. Also convert TEXTAREA content to the DOS (CRLF) line-ending convention, again following the major browsers (they may apply this line-ending normalization in other cases as well; I haven't checked).
Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py (original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py Thu Oct 12 00:23:34 2006
@@ -126,6 +126,10 @@
_compress_re = re.compile(r"\s+")
def compress_text(text): return _compress_re.sub(" ", text.strip())
+def normalize_line_endings(text):
+ return re.sub(r"(?:(?<!\r)\n)|(?:\r(?!\n))", "\r\n", text)
+
+
# This version of urlencode is from my Python 1.5.2 back-port of the
# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
@@ -639,6 +643,16 @@
def handle_data(self, data):
debug("%s", data)
+
+ # according to http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1
+ # line break immediately after start tags or immediately before end
+ # tags must be ignored, but real browsers only ignore a line break
+ # after a start tag, so we'll do that.
+ if data[0:2] == "\r\n":
+ data = data[2:]
+ if data[0:1] in ["\n", "\r"]:
+ data = data[1:]
+
if self._option is not None:
# self._option is a dictionary of the OPTION element's HTML
# attributes, but it has two special keys, one of which is the
@@ -649,6 +663,7 @@
elif self._textarea is not None:
map = self._textarea
key = "value"
+ data = normalize_line_endings(data)
# not if within option or textarea
elif self._current_label is not None:
map = self._current_label
Modified: wwwsearch/ClientForm/trunk/test.py
==============================================================================
--- wwwsearch/ClientForm/trunk/test.py (original)
+++ wwwsearch/ClientForm/trunk/test.py Thu Oct 12 00:23:34 2006
@@ -288,7 +288,7 @@
self.assertEqual(form.action, "http://localhost/abc&"+u"\u2014".encode('utf8')+"d")
control = form.find_control(type="textarea", nr=0)
self.assert_(control.name is None)
- self.assert_(control.value == "blah, blah,\nRhubarb.\n\n")
+ self.assert_(control.value == "blah, blah,\r\nRhubarb.\r\n\r\n")
empty_control = form.find_control(type="textarea", nr=1)
self.assert_(str(empty_control) == "<TextareaControl(<None>=)>")
@@ -621,6 +621,17 @@
single_control = form.find_control(type="select", nr=1)
self.assert_(single_control.value == ["1"])
+ def test_close_base_tag(self):
+ # Benji York: a single newline immediately after a start tag is
+ # stripped by browsers, but not one immediately before an end tag.
+ # TEXTAREA content is converted to the DOS newline convention.
+ forms = ClientForm.ParseFile(
+ StringIO("<form><textarea>\n\nblah\n</textarea></form>"),
+ "http://example.com/",
+ )
+ ctl = forms[0].find_control(type="textarea")
+ self.assertEqual(ctl.value, "\r\nblah\r\n")
+
class DisabledTests(TestCase):
def testOptgroup(self):
@@ -3054,6 +3065,32 @@
self.assertEqual(req.ah, not auh)
+class FunctionTests(TestCase):
+
+ def test_normalize_line_endings(self):
+ def check(text, expected):
+ got = ClientForm.normalize_line_endings(text)
+ self.assertEqual(got, expected)
+
+ # unix
+ check("foo\nbar", "foo\r\nbar")
+ check("foo\nbar\n", "foo\r\nbar\r\n")
+ # mac
+ check("foo\rbar", "foo\r\nbar")
+ check("foo\rbar\r", "foo\r\nbar\r\n")
+ # dos
+ check("foo\r\nbar", "foo\r\nbar")
+ check("foo\r\nbar\r\n", "foo\r\nbar\r\n")
+
+ # inconsistent -- we just blithely convert anything that looks like a
+ # line ending to the DOS convention, following Firefox's behaviour when
+ # normalizing textarea content
+ check("foo\r\nbar\nbaz\rblah\r\n", "foo\r\nbar\r\nbaz\r\nblah\r\n")
+
+ # pathological ;-O
+ check("\r\n\n\r\r\r\n", "\r\n"*5)
+
+
def startswith(string, initial):
if len(initial) > len(string): return False
return string[:len(initial)] == initial
More information about the wwwsearch-commits mailing list