[wwwsearch-commits] r21638 - wwwsearch/pullparser/trunk
jjlee at codespeak.net
jjlee at codespeak.net
Mon Jan 2 19:30:35 CET 2006
Author: jjlee
Date: Mon Jan 2 19:30:34 2006
New Revision: 21638
Modified:
wwwsearch/pullparser/trunk/pullparser.py
Log:
Formatting nits
Modified: wwwsearch/pullparser/trunk/pullparser.py
==============================================================================
--- wwwsearch/pullparser/trunk/pullparser.py (original)
+++ wwwsearch/pullparser/trunk/pullparser.py Mon Jan 2 19:30:34 2006
@@ -102,12 +102,12 @@
return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
def unescape(data, entities, encoding):
- if data is None or '&' not in data:
+ if data is None or "&" not in data:
return data
def replace_entities(match):
ent = match.group()
- if ent[1] == '#':
+ if ent[1] == "#":
return unescape_charref(ent[2:-1], encoding)
repl = entities.get(ent)
@@ -122,11 +122,11 @@
return repl
- return re.sub(r'&#?\S+?;', replace_entities, data)
+ return re.sub(r"&#?\S+?;", replace_entities, data)
def unescape_charref(data, encoding):
name, base = data, 10
- if name.startswith('x'):
+ if name.startswith("x"):
name, base= name[1:], 16
t = unichr(int(name, base)).encode(encoding)
return t
@@ -151,7 +151,7 @@
encoding: encoding used to encode numeric character references by
.get_text() and .get_compressed_text() ("ascii" by default)
- entitydefs: mapping like {'&amp;': '&', ...} containing HTML entity
+ entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
definitions (a sensible default is used). This is used to unescape
entities in .get_text() (and .get_compressed_text()) and attribute
values. If the encoding can not represent the character, the entity
@@ -287,7 +287,7 @@
if tok.type == "data":
text.append(tok.data)
elif tok.type == "entityref":
- t = unescape('&%s;'%tok.data, self._entitydefs, self.encoding)
+ t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
text.append(t)
elif tok.type == "charref":
t = unescape_charref(tok.data, self.encoding)
@@ -362,7 +362,7 @@
import sgmllib
# monkeypatch to fix http://www.python.org/sf/803422 :-(
-sgmllib.charref = re.compile('&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]')
+sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
def __init__(self, *args, **kwds):
sgmllib.SGMLParser.__init__(self)
More information about the wwwsearch-commits
mailing list