[wwwsearch-commits] r21638 - wwwsearch/pullparser/trunk

jjlee at codespeak.net jjlee at codespeak.net
Mon Jan 2 19:30:35 CET 2006


Author: jjlee
Date: Mon Jan  2 19:30:34 2006
New Revision: 21638

Modified:
   wwwsearch/pullparser/trunk/pullparser.py
Log:
Formatting nits

Modified: wwwsearch/pullparser/trunk/pullparser.py
==============================================================================
--- wwwsearch/pullparser/trunk/pullparser.py	(original)
+++ wwwsearch/pullparser/trunk/pullparser.py	Mon Jan  2 19:30:34 2006
@@ -102,12 +102,12 @@
     return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
 
 def unescape(data, entities, encoding):
-    if data is None or '&' not in data:
+    if data is None or "&" not in data:
         return data
 
     def replace_entities(match):
         ent = match.group()
-        if ent[1] == '#':
+        if ent[1] == "#":
             return unescape_charref(ent[2:-1], encoding)
 
         repl = entities.get(ent)
@@ -122,11 +122,11 @@
 
         return repl
 
-    return re.sub(r'&#?\S+?;', replace_entities, data)
+    return re.sub(r"&#?\S+?;", replace_entities, data)
 
 def unescape_charref(data, encoding):
     name, base = data, 10
-    if name.startswith('x'):
+    if name.startswith("x"):
         name, base= name[1:], 16
     t = unichr(int(name, base)).encode(encoding)
     return t
@@ -151,7 +151,7 @@
         encoding: encoding used to encode numeric character references by
          .get_text() and .get_compressed_text() ("ascii" by default)
 
-        entitydefs: mapping like {'&': '&', ...} containing HTML entity
+        entitydefs: mapping like {"&": "&", ...} containing HTML entity
          definitions (a sensible default is used).  This is used to unescape
          entities in .get_text() (and .get_compressed_text()) and attribute
          values.  If the encoding can not represent the character, the entity
@@ -287,7 +287,7 @@
             if tok.type == "data":
                 text.append(tok.data)
             elif tok.type == "entityref":
-                t = unescape('&%s;'%tok.data, self._entitydefs, self.encoding)
+                t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
                 text.append(t)
             elif tok.type == "charref":
                 t = unescape_charref(tok.data, self.encoding)
@@ -362,7 +362,7 @@
 
 import sgmllib
 # monkeypatch to fix http://www.python.org/sf/803422 :-(
-sgmllib.charref = re.compile('&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]')
+sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
 class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
     def __init__(self, *args, **kwds):
         sgmllib.SGMLParser.__init__(self)


More information about the wwwsearch-commits mailing list