[Lxml-checkins] r44083 - in lxml/trunk: . src/lxml
scoder at codespeak.net
scoder at codespeak.net
Thu Jun 7 12:53:27 CEST 2007
Author: scoder
Date: Thu Jun 7 12:53:26 2007
New Revision: 44083
Modified:
lxml/trunk/TODO.txt
lxml/trunk/src/lxml/parser.pxi
Log:
network access disabled by default for parsers, some cleanup in parser option code
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Thu Jun 7 12:53:26 2007
@@ -73,4 +73,8 @@
* clean support for entities (maybe an Entity element class?)
-* disable network access in parsers by default
+
+Changes in 2.0
+--------------
+
+* network access in parsers disabled by default
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Thu Jun 7 12:53:26 2007
@@ -665,8 +665,9 @@
cdef int _XML_DEFAULT_PARSE_OPTIONS
_XML_DEFAULT_PARSE_OPTIONS = (
- xmlparser.XML_PARSE_NOENT |
+ xmlparser.XML_PARSE_NOENT |
xmlparser.XML_PARSE_NOCDATA |
+ xmlparser.XML_PARSE_NONET |
xmlparser.XML_PARSE_COMPACT
)
@@ -685,19 +686,19 @@
* attribute_defaults - read default attributes from DTD
* dtd_validation - validate (if DTD is available)
* load_dtd - use DTD for parsing
- * no_network - prevent network access
+ * no_network - prevent network access (default: True)
* ns_clean - clean up redundant namespace declarations
* recover - try hard to parse through broken XML
* remove_blank_text - discard blank text nodes
- * compact - safe memory for short text content (default: on)
- * resolve_entities - replace entities by their text value (default: on)
+ * compact - save memory for short text content (default: True)
+ * resolve_entities - replace entities by their text value (default: True)
Note that you should avoid sharing parsers between threads. While this is
not harmful, it is more efficient to use separate parsers. This does not
apply to the default parser.
"""
def __init__(self, attribute_defaults=False, dtd_validation=False,
- load_dtd=False, no_network=False, ns_clean=False,
+ load_dtd=False, no_network=True, ns_clean=False,
recover=False, remove_blank_text=False, compact=True,
resolve_entities=True):
cdef int parse_options
@@ -712,14 +713,14 @@
if attribute_defaults:
parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR | \
xmlparser.XML_PARSE_DTDLOAD
- if no_network:
- parse_options = parse_options | xmlparser.XML_PARSE_NONET
if ns_clean:
parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN
if recover:
parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
if remove_blank_text:
parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
+ if not no_network:
+ parse_options = parse_options ^ xmlparser.XML_PARSE_NONET
if not compact:
parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
if not resolve_entities:
@@ -777,7 +778,15 @@
__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
-def setDefaultParser(_BaseParser parser=None):
+def setDefaultParser(parser):
+ "Deprecated, please use set_default_parser instead."
+ set_default_parser(parser)
+
+def getDefaultParser():
+ "Deprecated, please use get_default_parser instead."
+ return get_default_parser()
+
+def set_default_parser(_BaseParser parser=None):
"""Set a default parser for the current thread. This parser is used
globally whenever no parser is supplied to the various parse functions of
the lxml API. If this function is called without a parser (or if it is
@@ -791,24 +800,19 @@
parser = __DEFAULT_XML_PARSER
__GLOBAL_PARSER_CONTEXT.setDefaultParser(parser)
-def getDefaultParser():
- return __GLOBAL_PARSER_CONTEXT.getDefaultParser()
-
-def set_default_parser(parser):
- "Deprecated, please use setDefaultParser instead."
- setDefaultParser(parser)
-
def get_default_parser():
- "Deprecated, please use getDefaultParser instead."
- return getDefaultParser()
+ return __GLOBAL_PARSER_CONTEXT.getDefaultParser()
############################################################
## HTML parser
############################################################
cdef int _HTML_DEFAULT_PARSE_OPTIONS
-_HTML_DEFAULT_PARSE_OPTIONS = \
+_HTML_DEFAULT_PARSE_OPTIONS = (
+ htmlparser.HTML_PARSE_RECOVER |
+ htmlparser.HTML_PARSE_NONET |
htmlparser.HTML_PARSE_COMPACT
+ )
cdef class HTMLParser(_BaseParser):
"""The HTML parser. This parser allows reading HTML into a normal XML
@@ -817,25 +821,25 @@
Available boolean keyword arguments:
* recover - try hard to parse through broken HTML (default: True)
- * no_network - prevent network access
+ * no_network - prevent network access (default: True)
* remove_blank_text - discard empty text nodes
- * compact - safe memory for short text content (default: on)
+ * compact - save memory for short text content (default: True)
- Note that you should avoid sharing parsers between threads for parformance
+ Note that you should avoid sharing parsers between threads for performance
reasons.
"""
- def __init__(self, recover=True, no_network=False, remove_blank_text=False,
+ def __init__(self, recover=True, no_network=True, remove_blank_text=False,
compact=True):
cdef int parse_options
_BaseParser.__init__(self)
parse_options = _HTML_DEFAULT_PARSE_OPTIONS
- if recover:
- parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER
- if no_network:
- parse_options = parse_options | htmlparser.HTML_PARSE_NONET
if remove_blank_text:
parse_options = parse_options | htmlparser.HTML_PARSE_NOBLANKS
+ if not recover:
+ parse_options = parse_options ^ htmlparser.HTML_PARSE_RECOVER
+ if not no_network:
+ parse_options = parse_options ^ htmlparser.HTML_PARSE_NONET
if not compact:
parse_options = parse_options ^ htmlparser.HTML_PARSE_COMPACT
More information about the lxml-checkins
mailing list