[icalendar-checkins] r19534 -
iCalendar/branch/sidnei-schooltool-compat/src/icalendar
dreamcatcher at codespeak.net
dreamcatcher at codespeak.net
Sat Nov 5 00:48:50 CET 2005
Author: dreamcatcher
Date: Sat Nov 5 00:48:23 2005
New Revision: 19534
Modified:
iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py
iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py
Log:
- Extra validation for params, keys and param values
- Strict-Mode for a cornerish case of the spec
Modified: iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py
==============================================================================
--- iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py (original)
+++ iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py Sat Nov 5 00:48:23 2005
@@ -1,11 +1,11 @@
# -*- coding: latin-1 -*-
"""
-This module parses and generates contentlines as defined in RFC 2445
-(iCalendar), but will probably work for other MIME types with similar syntax.
+This module parses and generates contentlines as defined in RFC 2445
+(iCalendar), but will probably work for other MIME types with similar syntax.
Eg. RFC 2426 (vCard)
-It is stupid in the sense that it treats the content purely as strings. No type
+It is stupid in the sense that it treats the content purely as strings. No type
conversion is attempted.
Copyright, 2005: Max M <maxm at mxm.dk>
@@ -29,6 +29,24 @@
return q_join(val)
return dQuote(val)
+# Could be improved
+NAME = re.compile('[\w-]+')
+UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F",:;]')
+QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F"]')
+
+def validate_token(name):
+ match = NAME.findall(name)
+ if len(match) == 1 and name == match[0]:
+ return
+ raise ValueError, name
+
+def validate_param_value(value, quoted=True):
+ validator = UNSAFE_CHAR
+ if quoted:
+ validator = QUNSAFE_CHAR
+ if validator.findall(value):
+ raise ValueError, value
+
QUOTABLE = re.compile('[,;:].')
def dQuote(val):
"""
@@ -74,69 +92,73 @@
'Max,Moller,"Rasmussen, Max"'
"""
return sep.join([dQuote(itm) for itm in lst])
-
+
class Parameters(CaselessDict):
"""
- Parser and generator of Property parameter strings. It knows nothing of
+ Parser and generator of Property parameter strings. It knows nothing of
datatypes. It's main concern is textual structure.
-
-
+
+
Simple parameter:value pair
>>> p = Parameters(parameter1='Value1')
>>> str(p)
'PARAMETER1=Value1'
-
-
+
+
keys are converted to upper
>>> p.keys()
['PARAMETER1']
-
-
+
+
Parameters are case insensitive
>>> p['parameter1']
'Value1'
>>> p['PARAMETER1']
'Value1'
-
-
+
+
Parameter with list of values must be seperated by comma
>>> p = Parameters({'parameter1':['Value1', 'Value2']})
>>> str(p)
'PARAMETER1=Value1,Value2'
-
-
+
+
Multiple parameters must be seperated by a semicolon
>>> p = Parameters({'RSVP':'TRUE', 'ROLE':'REQ-PARTICIPANT'})
>>> str(p)
'ROLE=REQ-PARTICIPANT;RSVP=TRUE'
-
-
+
+
Parameter values containing ',;:' must be double quoted
>>> p = Parameters({'ALTREP':'http://www.wiz.org'})
>>> str(p)
'ALTREP="http://www.wiz.org"'
-
-
+
+
list items must be quoted seperately
>>> p = Parameters({'MEMBER':['MAILTO:projectA at host.com', 'MAILTO:projectB at host.com', ]})
>>> str(p)
'MEMBER="MAILTO:projectA at host.com","MAILTO:projectB at host.com"'
-
+
Now the whole sheebang
>>> p = Parameters({'parameter1':'Value1', 'parameter2':['Value2', 'Value3'],\
'ALTREP':['http://www.wiz.org', 'value4']})
>>> str(p)
'ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3'
-
+
We can also parse parameter strings
>>> Parameters.from_string('PARAMETER1=Value 1;param2=Value 2')
Parameters({'PARAMETER1': 'Value 1', 'PARAM2': 'Value 2'})
-
+
+ Including empty strings
+ >>> Parameters.from_string('param=')
+ Parameters({'PARAM': ''})
+
We can also parse parameter strings
>>> Parameters.from_string('MEMBER="MAILTO:projectA at host.com","MAILTO:projectB at host.com"')
Parameters({'MEMBER': ['MAILTO:projectA at host.com', 'MAILTO:projectB at host.com']})
-
+
We can also parse parameter strings
>>> Parameters.from_string('ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3')
Parameters({'PARAMETER1': 'Value1', 'ALTREP': ['http://www.wiz.org', 'value4'], 'PARAMETER2': ['Value2', 'Value3']})
@@ -145,7 +167,7 @@
def params(self):
"""
- in rfc2445 keys are called parameters, so this is to be consitent with
+ in rfc2445 keys are called parameters, so this is to be consitent with
the naming conventions
"""
return self.keys()
@@ -163,7 +185,7 @@
### if encode:
### value = self._encode(name, value, encode)
### self[name] = value
-###
+###
### def decoded(self, name):
### "returns a decoded value, or list of same"
@@ -181,19 +203,36 @@
return ';'.join(result)
- def from_string(st):
+ def from_string(st, strict=False):
"Parses the parameter format from ical text format"
try:
# parse into strings
result = Parameters()
for param in q_split(st, ';'):
key, val = q_split(param, '=')
- # parsed and " stripped, but just strings
- vals = [v.strip('"') for v in q_split(val, ',')]
- if len(vals) == 1:
- result[key] = vals[0]
+ validate_token(key)
+ param_values = [v for v in q_split(val, ',')]
+ # Property parameter values that are not in quoted
+ # strings are case insensitive.
+ vals = []
+ for v in param_values:
+ if v.startswith('"') and v.endswith('"'):
+ v = v.strip('"')
+ validate_param_value(v, quoted=True)
+ vals.append(v)
+ else:
+ validate_param_value(v, quoted=False)
+ if strict:
+ vals.append(v.upper())
+ else:
+ vals.append(v)
+ if not vals:
+ result[key] = val
else:
- result[key] = vals
+ if len(vals) == 1:
+ result[key] = vals[0]
+ else:
+ result[key] = vals
return result
except:
raise ValueError, 'Not a valid parameter string'
@@ -206,61 +245,61 @@
class Contentline(str):
"""
- A content line is basically a string that can be folded and parsed into
+ A content line is basically a string that can be folded and parsed into
parts.
-
+
>>> c = Contentline('Si meliora dies, ut vina, poemata reddit')
>>> str(c)
'Si meliora dies, ut vina, poemata reddit'
-
+
A long line gets folded
>>> c = Contentline(''.join(['123456789 ']*10))
>>> str(c)
'123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 '
-
+
A folded line gets unfolded
>>> c = Contentline.from_string(str(c))
>>> c
'123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 '
-
+
It can parse itself into parts. Which is a tuple of (name, params, vals)
-
+
>>> c = Contentline('dtstart:20050101T120000')
>>> c.parts()
('dtstart', Parameters({}), '20050101T120000')
-
+
>>> c = Contentline('dtstart;value=datetime:20050101T120000')
>>> c.parts()
('dtstart', Parameters({'VALUE': 'datetime'}), '20050101T120000')
-
+
>>> c = Contentline('ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm at example.com')
>>> c.parts()
('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm at example.com')
>>> str(c)
'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm at example.com'
-
+
and back again
>>> parts = ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm at example.com')
>>> Contentline.from_parts(parts)
'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm at example.com'
-
+
and again
>>> parts = ('ATTENDEE', Parameters(), 'MAILTO:maxm at example.com')
>>> Contentline.from_parts(parts)
'ATTENDEE:MAILTO:maxm at example.com'
-
+
A value can also be any of the types defined in PropertyValues
>>> from icalendar.prop import vText
>>> parts = ('ATTENDEE', Parameters(), vText('MAILTO:test at example.com'))
>>> Contentline.from_parts(parts)
'ATTENDEE:MAILTO:test at example.com'
-
+
A value can also be unicode
>>> from icalendar.prop import vText
>>> parts = ('SUMMARY', Parameters(), vText(u'INternational char æ ø å'))
>>> Contentline.from_parts(parts)
'SUMMARY:INternational char \\xc3\\xa6 \\xc3\\xb8 \\xc3\\xa5'
-
+
Traversing could look like this.
>>> name, params, vals = c.parts()
>>> name
@@ -278,9 +317,35 @@
Traceback (most recent call last):
...
ValueError: Content line could not be parsed into parts
-
+
+ Another failure:
+ >>> c = Contentline(':maxm at example.com')
+ >>> c.parts()
+ Traceback (most recent call last):
+ ...
+ ValueError: Content line could not be parsed into parts
+
+ >>> c = Contentline('key;param=:value')
+ >>> c.parts()
+ ('key', Parameters({'PARAM': ''}), 'value')
+
+ >>> c = Contentline('key;param="pvalue":value')
+ >>> c.parts()
+ ('key', Parameters({'PARAM': 'pvalue'}), 'value')
+
+ Should bomb on missing param:
+ >>> c = Contentline.from_string("k;:no param")
+ >>> c.parts()
+ Traceback (most recent call last):
+ ...
+ ValueError: Content line could not be parsed into parts
"""
-
+
+ def __new__(cls, st, strict=False):
+ self = str.__new__(cls, st)
+ setattr(self, 'strict', strict)
+ return self
+
def from_parts(parts):
"Turns a tuple of parts into a content line"
(name, params, values) = [str(p) for p in parts]
@@ -289,13 +354,14 @@
return Contentline('%s;%s:%s' % (name, params, values))
return Contentline('%s:%s' % (name, values))
except:
- raise ValueError, 'Property: %s Wrong values "%s" or "%s"' % (repr(name),
- repr(params),
- repr(values))
+ raise ValueError(
+ 'Property: %s Wrong values "%s" or "%s"' % (repr(name),
+ repr(params),
+ repr(values)))
from_parts = staticmethod(from_parts)
def parts(self):
- """ Splits the content line up into (name, parameters, values) parts
+ """ Splits the content line up into (name, parameters, values) parts
"""
try:
name_split = None
@@ -311,18 +377,24 @@
if ch == '"':
inquotes = not inquotes
name = self[:name_split]
- params = Parameters.from_string(self[name_split+1:value_split])
+ if not name:
+ raise ValueError, 'Key name is required'
+ validate_token(name)
+ if name_split+1 == value_split:
+ raise ValueError, 'Invalid content line'
+ params = Parameters.from_string(self[name_split+1:value_split],
+ strict=self.strict)
values = self[value_split+1:]
return (name, params, values)
except:
raise ValueError, 'Content line could not be parsed into parts'
- def from_string(st):
+ def from_string(st, strict=False):
"Unfolds the content lines in an iCalendar into long content lines"
try:
# a fold is carriage return followed by either a space or a tab
a_fold = re.compile('\r\n[ \t]{1}')
- return Contentline(a_fold.sub('', st))
+ return Contentline(a_fold.sub('', st), strict=strict)
except:
raise ValueError, 'Expected StringType with content line'
from_string = staticmethod(from_string)
@@ -340,20 +412,20 @@
class Contentlines(list):
"""
- I assume that iCalendar files generally are a few kilobytes in size. Then
- this should be efficient. for Huge files, an iterator should probably be
+ I assume that iCalendar files generally are a few kilobytes in size. Then
+ this should be efficient. for Huge files, an iterator should probably be
used instead.
-
+
>>> c = Contentlines([Contentline('BEGIN:VEVENT\\r\\n')])
>>> str(c)
'BEGIN:VEVENT\\r\\n'
-
+
Lets try appending it with a 100 charater wide string
>>> c.append(Contentline(''.join(['123456789 ']*10)+'\\r\\n'))
>>> str(c)
'BEGIN:VEVENT\\r\\n\\r\\n123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 \\r\\n'
-
- Notice that there is an extra empty string in the end of the content lines.
+
+ Notice that there is an extra empty string in the end of the content lines.
That is so they can be easily joined with: '\r\n'.join(contentlines)).
>>> Contentlines.from_string('A short line\\r\\n')
['A short line', '']
@@ -362,11 +434,11 @@
>>> Contentlines.from_string('A faked\\r\\n long line\\r\\nAnd another lin\\r\\n\\te that is folded\\r\\n')
['A faked long line', 'And another line that is folded', '']
"""
-
+
def __str__(self):
"Simply join self."
return '\r\n'.join(map(str, self))
-
+
def from_string(st):
"Parses a string into content lines"
try:
@@ -381,7 +453,7 @@
from_string = staticmethod(from_string)
-# ran this:
+# ran this:
# sample = open('./samples/test.ics', 'rb').read() # binary file in windows!
# lines = Contentlines.from_string(sample)
# for line in lines[:-1]:
Modified: iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py
==============================================================================
--- iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py (original)
+++ iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py Sat Nov 5 00:48:23 2005
@@ -52,7 +52,12 @@
from icalendar.caselessdict import CaselessDict
from icalendar.parser import Parameters
-
+DATE_PART = r'(\d+)D'
+TIME_PART = r'T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?'
+DATETIME_PART = '(?:%s)?(?:%s)?' % (DATE_PART, TIME_PART)
+WEEKS_PART = r'(\d+)W'
+DURATION_REGEX = re.compile(r'([-+]?)P(?:%s|%s)$'
+ % (WEEKS_PART, DATETIME_PART))
class vBinary:
"""
@@ -443,13 +448,7 @@
Parses the data format from ical text format.
"""
try:
- date_part = r'(\d+)D'
- time_part = r'T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?'
- datetime_part = '(?:%s)?(?:%s)?' % (date_part, time_part)
- weeks_part = r'(\d+)W'
- duration_rx = re.compile(r'([-+]?)P(?:%s|%s)$'
- % (weeks_part, datetime_part))
- match = duration_rx.match(ical)
+ match = DURATION_REGEX.match(ical)
sign, weeks, days, hours, minutes, seconds = match.groups()
if weeks:
value = timedelta(weeks=int(weeks))
More information about the icalendar-checkins
mailing list