[icalendar-checkins] r19534 - iCalendar/branch/sidnei-schooltool-compat/src/icalendar

dreamcatcher at codespeak.net dreamcatcher at codespeak.net
Sat Nov 5 00:48:50 CET 2005


Author: dreamcatcher
Date: Sat Nov  5 00:48:23 2005
New Revision: 19534

Modified:
   iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py
   iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py
Log:

- Extra validation for params, keys and param values
- Strict-Mode for a cornerish case of the spec


Modified: iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py
==============================================================================
--- iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py	(original)
+++ iCalendar/branch/sidnei-schooltool-compat/src/icalendar/parser.py	Sat Nov  5 00:48:23 2005
@@ -1,11 +1,11 @@
 # -*- coding: latin-1 -*-
 
 """
-This module parses and generates contentlines as defined in RFC 2445 
-(iCalendar), but will probably work for other MIME types with similar syntax. 
+This module parses and generates contentlines as defined in RFC 2445
+(iCalendar), but will probably work for other MIME types with similar syntax.
 Eg. RFC 2426 (vCard)
 
-It is stupid in the sense that it treats the content purely as strings. No type 
+It is stupid in the sense that it treats the content purely as strings. No type
 conversion is attempted.
 
 Copyright, 2005: Max M <maxm at mxm.dk>
@@ -29,6 +29,24 @@
         return q_join(val)
     return dQuote(val)
 
+# Could be improved
+NAME = re.compile('[\w-]+')
+UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F",:;]')
+QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F"]')
+
+def validate_token(name):
+    match = NAME.findall(name)
+    if len(match) == 1 and name == match[0]:
+        return
+    raise ValueError, name
+
+def validate_param_value(value, quoted=True):
+    validator = UNSAFE_CHAR
+    if quoted:
+        validator = QUNSAFE_CHAR
+    if validator.findall(value):
+        raise ValueError, value
+
 QUOTABLE = re.compile('[,;:].')
 def dQuote(val):
     """
@@ -74,69 +92,73 @@
     'Max,Moller,"Rasmussen, Max"'
     """
     return sep.join([dQuote(itm) for itm in lst])
-    
+
 class Parameters(CaselessDict):
 
     """
-    Parser and generator of Property parameter strings. It knows nothing of 
+    Parser and generator of Property parameter strings. It knows nothing of
     datatypes. It's main concern is textual structure.
-    
-    
+
+
     Simple parameter:value pair
     >>> p = Parameters(parameter1='Value1')
     >>> str(p)
     'PARAMETER1=Value1'
-    
-    
+
+
     keys are converted to upper
     >>> p.keys()
     ['PARAMETER1']
-    
-    
+
+
     Parameters are case insensitive
     >>> p['parameter1']
     'Value1'
     >>> p['PARAMETER1']
     'Value1'
-    
-    
+
+
     Parameter with list of values must be seperated by comma
     >>> p = Parameters({'parameter1':['Value1', 'Value2']})
     >>> str(p)
     'PARAMETER1=Value1,Value2'
-    
-    
+
+
     Multiple parameters must be seperated by a semicolon
     >>> p = Parameters({'RSVP':'TRUE', 'ROLE':'REQ-PARTICIPANT'})
     >>> str(p)
     'ROLE=REQ-PARTICIPANT;RSVP=TRUE'
-    
-    
+
+
     Parameter values containing ',;:' must be double quoted
     >>> p = Parameters({'ALTREP':'http://www.wiz.org'})
     >>> str(p)
     'ALTREP="http://www.wiz.org"'
-    
-    
+
+
     list items must be quoted seperately
     >>> p = Parameters({'MEMBER':['MAILTO:projectA at host.com', 'MAILTO:projectB at host.com', ]})
     >>> str(p)
     'MEMBER="MAILTO:projectA at host.com","MAILTO:projectB at host.com"'
-    
+
     Now the whole sheebang
     >>> p = Parameters({'parameter1':'Value1', 'parameter2':['Value2', 'Value3'],\
                           'ALTREP':['http://www.wiz.org', 'value4']})
     >>> str(p)
     'ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3'
-    
+
     We can also parse parameter strings
     >>> Parameters.from_string('PARAMETER1=Value 1;param2=Value 2')
     Parameters({'PARAMETER1': 'Value 1', 'PARAM2': 'Value 2'})
-    
+
+    Including empty strings
+    >>> Parameters.from_string('param=')
+    Parameters({'PARAM': ''})
+
     We can also parse parameter strings
     >>> Parameters.from_string('MEMBER="MAILTO:projectA at host.com","MAILTO:projectB at host.com"')
     Parameters({'MEMBER': ['MAILTO:projectA at host.com', 'MAILTO:projectB at host.com']})
-    
+
     We can also parse parameter strings
     >>> Parameters.from_string('ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3')
     Parameters({'PARAMETER1': 'Value1', 'ALTREP': ['http://www.wiz.org', 'value4'], 'PARAMETER2': ['Value2', 'Value3']})
@@ -145,7 +167,7 @@
 
     def params(self):
         """
-        in rfc2445 keys are called parameters, so this is to be consitent with 
+        in rfc2445 keys are called parameters, so this is to be consitent with
         the naming conventions
         """
         return self.keys()
@@ -163,7 +185,7 @@
 ###        if encode:
 ###            value = self._encode(name, value, encode)
 ###        self[name] = value
-###        
+###
 ###    def decoded(self, name):
 ###        "returns a decoded value, or list of same"
 
@@ -181,19 +203,36 @@
         return ';'.join(result)
 
 
-    def from_string(st):
+    def from_string(st, strict=False):
         "Parses the parameter format from ical text format"
         try:
             # parse into strings
             result = Parameters()
             for param in q_split(st, ';'):
                 key, val =  q_split(param, '=')
-                # parsed and " stripped, but just strings
-                vals = [v.strip('"') for v in q_split(val, ',')]
-                if len(vals) == 1:
-                    result[key] = vals[0]
+                validate_token(key)
+                param_values = [v for v in q_split(val, ',')]
+                # Property parameter values that are not in quoted
+                # strings are case insensitive.
+                vals = []
+                for v in param_values:
+                    if v.startswith('"') and v.endswith('"'):
+                        v = v.strip('"')
+                        validate_param_value(v, quoted=True)
+                        vals.append(v)
+                    else:
+                        validate_param_value(v, quoted=False)
+                        if strict:
+                            vals.append(v.upper())
+                        else:
+                            vals.append(v)
+                if not vals:
+                    result[key] = val
                 else:
-                    result[key] = vals
+                    if len(vals) == 1:
+                        result[key] = vals[0]
+                    else:
+                        result[key] = vals
             return result
         except:
             raise ValueError, 'Not a valid parameter string'
@@ -206,61 +245,61 @@
 class Contentline(str):
 
     """
-    A content line is basically a string that can be folded and parsed into 
+    A content line is basically a string that can be folded and parsed into
     parts.
-    
+
     >>> c = Contentline('Si meliora dies, ut vina, poemata reddit')
     >>> str(c)
     'Si meliora dies, ut vina, poemata reddit'
-    
+
     A long line gets folded
     >>> c = Contentline(''.join(['123456789 ']*10))
     >>> str(c)
     '123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 '
-    
+
     A folded line gets unfolded
     >>> c = Contentline.from_string(str(c))
     >>> c
     '123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 '
-    
+
     It can parse itself into parts. Which is a tuple of (name, params, vals)
-    
+
     >>> c = Contentline('dtstart:20050101T120000')
     >>> c.parts()
     ('dtstart', Parameters({}), '20050101T120000')
-    
+
     >>> c = Contentline('dtstart;value=datetime:20050101T120000')
     >>> c.parts()
     ('dtstart', Parameters({'VALUE': 'datetime'}), '20050101T120000')
-    
+
     >>> c = Contentline('ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm at example.com')
     >>> c.parts()
     ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm at example.com')
     >>> str(c)
     'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm at example.com'
-    
+
     and back again
     >>> parts = ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm at example.com')
     >>> Contentline.from_parts(parts)
     'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm at example.com'
-    
+
     and again
     >>> parts = ('ATTENDEE', Parameters(), 'MAILTO:maxm at example.com')
     >>> Contentline.from_parts(parts)
     'ATTENDEE:MAILTO:maxm at example.com'
-    
+
     A value can also be any of the types defined in PropertyValues
     >>> from icalendar.prop import vText
     >>> parts = ('ATTENDEE', Parameters(), vText('MAILTO:test at example.com'))
     >>> Contentline.from_parts(parts)
     'ATTENDEE:MAILTO:test at example.com'
-    
+
     A value can also be unicode
     >>> from icalendar.prop import vText
     >>> parts = ('SUMMARY', Parameters(), vText(u'INternational char æ ø å'))
     >>> Contentline.from_parts(parts)
     'SUMMARY:INternational char \\xc3\\xa6 \\xc3\\xb8 \\xc3\\xa5'
-    
+
     Traversing could look like this.
     >>> name, params, vals = c.parts()
     >>> name
@@ -278,9 +317,35 @@
     Traceback (most recent call last):
         ...
     ValueError: Content line could not be parsed into parts
-    
+
+    Another failure:
+    >>> c = Contentline(':maxm at example.com')
+    >>> c.parts()
+    Traceback (most recent call last):
+        ...
+    ValueError: Content line could not be parsed into parts
+
+    >>> c = Contentline('key;param=:value')
+    >>> c.parts()
+    ('key', Parameters({'PARAM': ''}), 'value')
+
+    >>> c = Contentline('key;param="pvalue":value')
+    >>> c.parts()
+    ('key', Parameters({'PARAM': 'pvalue'}), 'value')
+
+    Should bomb on missing param:
+    >>> c = Contentline.from_string("k;:no param")
+    >>> c.parts()
+    Traceback (most recent call last):
+        ...
+    ValueError: Content line could not be parsed into parts
     """
-    
+
+    def __new__(cls, st, strict=False):
+        self = str.__new__(cls, st)
+        setattr(self, 'strict', strict)
+        return self
+
     def from_parts(parts):
         "Turns a tuple of parts into a content line"
         (name, params, values) = [str(p) for p in parts]
@@ -289,13 +354,14 @@
                 return Contentline('%s;%s:%s' % (name, params, values))
             return Contentline('%s:%s' %  (name, values))
         except:
-            raise ValueError, 'Property: %s Wrong values "%s" or "%s"' % (repr(name),
-                                                                          repr(params),
-                                                                          repr(values))
+            raise ValueError(
+                'Property: %s Wrong values "%s" or "%s"' % (repr(name),
+                                                            repr(params),
+                                                            repr(values)))
     from_parts = staticmethod(from_parts)
 
     def parts(self):
-        """ Splits the content line up into (name, parameters, values) parts 
+        """ Splits the content line up into (name, parameters, values) parts
         """
         try:
             name_split = None
@@ -311,18 +377,24 @@
                 if ch == '"':
                     inquotes = not inquotes
             name = self[:name_split]
-            params = Parameters.from_string(self[name_split+1:value_split])
+            if not name:
+                raise ValueError, 'Key name is required'
+            validate_token(name)
+            if name_split+1 == value_split:
+                raise ValueError, 'Invalid content line'
+            params = Parameters.from_string(self[name_split+1:value_split],
+                                            strict=self.strict)
             values = self[value_split+1:]
             return (name, params, values)
         except:
             raise ValueError, 'Content line could not be parsed into parts'
 
-    def from_string(st):
+    def from_string(st, strict=False):
         "Unfolds the content lines in an iCalendar into long content lines"
         try:
             # a fold is carriage return followed by either a space or a tab
             a_fold = re.compile('\r\n[ \t]{1}')
-            return Contentline(a_fold.sub('', st))
+            return Contentline(a_fold.sub('', st), strict=strict)
         except:
             raise ValueError, 'Expected StringType with content line'
     from_string = staticmethod(from_string)
@@ -340,20 +412,20 @@
 class Contentlines(list):
 
     """
-    I assume that iCalendar files generally are a few kilobytes in size. Then 
-    this should be efficient. for Huge files, an iterator should probably be 
+    I assume that iCalendar files generally are a few kilobytes in size. Then
+    this should be efficient. for Huge files, an iterator should probably be
     used instead.
-    
+
     >>> c = Contentlines([Contentline('BEGIN:VEVENT\\r\\n')])
     >>> str(c)
     'BEGIN:VEVENT\\r\\n'
-    
+
     Lets try appending it with a 100 charater wide string
     >>> c.append(Contentline(''.join(['123456789 ']*10)+'\\r\\n'))
     >>> str(c)
     'BEGIN:VEVENT\\r\\n\\r\\n123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 \\r\\n'
-    
-    Notice that there is an extra empty string in the end of the content lines. 
+
+    Notice that there is an extra empty string in the end of the content lines.
     That is so they can be easily joined with: '\r\n'.join(contentlines)).
     >>> Contentlines.from_string('A short line\\r\\n')
     ['A short line', '']
@@ -362,11 +434,11 @@
     >>> Contentlines.from_string('A faked\\r\\n  long line\\r\\nAnd another lin\\r\\n\\te that is folded\\r\\n')
     ['A faked long line', 'And another line that is folded', '']
     """
-    
+
     def __str__(self):
         "Simply join self."
         return '\r\n'.join(map(str, self))
-    
+
     def from_string(st):
         "Parses a string into content lines"
         try:
@@ -381,7 +453,7 @@
     from_string = staticmethod(from_string)
 
 
-# ran this:    
+# ran this:
 #    sample = open('./samples/test.ics', 'rb').read() # binary file in windows!
 #    lines = Contentlines.from_string(sample)
 #    for line in lines[:-1]:

Modified: iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py
==============================================================================
--- iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py	(original)
+++ iCalendar/branch/sidnei-schooltool-compat/src/icalendar/prop.py	Sat Nov  5 00:48:23 2005
@@ -52,7 +52,12 @@
 from icalendar.caselessdict import CaselessDict
 from icalendar.parser import Parameters
 
-
+DATE_PART = r'(\d+)D'
+TIME_PART = r'T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?'
+DATETIME_PART = '(?:%s)?(?:%s)?' % (DATE_PART, TIME_PART)
+WEEKS_PART = r'(\d+)W'
+DURATION_REGEX = re.compile(r'([-+]?)P(?:%s|%s)$'
+                            % (WEEKS_PART, DATETIME_PART))
 
 class vBinary:
     """
@@ -443,13 +448,7 @@
         Parses the data format from ical text format.
         """
         try:
-            date_part = r'(\d+)D'
-            time_part = r'T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?'
-            datetime_part = '(?:%s)?(?:%s)?' % (date_part, time_part)
-            weeks_part = r'(\d+)W'
-            duration_rx = re.compile(r'([-+]?)P(?:%s|%s)$'
-                                     % (weeks_part, datetime_part))
-            match = duration_rx.match(ical)
+            match = DURATION_REGEX.match(ical)
             sign, weeks, days, hours, minutes, seconds = match.groups()
             if weeks:
                 value = timedelta(weeks=int(weeks))


More information about the icalendar-checkins mailing list