Show
Ignore:
Timestamp:
01/11/10 19:11:54 (2 years ago)
Author:
Menno Smits <menno@…>
Branch:
default
Message:

Parse imaplib's fetch data structures instead of doing it all ourselves

This makes the code messier but means imaplib does a bit more of the
heavy lifting - this is probably safer from a bug standpoint.

From Mark Hammond.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • imapclient/response_parser.py

    r112 r115  
    2222 
    2323 
    24  
    2524def parse_response(text): 
    2625    #XXX doc 
     
    3837    response = iter(parse_response(text)) 
    3938 
    40     def expect(expected_value): 
    41         next_value = response.next().upper() 
    42         if next_value != expected_value: 
    43             raise ParseError('expected %r, got %r' % (expected_value, next_value)) 
    44  
    4539    parsed_response = {} 
    4640    while True: 
    4741        try: 
    48             expect('*') 
     42            msg_id = _int_or_error(response.next(), 'invalid message ID') 
    4943        except StopIteration: 
    5044            break 
    5145 
    52         msg_id = _int_or_error(response.next(), 'invalid message ID') 
    53         expect('FETCH') 
     46        try: 
     47            msg_response = response.next() 
     48        except StopIteration: 
     49            raise ParseError('unexpected EOF') 
    5450 
    55         msg_response = response.next() 
    5651        if not isinstance(msg_response, tuple): 
    5752            raise ParseError('bad response type: %s' % repr(msg_response)) 
     
    5954            raise ParseError('uneven number of response items: %s' % repr(msg_response)) 
    6055 
    61         #XXX extract this 
    62         msg_data = {} 
     56        # always return the 'sequence' of the message, so it is available 
     57        # even if we return keyed by UID. 
     58        msg_data = {'SEQ': msg_id} 
    6359        for i in xrange(0, len(msg_response), 2): 
    6460            word = msg_response[i].upper() 
     
    8177        raise ParseError('%s: %s' % (error_text, repr(value))) 
    8278 
     79EOF = object() 
    8380 
     81# imaplib has poor handling of 'literals' - it both fails to remove the 
     82# {size} marker, and fails to keep responses grouped into the same logical 
     83# 'line'.  What we end up with is a list of response 'records', where each 
     84# record is either a simple string, or tuple of (str_with_lit, literal) - 
     85# where str_with_lit is a string with the {xxx} marker at its end.  Note 
     86# that each element of this list does *not* correspond 1:1 with the untagged 
     87# responses. 
     88# (http://bugs.python.org/issue5045 also has comments about this) 
     89# So: we have a special file-like object for each of these records.  When 
     90# a string literal is finally processed, we peek into this file-like object 
     91# to grab the literal. 
     92class LiteralHandlingReader: 
     93    def __init__(self, lexer, resp_record): 
     94        self.pushed = None 
     95        self.lexer = lexer 
     96        if isinstance(resp_record, tuple): 
     97            # A 'record' with a string which includes a literal marker, and 
     98            # the literal itself. 
     99            src_text, self.literal = resp_record 
     100            assert src_text.endswith("}"), src_text 
     101            # add a token-sep after the text. 
     102            self.src = StringIO(src_text + " ") 
     103        else: 
     104            # just a line with no literals. 
     105            self.src = StringIO(resp_record) 
     106            self.literal = None 
    84107 
    85 EOF = object() 
     108    def read(self, n): 
     109        # We also hack into the lexer so we get special treatment for '\\' 
     110        # chars - they are only special inside a quoted string. 
     111        assert n==1 
     112        if self.pushed is not None: 
     113            ret = self.pushed 
     114            self.pushed = None 
     115        else: 
     116            ret = self.src.read(n) 
     117            if ret=="\\" and self.lexer.state not in '"\\': 
     118                self.pushed = "\\" 
     119        return ret 
     120 
     121    def close(self): 
     122        self.src.close() 
     123        self.src = None 
     124        self.literal = None 
     125 
    86126 
    87127class ResponseTokeniser(object): 
    88128 
    89129    CTRL_CHARS = ''.join([chr(ch) for ch in range(32)]) 
    90     ATOM_SPECIALS = r'()%*"]' + CTRL_CHARS 
     130    ATOM_SPECIALS = r'()%*"' + CTRL_CHARS 
    91131    ALL_CHARS = [chr(ch) for ch in range(256)] 
    92132    ATOM_NON_SPECIALS = [ch for ch in ALL_CHARS if ch not in ATOM_SPECIALS] 
    93133 
    94     def __init__(self, text): 
    95         self.lex = shlex.shlex(text) 
     134    def __init__(self, resp_chunks): 
     135        # initialize the lexer with all the chunks we read. 
     136        self.lex = shlex.shlex('', posix=True) 
     137        for chunk in reversed(resp_chunks): 
     138            self.lex.push_source(LiteralHandlingReader(self.lex, chunk)) 
     139 
    96140        self.lex.quotes = '"' 
    97141        self.lex.commenters = '' 
     
    106150        except StopIteration: 
    107151            return EOF 
    108  
    109     def read(self, bytes): 
    110         return self.lex.instream.read(bytes) 
    111152 
    112153 
     
    126167    elif token.startswith('{'): 
    127168        literal_len = int(token[1:-1]) 
    128         if src.read(1) != '\n': 
    129            raise ParseError('No CRLF after %s' % token) 
    130         return src.read(literal_len) 
     169        literal_text = src.lex.instream.literal 
     170        if literal_text is None: 
     171           raise ParseError('No literal corresponds to %r' % token) 
     172        if len(literal_text) != literal_len: 
     173            raise ParseError('Expecting literal of size %d, got %d' % ( 
     174                                literal_len, len(literal_text))) 
     175        return literal_text 
    131176    elif token.startswith('"'): 
    132177        return token[1:-1] 
     
    135180    else: 
    136181        return token 
    137  
    138  
    139