Changeset 115:7aa870b75d11 for imapclient/response_parser.py
- Timestamp: 01/11/10 19:11:54 (2 years ago)
- Branch: default
- Files: 1 modified
imapclient/response_parser.py (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
-
imapclient/response_parser.py
r112 r115 22 22 23 23 24 25 24 def parse_response(text): 26 25 #XXX doc … … 38 37 response = iter(parse_response(text)) 39 38 40 def expect(expected_value):41 next_value = response.next().upper()42 if next_value != expected_value:43 raise ParseError('expected %r, got %r' % (expected_value, next_value))44 45 39 parsed_response = {} 46 40 while True: 47 41 try: 48 expect('*')42 msg_id = _int_or_error(response.next(), 'invalid message ID') 49 43 except StopIteration: 50 44 break 51 45 52 msg_id = _int_or_error(response.next(), 'invalid message ID') 53 expect('FETCH') 46 try: 47 msg_response = response.next() 48 except StopIteration: 49 raise ParseError('unexpected EOF') 54 50 55 msg_response = response.next()56 51 if not isinstance(msg_response, tuple): 57 52 raise ParseError('bad response type: %s' % repr(msg_response)) … … 59 54 raise ParseError('uneven number of response items: %s' % repr(msg_response)) 60 55 61 #XXX extract this 62 msg_data = {} 56 # always return the 'sequence' of the message, so it is available 57 # even if we return keyed by UID. 58 msg_data = {'SEQ': msg_id} 63 59 for i in xrange(0, len(msg_response), 2): 64 60 word = msg_response[i].upper() … … 81 77 raise ParseError('%s: %s' % (error_text, repr(value))) 82 78 79 EOF = object() 83 80 81 # imaplib has poor handling of 'literals' - it both fails to remove the 82 # {size} marker, and fails to keep responses grouped into the same logical 83 # 'line'. What we end up with is a list of response 'records', where each 84 # record is either a simple string, or tuple of (str_with_lit, literal) - 85 # where str_with_lit is a string with the {xxx} marker at its end. Note 86 # that each elt of this list does *not* correspond 1:1 with the untagged 87 # responses. 88 # (http://bugs.python.org/issue5045 also has comments about this) 89 # So: we have a special file-like object for each of these records. 
When 90 # a string literal is finally processed, we peek into this file-like object 91 # to grab the literal. 92 class LiteralHandlingReader: 93 def __init__(self, lexer, resp_record): 94 self.pushed = None 95 self.lexer = lexer 96 if isinstance(resp_record, tuple): 97 # A 'record' with a string which includes a literal marker, and 98 # the literal itself. 99 src_text, self.literal = resp_record 100 assert src_text.endswith("}"), src_text 101 # add a token-sep after the text. 102 self.src = StringIO(src_text + " ") 103 else: 104 # just a line with no literals. 105 self.src = StringIO(resp_record) 106 self.literal = None 84 107 85 EOF = object() 108 def read(self, n): 109 # We also hack into the lexer so we get special treatment for '\\' 110 # chars - they are only special inside a quoted string. 111 assert n==1 112 if self.pushed is not None: 113 ret = self.pushed 114 self.pushed = None 115 else: 116 ret = self.src.read(n) 117 if ret=="\\" and self.lexer.state not in '"\\': 118 self.pushed = "\\" 119 return ret 120 121 def close(self): 122 self.src.close() 123 self.src = None 124 self.literal = None 125 86 126 87 127 class ResponseTokeniser(object): 88 128 89 129 CTRL_CHARS = ''.join([chr(ch) for ch in range(32)]) 90 ATOM_SPECIALS = r'()%*" ]' + CTRL_CHARS130 ATOM_SPECIALS = r'()%*"' + CTRL_CHARS 91 131 ALL_CHARS = [chr(ch) for ch in range(256)] 92 132 ATOM_NON_SPECIALS = [ch for ch in ALL_CHARS if ch not in ATOM_SPECIALS] 93 133 94 def __init__(self, text): 95 self.lex = shlex.shlex(text) 134 def __init__(self, resp_chunks): 135 # initialize the lexer with all the chunks we read. 
136 self.lex = shlex.shlex('', posix=True) 137 for chunk in reversed(resp_chunks): 138 self.lex.push_source(LiteralHandlingReader(self.lex, chunk)) 139 96 140 self.lex.quotes = '"' 97 141 self.lex.commenters = '' … … 106 150 except StopIteration: 107 151 return EOF 108 109 def read(self, bytes):110 return self.lex.instream.read(bytes)111 152 112 153 … … 126 167 elif token.startswith('{'): 127 168 literal_len = int(token[1:-1]) 128 if src.read(1) != '\n': 129 raise ParseError('No CRLF after %s' % token) 130 return src.read(literal_len) 169 literal_text = src.lex.instream.literal 170 if literal_text is None: 171 raise ParseError('No literal corresponds to %r' % token) 172 if len(literal_text) != literal_len: 173 raise ParseError('Expecting literal of size %d, got %d' % ( 174 literal_len, len(literal_text))) 175 return literal_text 131 176 elif token.startswith('"'): 132 177 return token[1:-1] … … 135 180 else: 136 181 return token 137 138 139
