Changeset 149:ee1c4e27b4e0
- Timestamp: 06/05/10 10:22:19 (21 months ago)
- Author: Mark Hammond <mhammond@…>
- Branch: default
- Message: implement a lexer which performs 7x faster for us
- Location: imapclient
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
| r143 | r149 | |
| 5 | 5 | import re |
| 6 | 6 | import imaplib |
| 7 | | import shlex |
| | 7 | import response_lexer |
| 8 | 8 | #imaplib.Debug = 5 |
| 9 | 9 | |
| … | … | |
| 553 | 553 | self._checkok('getacl', typ, data) |
| 554 | 554 | |
| 555 | | parts = shlex.split(data[0]) |
| | 555 | parts = list(response_lexer.Lexer([data[0]])) |
| 556 | 556 | parts = parts[1:] # First item is folder name |
| 557 | 557 | |
-
| r144 | r149 | |
| 9 | 9 | |
| 10 | 10 | import imaplib |
| 11 | | import shlex |
| 12 | | from cStringIO import StringIO |
| | 11 | import response_lexer |
| 13 | 12 | from datetime import datetime |
| 14 | 13 | from fixed_offset import FixedOffset |
| … | … | |
| 27 | 26 | Returns nested tuples of appropriately typed objects. |
| 28 | 27 | """ |
| | 28 | return tuple(gen_parsed_response(text)) |
| | 29 | |
| | 30 | |
| | 31 | def gen_parsed_response(text): |
| | 32 | if not text: |
| | 33 | return |
| 29 | 34 | src = ResponseTokeniser(text) |
| | 35 | token = None |
| 30 | 36 | try: |
| 31 | | return tuple(atom(src, token) for token in src) |
| | 37 | for token in src: |
| | 38 | yield atom(src, token) |
| 32 | 39 | except ParseError: |
| 33 | 40 | raise |
| 34 | 41 | except ValueError, err: |
| 35 | | raise ParseError("%s: %s" % (str(err), src.lex.token)) |
| | 42 | raise ParseError("%s: %s" % (str(err), token)) |
| 36 | 43 | |
| 37 | 44 | |
| … | … | |
| 42 | 49 | keyed by FETCH field type (eg."RFC822"). |
| 43 | 50 | """ |
| 44 | | response = iter(parse_response(text)) |
| | 51 | response = gen_parsed_response(text) |
| 45 | 52 | |
| 46 | 53 | parsed_response = {} |
| … | … | |
| 124 | 131 | # a string literal is finally processed, we peek into this file-like object |
| 125 | 132 | # to grab the literal. |
| 126 | | class LiteralHandlingReader: |
| | 133 | class LiteralHandlingIter: |
| 127 | 134 | def __init__(self, lexer, resp_record): |
| 128 | 135 | self.pushed = None |
| … | … | |
| 133 | 140 | src_text, self.literal = resp_record |
| 134 | 141 | assert src_text.endswith("}"), src_text |
| 135 | | # add a token-sep after the text. |
| 136 | | self.src = StringIO(src_text + " ") |
| | 142 | self.src_text = src_text |
| 137 | 143 | else: |
| 138 | 144 | # just a line with no literals. |
| 139 | | self.src = StringIO(resp_record) |
| | 145 | self.src_text = resp_record |
| 140 | 146 | self.literal = None |
| 141 | 147 | |
| 142 | | def read(self, n): |
| 143 | | # Two additional hacks: |
| 144 | | # 1. Hack into the lexer so we get special treatment for backslash |
| 145 | | # chars - they are only special inside a quoted string. |
| 146 | | # 2. For quoted strings return the quotes around the string so |
| 147 | | # that atom() can distinguish numbers from strings. Eg. "123" vs 123. |
| 148 | | # These are stripped off before returning them to the user. |
| 149 | | assert n==1 |
| 150 | | if self.pushed is not None: |
| 151 | | ret = self.pushed |
| 152 | | self.pushed = None |
| 153 | | else: |
| 154 | | ret = self.src.read(n) |
| 155 | | if ret == "\\" and self.lexer.state not in '"\\': |
| 156 | | self.pushed = "\\" |
| 157 | | elif ret == '"' and self.lexer.state != '\\': |
| 158 | | self.lexer.token += '"' |
| 159 | | return ret |
| 160 | | |
| 161 | | def close(self): |
| 162 | | self.src.close() |
| 163 | | self.src = None |
| 164 | | self.literal = None |
| | 148 | def __iter__(self): |
| | 149 | return iter(self.src_text) |
| 165 | 150 | |
| 166 | 151 | |
| 167 | 152 | class ResponseTokeniser(object): |
| 168 | | |
| 169 | | CTRL_CHARS = ''.join([chr(ch) for ch in range(32)]) |
| 170 | | SPECIALS = r'()%"' + CTRL_CHARS |
| 171 | | ALL_CHARS = [chr(ch) for ch in range(256)] |
| 172 | | NON_SPECIALS = [ch for ch in ALL_CHARS if ch not in SPECIALS] |
| 173 | | |
| 174 | 153 | def __init__(self, resp_chunks): |
| 175 | 154 | # initialize the lexer with all the chunks we read. |
| 176 | | self.lex = shlex.shlex('', posix=True) |
| 177 | | for chunk in reversed(resp_chunks): |
| 178 | | self.lex.push_source(LiteralHandlingReader(self.lex, chunk)) |
| 179 | | |
| 180 | | self.lex.quotes = '"' |
| 181 | | self.lex.commenters = '' |
| 182 | | self.lex.wordchars = self.NON_SPECIALS |
| | 155 | sources = (LiteralHandlingIter(lex, chunk) for chunk in resp_chunks) |
| | 156 | lex = response_lexer.Lexer(sources) |
| | 157 | self.tok_src = iter(lex) |
| | 158 | self.lex = lex |
| 183 | 159 | |
| 184 | 160 | def __iter__(self): |
| 185 | | return iter(self.lex) |
| 186 | | |
| 187 | | def next(self): |
| 188 | | try: |
| 189 | | return self.lex.next() |
| 190 | | except StopIteration: |
| 191 | | return EOF |
| | 161 | return self.tok_src |
| 192 | 162 | |
| 193 | 163 | |
| … | … | |
| 195 | 165 | if token == "(": |
| 196 | 166 | out = [] |
| 197 | | while True: |
| 198 | | token = src.next() |
| | 167 | for token in src: |
| 199 | 168 | if token == ")": |
| 200 | 169 | return tuple(out) |
| 201 | | if token == EOF: |
| 202 | | preceeding = ' '.join(str(val) for val in out) |
| 203 | | raise ParseError('Tuple incomplete before "(%s"' % preceeding) |
| 204 | 170 | out.append(atom(src, token)) |
| | 171 | # oops - no terminator! |
| | 172 | preceeding = ' '.join(str(val) for val in out) |
| | 173 | raise ParseError('Tuple incomplete before "(%s"' % preceeding) |
| 205 | 174 | elif token == 'NIL': |
| 206 | 175 | return None |
| 207 | | elif token.startswith('{'): |
| | 176 | elif token[0] == '{': |
| 208 | 177 | literal_len = int(token[1:-1]) |
| 209 | | literal_text = src.lex.instream.literal |
| | 178 | literal_text = src.lex.current_source.literal |
| 210 | 179 | if literal_text is None: |
| 211 | 180 | raise ParseError('No literal corresponds to %r' % token) |