| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | """ |
|---|
| 6 | Parsing for IMAP command responses with focus on FETCH responses as |
|---|
| 7 | returned by imaplib. |
|---|
| 8 | |
|---|
| 9 | Initially inspired by http://effbot.org/zone/simple-iterator-parser.htm |
|---|
| 10 | """ |
|---|
| 11 | |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | from datetime import datetime |
|---|
| 15 | from fixed_offset import FixedOffset |
|---|
| 16 | from response_lexer import TokenSource |
|---|
| 17 | |
|---|
| 18 | try: |
|---|
| 19 | import imaplib2 as imaplib |
|---|
| 20 | except ImportError: |
|---|
| 21 | imaplib2 = None |
|---|
| 22 | import imaplib |
|---|
| 23 | |
|---|
# Names exported by a star-import of this module. parse_fetch_response is
# not listed here, so it has to be imported by name.
__all__ = ['parse_response', 'ParseError']
|---|
| 25 | |
|---|
| 26 | |
|---|
class ParseError(ValueError):
    """Raised by the parsing functions when a response cannot be parsed."""
|---|
| 29 | |
|---|
| 30 | |
|---|
def parse_response(text):
    """Pull apart IMAP command responses.

    Every item produced by ``gen_parsed_response(text)`` is collected, in
    order, into one tuple. The result is nested tuples of appropriately
    typed objects.
    """
    parsed_items = gen_parsed_response(text)
    return tuple(parsed_items)
|---|
| 37 | |
|---|
| 38 | |
|---|
def gen_parsed_response(text):
    """Lazily parse an IMAP response, yielding one top-level item at a time.

    ``text`` is handed to ``TokenSource`` to be split into tokens. Each
    top-level token is converted by ``atom()``, which pulls further tokens
    from the same source itself when it meets an opening parenthesis.

    Yields nothing at all when ``text`` is falsy (``None``, ``''``, ``[]``).

    Raises ParseError if parsing fails. Any other ValueError raised while
    parsing is re-raised as a ParseError whose message also carries the
    top-level token that was being processed.
    """
    if not text:
        return

    src = TokenSource(text)
    token = None    # last top-level token seen, reported on failure
    try:
        for token in src:
            yield atom(src, token)
    except ParseError:
        # Already the right type; let it through untouched.
        raise
    except ValueError as err:
        # "as" (rather than the Python-2-only comma form) keeps this valid
        # on both Python 2.6+ and Python 3.
        raise ParseError("%s: %s" % (str(err), token))
|---|
| 52 | |
|---|
| 53 | |
|---|
def parse_fetch_response(text, normalise_times=True, uid_is_key=True):
    """Pull apart IMAP FETCH responses as returned by imaplib.

    The parsed response is read as a sequence of (number, item tuple)
    pairs, where each item tuple alternates field names and values.

    Arguments:
      text -- the raw response; ``[None]`` means "no data".
      normalise_times -- passed through to the INTERNALDATE conversion.
      uid_is_key -- if true, a message's UID (when present) replaces the
          leading number as its key in the result and is not stored in the
          message's own dictionary. If false, the UID is stored under
          'UID' and the leading number stays the key.

    Returns a dictionary keyed by message ID. Each value is a dictionary
    keyed by upper-cased FETCH field name (eg. "RFC822"), plus a 'SEQ'
    entry holding the number that preceded the item tuple.

    Raises ParseError if the response is truncated, an ID is not an
    integer, the items are not an even-length tuple, or a field name is not
    a string.
    """
    if text == [None]:
        return {}
    response = gen_parsed_response(text)

    parsed_response = {}
    while True:
        try:
            # next() rather than .next() works on Python 2.6+ and 3.
            msg_id = seq = _int_or_error(next(response), 'invalid message ID')
        except StopIteration:
            break

        try:
            msg_response = next(response)
        except StopIteration:
            raise ParseError('unexpected EOF')

        if not isinstance(msg_response, tuple):
            raise ParseError('bad response type: %s' % repr(msg_response))
        if len(msg_response) % 2:
            raise ParseError('uneven number of response items: %s' % repr(msg_response))

        msg_data = {'SEQ': seq}
        # Even positions hold field names, odd positions their values.
        for name, value in zip(msg_response[::2], msg_response[1::2]):
            try:
                word = name.upper()
            except AttributeError:
                # e.g. a number, None or nested tuple where a name belongs
                raise ParseError('bad field name: %s' % repr(name))

            if word == 'UID':
                uid = _int_or_error(value, 'invalid UID')
                if uid_is_key:
                    msg_id = uid
                else:
                    msg_data[word] = uid
            elif word == 'INTERNALDATE':
                msg_data[word] = _convert_INTERNALDATE(value, normalise_times)
            elif word in ('BODY', 'BODYSTRUCTURE'):
                msg_data[word] = BodyData.create(value)
            else:
                msg_data[word] = value

        parsed_response[msg_id] = msg_data

    return parsed_response
|---|
| 104 | |
|---|
| 105 | |
|---|
| 106 | def _int_or_error(value, error_text): |
|---|
| 107 | try: |
|---|
| 108 | return int(value) |
|---|
| 109 | except (TypeError, ValueError): |
|---|
| 110 | raise ParseError('%s: %s' % (error_text, repr(value))) |
|---|
| 111 | |
|---|
| 112 | |
|---|
class BodyData(tuple):
    """Tuple holding a parsed BODY / BODYSTRUCTURE value.

    Build instances with ``create()``. For a multipart value it gathers the
    leading part tuples into one list stored at index 0, so ``is_multipart``
    can tell the two shapes apart.
    """

    # String types that mark the end of the run of part tuples.
    # ``basestring`` only exists on Python 2.
    try:
        _string_types = basestring  # noqa: F821
    except NameError:
        _string_types = (str, bytes)

    @classmethod
    def create(cls, response):
        """Build a BodyData from a parsed response tuple.

        If the first item is a tuple the value is treated as multipart:
        every item before the first string is collected into a list, and
        the result is that list followed by the remaining items. Otherwise
        the items are kept exactly as given.
        """
        if isinstance(response[0], tuple):
            # Multipart: find where the message part tuples stop.
            for i, part in enumerate(response):
                if isinstance(part, cls._string_types):
                    break
            else:
                # No string follows the parts, so every item is a part.
                # Without this the last part would be left outside the list.
                i = len(response)
            return cls((list(response[:i]),) + response[i:])
        return cls(response)

    @property
    def is_multipart(self):
        """True if the first item is the list of parts built by create()."""
        return isinstance(self[0], list)
|---|
| 133 | |
|---|
| 134 | |
|---|
def _convert_INTERNALDATE(date_string, normalise_times=True):
    """Convert an IMAP INTERNALDATE string into a datetime.

    The string is matched with imaplib's ``InternalDate`` pattern.

    With ``normalise_times`` true (the default) the result is shifted to
    the zone given by ``FixedOffset.for_system()`` and returned without
    tzinfo. Otherwise the datetime is returned carrying the zone offset
    parsed from the string.

    Raises ValueError if ``date_string`` does not match the pattern.
    """
    match = imaplib.InternalDate.match('INTERNALDATE "%s"' % date_string)
    if match is None:
        raise ValueError("couldn't parse date %r" % date_string)
    field = match.group

    # Zone offset as a signed number of minutes.
    offset_minutes = int(field('zoneh')) * 60 + int(field('zonem'))
    if field('zonen') == '-':
        offset_minutes = -offset_minutes
    tz = FixedOffset(offset_minutes)

    moment = datetime(
        int(field('year')),
        imaplib.Mon2num[field('mon')],
        int(field('day')),
        int(field('hour')),
        int(field('min')),
        int(field('sec')),
        0,
        tz,
    )

    if not normalise_times:
        return moment
    return moment.astimezone(FixedOffset.for_system()).replace(tzinfo=None)
|---|
| 159 | |
|---|
| 160 | |
|---|
def atom(src, token):
    """Convert a single token into its Python value.

    * ``(``         -> the tuple built by ``parse_tuple(src)``
    * ``NIL``       -> None
    * ``{N}``       -> ``src.current_literal``, whose length must be N
    * ``"text"``    -> the text between the double quotes
    * all digits    -> int
    * anything else -> the token itself

    Raises ParseError if a ``{N}`` token has no literal available or the
    literal's length is not N.
    """
    if token == "(":
        return parse_tuple(src)
    if token == 'NIL':
        return None

    if token[0] == '{':
        expected_size = int(token[1:-1])
        literal = src.current_literal
        if literal is None:
            raise ParseError('No literal corresponds to %r' % token)
        actual_size = len(literal)
        if actual_size != expected_size:
            raise ParseError('Expecting literal of size %d, got %d' % (
                expected_size, actual_size))
        return literal

    is_quoted = len(token) >= 2 and token[0] == '"' and token[-1] == '"'
    if is_quoted:
        return token[1:-1]

    return int(token) if token.isdigit() else token
|---|
| 181 | |
|---|
| 182 | |
|---|
def parse_tuple(src):
    """Collect parsed items from ``src`` up to the closing parenthesis.

    Tokens are read from ``src`` and converted with ``atom()`` until a
    ``)`` token is seen; the items gathered so far are returned as a tuple.

    Raises ParseError if ``src`` runs out before a ``)`` is found.
    """
    items = []
    for tok in src:
        if tok == ")":
            break
        items.append(atom(src, tok))
    else:
        # Ran out of tokens without ever seeing the closing parenthesis.
        raise ParseError('Tuple incomplete before "(%s"' % _fmt_tuple(items))
    return tuple(items)
|---|
| 191 | |
|---|
| 192 | |
|---|
| 193 | def _fmt_tuple(t): |
|---|
| 194 | return ' '.join(str(item) for item in t) |
|---|