root/imapclient/response_parser.py

Revision 282:e2d2b114427b, 5.5 KB (checked in by Menno Smits <menno@…>, 3 months ago)

clean up UID/SEQ distinction

Line 
1# Copyright (c) 2011, Menno Smits
2# Released subject to the New BSD License
3# Please see http://en.wikipedia.org/wiki/BSD_licenses
4
5"""
6Parsing for IMAP command responses with focus on FETCH responses as
7returned by imaplib.
8
9Initially inspired by http://effbot.org/zone/simple-iterator-parser.htm
10"""
11
12#TODO more exact error reporting
13
14from datetime import datetime
15from fixed_offset import FixedOffset
16from response_lexer import TokenSource
17
18try:
19    import imaplib2 as imaplib
20except ImportError:
21    imaplib2 = None
22    import imaplib
23
24__all__ = ['parse_response', 'ParseError']
25
26
class ParseError(ValueError):
    """Raised when an IMAP response cannot be parsed.

    Subclasses ValueError, so callers that catch ValueError also catch
    parse failures.
    """
29
30
def parse_response(text):
    """Parse an IMAP command response into nested tuples.

    The lazily generated items from gen_parsed_response() are collected
    into a single tuple of appropriately typed objects.
    """
    parsed_items = gen_parsed_response(text)
    return tuple(parsed_items)
37
38
def gen_parsed_response(text):
    """Lazily parse an IMAP response, yielding one top-level item at a time.

    Nothing is yielded when *text* is empty or None. Otherwise *text* is
    tokenised with TokenSource and each token is converted by atom().

    Raises ParseError on malformed input. A plain ValueError raised while
    converting a token is re-raised as ParseError with the offending
    token appended to the message.
    """
    if not text:
        return
    src = TokenSource(text)

    # Remember the most recent token so that it can be reported on error.
    token = None
    try:
        for token in src:
            yield atom(src, token)
    except ParseError:
        # Already the right type (and a ValueError subclass): pass through.
        raise
    except ValueError as err:
        raise ParseError("%s: %s" % (str(err), token))
52
53
def parse_fetch_response(text, normalise_times=True, uid_is_key=True):
    """Pull apart IMAP FETCH responses as returned by imaplib.

    Returns a dictionary, keyed by message ID. Each value is a dictionary
    keyed by FETCH field type (eg. "RFC822"); it always contains a 'SEQ'
    entry holding the message sequence number.

    text -- the response to parse; ``[None]`` yields an empty dictionary.
    normalise_times -- passed through to the INTERNALDATE conversion.
    uid_is_key -- when true and the response carries a UID item, the
        message is keyed by that UID (and no 'UID' entry is stored);
        otherwise it is keyed by sequence number and any UID is stored
        under 'UID'.

    If several responses map to the same key their items are merged,
    with later values replacing earlier ones.

    Raises ParseError if the response is malformed.
    """
    if text == [None]:
        return {}
    response = gen_parsed_response(text)

    parsed_response = {}
    while True:
        # The response is a flat stream of (sequence number, data tuple)
        # pairs; running out at the start of a pair is the normal exit.
        try:
            msg_id = seq = _int_or_error(next(response), 'invalid message ID')
        except StopIteration:
            break

        try:
            msg_response = next(response)
        except StopIteration:
            raise ParseError('unexpected EOF')

        if not isinstance(msg_response, tuple):
            raise ParseError('bad response type: %s' % repr(msg_response))
        if len(msg_response) % 2:
            raise ParseError('uneven number of response items: %s' % repr(msg_response))

        # always return the sequence of the message, so it is available
        # even if we return keyed by UID.
        msg_data = {'SEQ': seq}
        for i in range(0, len(msg_response), 2):
            name = msg_response[i]
            value = msg_response[i+1]
            try:
                word = name.upper()
            except AttributeError:
                # Field names must be strings; report anything else as a
                # parse failure rather than leaking an AttributeError.
                raise ParseError('invalid field name: %s' % repr(name))

            if word == 'UID':
                uid = _int_or_error(value, 'invalid UID')
                if uid_is_key:
                    msg_id = uid
                else:
                    msg_data[word] = uid
            elif word == 'INTERNALDATE':
                msg_data[word] = _convert_INTERNALDATE(value, normalise_times)
            elif word in ('BODY', 'BODYSTRUCTURE'):
                msg_data[word] = BodyData.create(value)
            else:
                msg_data[word] = value

        # Merge rather than overwrite, so a second response for the same
        # message does not discard what an earlier one provided.
        parsed_response.setdefault(msg_id, {}).update(msg_data)

    return parsed_response
104
105
106def _int_or_error(value, error_text):
107    try:
108        return int(value)
109    except (TypeError, ValueError):
110        raise ParseError('%s: %s' % (error_text, repr(value)))
111
112
class BodyData(tuple):
    """Tuple holding a parsed BODY or BODYSTRUCTURE value.

    Instances should be built with create(), which groups the leading
    part tuples of a multipart message into a single list.
    """

    @classmethod
    def create(cls, response):
        """Build a BodyData from a parsed BODY/BODYSTRUCTURE tuple.

        In case of multipart messages we will see at least 2 tuples at
        the start. These are nested in to a list so that the returned
        tuple always has a consistent number of elements regardless of
        whether the message is multipart or not.

        An empty *response* gives an empty, non-multipart BodyData.
        """
        if response and isinstance(response[0], tuple):
            # Multipart, find where the message part tuples stop: the
            # first item that is not itself a tuple ends the run.
            for i, part in enumerate(response):
                if not isinstance(part, tuple):
                    break
            else:
                # Every item was a part tuple, so nest all of them.
                i = len(response)
            return cls((list(response[:i]),) + response[i:])
        return cls(response)

    @property
    def is_multipart(self):
        """True if the first element is the list of parts made by create()."""
        return bool(self) and isinstance(self[0], list)
133   
134
def _convert_INTERNALDATE(date_string, normalise_times=True):
    """Convert an IMAP INTERNALDATE value to a datetime.

    *date_string* is matched using imaplib's InternalDate pattern. If
    *normalise_times* is true the result is converted to the host
    system's timezone and returned without tzinfo; otherwise the
    datetime is returned with a FixedOffset tzinfo built from the zone
    given in the string.

    Raises ValueError if the string doesn't match.
    """
    match = imaplib.InternalDate.match('INTERNALDATE "%s"' % date_string)
    if match is None:
        raise ValueError("couldn't parse date %r" % date_string)
    field = match.group

    # Total zone offset in minutes, signed according to the zone sign.
    offset_minutes = int(field('zoneh')) * 60 + int(field('zonem'))
    if field('zonen') == '-':
        offset_minutes = -offset_minutes
    tz = FixedOffset(offset_minutes)

    stamp = datetime(
        int(field('year')),
        imaplib.Mon2num[field('mon')],
        int(field('day')),
        int(field('hour')),
        int(field('min')),
        int(field('sec')),
        0,
        tz,
    )

    if not normalise_times:
        return stamp
    # Normalise to host system's timezone
    local_stamp = stamp.astimezone(FixedOffset.for_system())
    return local_stamp.replace(tzinfo=None)
159
160
def atom(src, token):
    """Convert a single token into its Python value.

    "(" starts a nested tuple read from *src*, NIL becomes None, a
    "{n}" marker is replaced by the current literal held by *src*, a
    double-quoted token has its quotes removed, an all-digit token
    becomes an int, and anything else is returned unchanged.

    Raises ParseError if a literal is missing or has the wrong size.
    """
    if token == "(":
        return parse_tuple(src)
    if token == 'NIL':
        return None

    if token[0] == '{':
        expected_size = int(token[1:-1])
        literal = src.current_literal
        if literal is None:
            raise ParseError('No literal corresponds to %r' % token)
        actual_size = len(literal)
        if actual_size != expected_size:
            raise ParseError('Expecting literal of size %d, got %d' % (
                                expected_size, actual_size))
        return literal

    is_quoted = len(token) >= 2 and token[0] == '"' and token[-1] == '"'
    if is_quoted:
        return token[1:-1]
    return int(token) if token.isdigit() else token
181
182
def parse_tuple(src):
    """Read tokens from *src* up to the closing ")" and return a tuple.

    Each token before the terminator is converted with atom(), so
    nested tuples are handled recursively. Raises ParseError if *src*
    is exhausted before a ")" is seen.
    """
    items = []
    for tok in src:
        if tok != ")":
            items.append(atom(src, tok))
            continue
        return tuple(items)
    # no terminator
    collected = _fmt_tuple(items)
    raise ParseError('Tuple incomplete before "(%s"' % collected)
191
192
193def _fmt_tuple(t):
194    return ' '.join(str(item) for item in t)
Note: See TracBrowser for help on using the browser.