Show
Ignore:
Timestamp:
05/07/10 13:37:02 (2 years ago)
Author:
Menno Smits <menno@…>
Branch:
default
Message:

refactored Lexer classes. Not quite working yet.

Location:
imapclient
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • imapclient/response_lexer.py

    r149 r151  
    2020        self.wordchars = set(self.NON_SPECIALS) 
    2121        self.whitespace = set((' \t\r\n')) 
    22         self.sources = sources 
     22        self.sources = (LiteralHandlingIter(self, chunk) for chunk in sources) 
    2323        self.current_source = None 
    2424 
     
    100100 
    101101    def __iter__(self): 
    102         "Generate a token" 
     102        "Generate tokens" 
    103103        for source in self.sources: 
    104104            self.current_source = source 
    105105            for tok in self.read_token_stream(iter(source)): 
    106106                yield tok 
     107 
     108    @classmethod 
     109    def create_token_source(cls, text): 
     110        lex = cls(text) 
     111        return TokenIterator(lex) 
     112 
     113 
     114class TokenIterator(object): 
     115 
     116    def __init__(self, lex): 
     117        self.lex = lex 
     118        self.src = iter(lex) 
     119 
     120    @property 
     121    def current_literal(self): 
     122        return self.lex.current_source.literal 
     123 
     124    def __iter__(self): 
     125        return self.src 
     126     
     127 
     128# imaplib has poor handling of 'literals' - it both fails to remove the 
     129# {size} marker, and fails to keep responses grouped into the same logical 
     130# 'line'.  What we end up with is a list of response 'records', where each 
     131# record is either a simple string, or tuple of (str_with_lit, literal) - 
     132# where str_with_lit is a string with the {xxx} marker at its end.  Note 
     133# that each element of this list does *not* correspond 1:1 with the 
     134# untagged responses. 
     135# (http://bugs.python.org/issue5045 also has comments about this) 
     136# So: we wrap each of these records in a small iterable object.  When 
     137# a string literal is finally processed, we peek into the current wrapper 
     138# to grab the literal. 
     139class LiteralHandlingIter: 
     140    def __init__(self, lexer, resp_record): 
     141        self.pushed = None 
     142        self.lexer = lexer 
     143        if isinstance(resp_record, tuple): 
     144            # A 'record' with a string which includes a literal marker, and 
     145            # the literal itself. 
     146            src_text, self.literal = resp_record 
     147            assert src_text.endswith("}"), src_text 
     148            self.src_text = self.literal 
     149        else: 
     150            # just a line with no literals. 
     151            self.src_text = resp_record 
     152            self.literal = None 
     153 
     154    def __iter__(self): 
     155        return iter(self.src_text) 
     156 
  • imapclient/response_parser.py

    r149 r151  
    99 
    1010import imaplib 
    11 import response_lexer 
    1211from datetime import datetime 
    1312from fixed_offset import FixedOffset 
     13from response_lexer import Lexer 
    1414 
    1515 
     
    3232    if not text: 
    3333        return 
    34     src = ResponseTokeniser(text) 
     34    src = Lexer.create_token_source(text) 
     35     
    3536    token = None 
    3637    try: 
     
    118119 
    119120 
    120 EOF = object() 
    121  
    122 # imaplib has poor handling of 'literals' - it both fails to remove the 
    123 # {size} marker, and fails to keep responses grouped into the same logical 
    124 # 'line'.  What we end up with is a list of response 'records', where each 
    125 # record is either a simple string, or tuple of (str_with_lit, literal) - 
    126 # where str_with_lit is a string with the {xxx} marker at its end.  Note 
    127 # that each elt of this list does *not* correspond 1:1 with the untagged 
    128 # responses. 
    129 # (http://bugs.python.org/issue5045 also has comments about this) 
    130 # So: we have a special file-like object for each of these records.  When 
    131 # a string literal is finally processed, we peek into this file-like object 
    132 # to grab the literal. 
    133 class LiteralHandlingIter: 
    134     def __init__(self, lexer, resp_record): 
    135         self.pushed = None 
    136         self.lexer = lexer 
    137         if isinstance(resp_record, tuple): 
    138             # A 'record' with a string which includes a literal marker, and 
    139             # the literal itself. 
    140             src_text, self.literal = resp_record 
    141             assert src_text.endswith("}"), src_text 
    142             self.src_text = src_text 
    143         else: 
    144             # just a line with no literals. 
    145             self.src_text = resp_record 
    146             self.literal = None 
    147  
    148     def __iter__(self): 
    149         return iter(self.src_text) 
    150  
    151  
    152 class ResponseTokeniser(object): 
    153     def __init__(self, resp_chunks): 
    154         # initialize the lexer with all the chunks we read. 
    155         sources = (LiteralHandlingIter(lex, chunk) for chunk in resp_chunks) 
    156         lex = response_lexer.Lexer(sources) 
    157         self.tok_src = iter(lex) 
    158         self.lex = lex 
    159  
    160     def __iter__(self): 
    161         return self.tok_src 
    162  
    163  
    164121def atom(src, token): 
    165122    if token == "(": 
     
    176133    elif token[0] == '{': 
    177134        literal_len = int(token[1:-1]) 
    178         literal_text = src.lex.current_source.literal 
     135        literal_text = src.current_literal 
    179136        if literal_text is None: 
    180137           raise ParseError('No literal corresponds to %r' % token)