diff --git a/build_tests.py b/build_tests.py
index af5e175..084ab31 100644
--- a/build_tests.py
+++ b/build_tests.py
@@ -23,23 +23,10 @@ S12 = ' ' * 12
 
 def rust_tokenize(time_string):
     split_array = _timelex.split(time_string)
-
-    def translate_token(token):
-        if token[0].isalpha():
-            return 'Token::Alpha("{}".to_owned())'.format(token)
-        elif token[0].isnumeric():
-            return 'Token::Numeric("{}".to_owned())'.format(token)
-        elif len(token) == 1:
-            return 'Token::Separator("{}".to_owned())'.format(token)
-        else:
-            raise Exception("Invalid token during parsing of dateutil "
-                            "split: {}".format(token))
-
-    return [translate_token(t) for t in split_array]
+    return ['"{}".to_owned()'.format(token) for token in split_array]
 
 def build_split_string_tests():
-    header = '''use Token;
-use tokenize;
+    header = '''use ::tokenize;
 
 #[test]
 fn test_python_compat() {\n'''
diff --git a/src/lib.rs b/src/lib.rs
index 0d73bca..7edf7f6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,15 +29,8 @@ enum ParseInternalError {
 type ParseResult<I> = Result<I, ParseError>;
 type ParseIResult<I> = Result<I, ParseInternalError>;
 
-#[derive(Debug, PartialEq)]
-pub enum Token {
-    Alpha(String),
-    Numeric(String),
-    Separator(String),
-}
-
 pub struct Tokenizer {
-    token_stack: Vec<Token>,
+    token_stack: Vec<String>,
     parse_string: String,
 }
 
@@ -60,7 +53,7 @@ impl Tokenizer {
 }
 
 impl Iterator for Tokenizer {
-    type Item = Token;
+    type Item = String;
 
     fn next(&mut self) -> Option<Self::Item> {
         if !self.token_stack.is_empty() {
@@ -151,24 +144,24 @@
         });
         let needs_split = seen_letters || dot_count > 1 || char_stack.last().unwrap() == &'.'
             || char_stack.last().unwrap() == &',';
-        let final_string = char_stack.into_iter().collect();
+        let final_string: String = char_stack.into_iter().collect();
 
         let mut tokens = match state {
-            ParseState::Empty => vec![Token::Separator(final_string)],
-            ParseState::Alpha => vec![Token::Alpha(final_string)],
-            ParseState::Numeric => vec![Token::Numeric(final_string)],
+            ParseState::Empty => vec![final_string],
+            ParseState::Alpha => vec![final_string],
+            ParseState::Numeric => vec![final_string],
             ParseState::AlphaDecimal => {
                 if needs_split {
                     decimal_split(&final_string, false)
                 } else {
-                    vec![Token::Alpha(final_string)]
+                    vec![final_string]
                 }
             }
             ParseState::NumericDecimal => {
                 if needs_split {
                     decimal_split(&final_string, dot_count == 0)
                 } else {
-                    vec![Token::Numeric(final_string)]
+                    vec![final_string]
                 }
             }
         }.into_iter()
@@ -181,8 +174,8 @@
     }
 }
 
-fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
-    let mut token_stack: Vec<Token> = Vec::new();
+fn decimal_split(characters: &str, cast_period: bool) -> Vec<String> {
+    let mut token_stack: Vec<String> = Vec::new();
     let mut char_stack: Vec<char> = Vec::new();
     let mut state = ParseState::Empty;
 
@@ -197,17 +190,17 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
                     state = ParseState::Numeric;
                 } else {
                     let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                 }
             }
             ParseState::Alpha => {
                 if c.is_alphabetic() {
                     char_stack.push(c);
                 } else {
-                    token_stack.push(Token::Alpha(char_stack.iter().collect()));
+                    token_stack.push(char_stack.iter().collect());
                     char_stack.clear();
                     let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                     state = ParseState::Empty;
                 }
             }
@@ -215,10 +208,10 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
                 if c.is_numeric() {
                     char_stack.push(c);
                 } else {
-                    token_stack.push(Token::Numeric(char_stack.iter().collect()));
+                    token_stack.push(char_stack.iter().collect());
                     char_stack.clear();
                     let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                     state = ParseState::Empty;
                 }
             }
@@ -227,8 +220,8 @@
     }
 
     match state {
-        ParseState::Alpha => token_stack.push(Token::Alpha(char_stack.iter().collect())),
-        ParseState::Numeric => token_stack.push(Token::Numeric(char_stack.iter().collect())),
+        ParseState::Alpha => token_stack.push(char_stack.iter().collect()),
+        ParseState::Numeric => token_stack.push(char_stack.iter().collect()),
         ParseState::Empty => (),
         _ => panic!("Invalid parse state during decimal_split()"),
     }
@@ -236,7 +229,7 @@
     token_stack
 }
 
-pub fn tokenize(parse_string: &str) -> Vec<Token> {
+pub fn tokenize(parse_string: &str) -> Vec<String> {
     let tokenizer = Tokenizer::new(parse_string.to_owned());
     tokenizer.collect()
 }
diff --git a/src/tests/compat_split_string.rs b/src/tests/compat_split_string.rs
index 69ad3e4..7266ec9 100644
--- a/src/tests/compat_split_string.rs
+++ b/src/tests/compat_split_string.rs
@@ -2,7 +2,6 @@
 // This file was auto-generated using the `build_tests.py` script.
 // Please do not edit it manually.
 
-use Token;
 use tokenize;
 
 #[test]
@@ -10,34 +9,34 @@ fn test_python_compat() {
     assert_eq!(
         tokenize("2018.5.15"),
         vec![
-            Token::Numeric("2018".to_owned()),
-            Token::Separator(".".to_owned()),
-            Token::Numeric("5".to_owned()),
-            Token::Separator(".".to_owned()),
-            Token::Numeric("15".to_owned()),
+            "2018".to_owned(),
+            ".".to_owned(),
+            "5".to_owned(),
+            ".".to_owned(),
+            "15".to_owned(),
         ]
     );
     assert_eq!(
         tokenize("May 5, 2018"),
         vec![
-            Token::Alpha("May".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("5".to_owned()),
-            Token::Separator(",".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("2018".to_owned()),
+            "May".to_owned(),
+            " ".to_owned(),
+            "5".to_owned(),
+            ",".to_owned(),
+            " ".to_owned(),
+            "2018".to_owned(),
         ]
     );
     assert_eq!(
         tokenize("Mar. 5, 2018"),
         vec![
-            Token::Alpha("Mar".to_owned()),
-            Token::Separator(".".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("5".to_owned()),
-            Token::Separator(",".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("2018".to_owned()),
+            "Mar".to_owned(),
+            ".".to_owned(),
+            " ".to_owned(),
+            "5".to_owned(),
+            ",".to_owned(),
+            " ".to_owned(),
+            "2018".to_owned(),
         ]
     );
 }
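
A minimal sketch of how the simplified API reads once this lands, written in the same style as the generated compatibility tests. This is an editorial illustration, not part of the diff; the test name and the `is_numeric` helper are hypothetical, and the expected token values are taken from the `test_python_compat` cases above.

use tokenize;

#[test]
fn tokens_are_plain_strings() {
    // `tokenize` now yields plain `String`s rather than `Token` variants,
    // so callers can compare directly against string literals.
    let tokens = tokenize("May 5, 2018");
    assert_eq!(tokens, vec!["May", " ", "5", ",", " ", "2018"]);

    // With the `Token` enum gone, classifying a token (alpha vs. numeric
    // vs. separator) becomes the caller's job, e.g.:
    let is_numeric = |t: &str| t.chars().next().map_or(false, |c| c.is_numeric());
    assert!(is_numeric(tokens[5].as_str())); // "2018"
}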