Remove the Token enum

The enum ended up not being that helpful, so tokens are now plain `String`s.
2025-10-26 15:10:33 -04:00 · 2018-05-24 22:24:28 -04:00
parent c3abcd4a55
commit 205f75995e
3 changed files with 38 additions and 59 deletions
--- a/build_tests.py
+++ b/build_tests.py
@ -23,23 +23,10 @@ S12 = ' ' * 12
 def rust_tokenize(time_string):
    split_array = _timelex.split(time_string)
-
+    return ['"{}".to_owned()'.format(token) for token in split_array]
    def translate_token(token):
        if token[0].isalpha():
            return 'Token::Alpha("{}".to_owned())'.format(token)
        elif token[0].isnumeric():
            return 'Token::Numeric("{}".to_owned())'.format(token)
        elif len(token) == 1:
            return 'Token::Separator("{}".to_owned())'.format(token)
        else:
            raise Exception("Invalid token during parsing of dateutil "
                            "split: {}".format(token))
    return [translate_token(t) for t in split_array]
 def build_split_string_tests():
-    header = '''use Token;
+    header = '''use ::tokenize;
 use tokenize;
 #[test]
 fn test_python_compat() {\n'''
--- a/src/lib.rs
+++ b/src/lib.rs
@ -29,15 +29,8 @@ enum ParseInternalError {
 type ParseResult<I> = Result<I, ParseError>;
 type ParseIResult<I> = Result<I, ParseInternalError>;
 #[derive(Debug, PartialEq)]
 pub enum Token {
    Alpha(String),
    Numeric(String),
    Separator(String),
 }
 pub struct Tokenizer {
-    token_stack: Vec<Token>,
+    token_stack: Vec<String>,
    parse_string: String,
 }
@ -60,7 +53,7 @@ impl Tokenizer {
 }
 impl Iterator for Tokenizer {
-    type Item = Token;
+    type Item = String;
    fn next(&mut self) -> Option<Self::Item> {
        if !self.token_stack.is_empty() {
@ -151,24 +144,24 @@ impl Iterator for Tokenizer {
        });
        let needs_split = seen_letters || dot_count > 1 || char_stack.last().unwrap() == &'.'
            || char_stack.last().unwrap() == &',';
-        let final_string = char_stack.into_iter().collect();
+        let final_string: String = char_stack.into_iter().collect();
        let mut tokens = match state {
-            ParseState::Empty => vec![Token::Separator(final_string)],
+            ParseState::Empty => vec![final_string],
-            ParseState::Alpha => vec![Token::Alpha(final_string)],
+            ParseState::Alpha => vec![final_string],
-            ParseState::Numeric => vec![Token::Numeric(final_string)],
+            ParseState::Numeric => vec![final_string],
            ParseState::AlphaDecimal => {
                if needs_split {
                    decimal_split(&final_string, false)
                } else {
-                    vec![Token::Alpha(final_string)]
+                    vec![final_string]
                }
            }
            ParseState::NumericDecimal => {
                if needs_split {
                    decimal_split(&final_string, dot_count == 0)
                } else {
-                    vec![Token::Numeric(final_string)]
+                    vec![final_string]
                }
            }
        }.into_iter()
@ -181,8 +174,8 @@ impl Iterator for Tokenizer {
    }
 }
-fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
+fn decimal_split(characters: &str, cast_period: bool) -> Vec<String> {
-    let mut token_stack: Vec<Token> = Vec::new();
+    let mut token_stack: Vec<String> = Vec::new();
    let mut char_stack: Vec<char> = Vec::new();
    let mut state = ParseState::Empty;
@ -197,17 +190,17 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
                    state = ParseState::Numeric;
                } else {
                    let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                }
            }
            ParseState::Alpha => {
                if c.is_alphabetic() {
                    char_stack.push(c);
                } else {
-                    token_stack.push(Token::Alpha(char_stack.iter().collect()));
+                    token_stack.push(char_stack.iter().collect());
                    char_stack.clear();
                    let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                    state = ParseState::Empty;
                }
            }
@ -215,10 +208,10 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
                if c.is_numeric() {
                    char_stack.push(c);
                } else {
-                    token_stack.push(Token::Numeric(char_stack.iter().collect()));
+                    token_stack.push(char_stack.iter().collect());
                    char_stack.clear();
                    let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                    state = ParseState::Empty;
                }
            }
@ -227,8 +220,8 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
    }
    match state {
-        ParseState::Alpha => token_stack.push(Token::Alpha(char_stack.iter().collect())),
+        ParseState::Alpha => token_stack.push(char_stack.iter().collect()),
-        ParseState::Numeric => token_stack.push(Token::Numeric(char_stack.iter().collect())),
+        ParseState::Numeric => token_stack.push(char_stack.iter().collect()),
        ParseState::Empty => (),
        _ => panic!("Invalid parse state during decimal_split()"),
    }
@ -236,7 +229,7 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
    token_stack
 }
-pub fn tokenize(parse_string: &str) -> Vec<Token> {
+pub fn tokenize(parse_string: &str) -> Vec<String> {
    let tokenizer = Tokenizer::new(parse_string.to_owned());
    tokenizer.collect()
 }
--- a/src/tests/compat_split_string.rs
+++ b/src/tests/compat_split_string.rs
@ -2,7 +2,6 @@
 // This file was auto-generated using the `build_tests.py` script.
 // Please do not edit it manually.
 use Token;
 use tokenize;
 #[test]
@ -10,34 +9,34 @@ fn test_python_compat() {
    assert_eq!(
        tokenize("2018.5.15"),
        vec![
-            Token::Numeric("2018".to_owned()),
+            "2018".to_owned(),
-            Token::Separator(".".to_owned()),
+            ".".to_owned(),
-            Token::Numeric("5".to_owned()),
+            "5".to_owned(),
-            Token::Separator(".".to_owned()),
+            ".".to_owned(),
-            Token::Numeric("15".to_owned()),
+            "15".to_owned(),
        ]
    );
    assert_eq!(
        tokenize("May 5, 2018"),
        vec![
-            Token::Alpha("May".to_owned()),
+            "May".to_owned(),
-            Token::Separator(" ".to_owned()),
+            " ".to_owned(),
-            Token::Numeric("5".to_owned()),
+            "5".to_owned(),
-            Token::Separator(",".to_owned()),
+            ",".to_owned(),
-            Token::Separator(" ".to_owned()),
+            " ".to_owned(),
-            Token::Numeric("2018".to_owned()),
+            "2018".to_owned(),
        ]
    );
    assert_eq!(
        tokenize("Mar. 5, 2018"),
        vec![
-            Token::Alpha("Mar".to_owned()),
+            "Mar".to_owned(),
-            Token::Separator(".".to_owned()),
+            ".".to_owned(),
-            Token::Separator(" ".to_owned()),
+            " ".to_owned(),
-            Token::Numeric("5".to_owned()),
+            "5".to_owned(),
-            Token::Separator(",".to_owned()),
+            ",".to_owned(),
-            Token::Separator(" ".to_owned()),
+            " ".to_owned(),
-            Token::Numeric("2018".to_owned()),
+            "2018".to_owned(),
        ]
    );
 }