Remove the Token enum

The enum ended up not being that helpful, so tokens are now represented as plain `String`s.
2025-08-04 22:45:09 -04:00 · 2018-05-24 22:24:28 -04:00
parent c3abcd4a55
commit 205f75995e
3 changed files with 38 additions and 59 deletions
--- a/build_tests.py
+++ b/build_tests.py
@ -23,23 +23,10 @@ S12 = ' ' * 12

 def rust_tokenize(time_string):
    split_array = _timelex.split(time_string)
-
-    def translate_token(token):
-        if token[0].isalpha():
-            return 'Token::Alpha("{}".to_owned())'.format(token)
-        elif token[0].isnumeric():
-            return 'Token::Numeric("{}".to_owned())'.format(token)
-        elif len(token) == 1:
-            return 'Token::Separator("{}".to_owned())'.format(token)
-        else:
-            raise Exception("Invalid token during parsing of dateutil "
-                            "split: {}".format(token))
-
-    return [translate_token(t) for t in split_array]
+    return ['"{}".to_owned()'.format(token) for token in split_array]

 def build_split_string_tests():
-    header = '''use Token;
-use tokenize;
+    header = '''use ::tokenize;

 #[test]
 fn test_python_compat() {\n'''
--- a/src/lib.rs
+++ b/src/lib.rs
@ -29,15 +29,8 @@ enum ParseInternalError {
 type ParseResult<I> = Result<I, ParseError>;
 type ParseIResult<I> = Result<I, ParseInternalError>;

-#[derive(Debug, PartialEq)]
-pub enum Token {
-    Alpha(String),
-    Numeric(String),
-    Separator(String),
-}
-
 pub struct Tokenizer {
-    token_stack: Vec<Token>,
+    token_stack: Vec<String>,
    parse_string: String,
 }

@ -60,7 +53,7 @@ impl Tokenizer {
 }

 impl Iterator for Tokenizer {
-    type Item = Token;
+    type Item = String;

    fn next(&mut self) -> Option<Self::Item> {
        if !self.token_stack.is_empty() {
@ -151,24 +144,24 @@ impl Iterator for Tokenizer {
        });
        let needs_split = seen_letters || dot_count > 1 || char_stack.last().unwrap() == &'.'
            || char_stack.last().unwrap() == &',';
-        let final_string = char_stack.into_iter().collect();
+        let final_string: String = char_stack.into_iter().collect();

        let mut tokens = match state {
-            ParseState::Empty => vec![Token::Separator(final_string)],
-            ParseState::Alpha => vec![Token::Alpha(final_string)],
-            ParseState::Numeric => vec![Token::Numeric(final_string)],
+            ParseState::Empty => vec![final_string],
+            ParseState::Alpha => vec![final_string],
+            ParseState::Numeric => vec![final_string],
            ParseState::AlphaDecimal => {
                if needs_split {
                    decimal_split(&final_string, false)
                } else {
-                    vec![Token::Alpha(final_string)]
+                    vec![final_string]
                }
            }
            ParseState::NumericDecimal => {
                if needs_split {
                    decimal_split(&final_string, dot_count == 0)
                } else {
-                    vec![Token::Numeric(final_string)]
+                    vec![final_string]
                }
            }
        }.into_iter()
@ -181,8 +174,8 @@ impl Iterator for Tokenizer {
    }
 }

-fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
-    let mut token_stack: Vec<Token> = Vec::new();
+fn decimal_split(characters: &str, cast_period: bool) -> Vec<String> {
+    let mut token_stack: Vec<String> = Vec::new();
    let mut char_stack: Vec<char> = Vec::new();
    let mut state = ParseState::Empty;

@ -197,17 +190,17 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
                    state = ParseState::Numeric;
                } else {
                    let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                }
            }
            ParseState::Alpha => {
                if c.is_alphabetic() {
                    char_stack.push(c);
                } else {
-                    token_stack.push(Token::Alpha(char_stack.iter().collect()));
+                    token_stack.push(char_stack.iter().collect());
                    char_stack.clear();
                    let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                    state = ParseState::Empty;
                }
            }
@ -215,10 +208,10 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
                if c.is_numeric() {
                    char_stack.push(c);
                } else {
-                    token_stack.push(Token::Numeric(char_stack.iter().collect()));
+                    token_stack.push(char_stack.iter().collect());
                    char_stack.clear();
                    let character = if cast_period { '.' } else { c };
-                    token_stack.push(Token::Separator(character.to_string()));
+                    token_stack.push(character.to_string());
                    state = ParseState::Empty;
                }
            }
@ -227,8 +220,8 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
    }

    match state {
-        ParseState::Alpha => token_stack.push(Token::Alpha(char_stack.iter().collect())),
-        ParseState::Numeric => token_stack.push(Token::Numeric(char_stack.iter().collect())),
+        ParseState::Alpha => token_stack.push(char_stack.iter().collect()),
+        ParseState::Numeric => token_stack.push(char_stack.iter().collect()),
        ParseState::Empty => (),
        _ => panic!("Invalid parse state during decimal_split()"),
    }
@ -236,7 +229,7 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<Token> {
    token_stack
 }

-pub fn tokenize(parse_string: &str) -> Vec<Token> {
+pub fn tokenize(parse_string: &str) -> Vec<String> {
    let tokenizer = Tokenizer::new(parse_string.to_owned());
    tokenizer.collect()
 }
--- a/src/tests/compat_split_string.rs
+++ b/src/tests/compat_split_string.rs
@ -2,7 +2,6 @@
 // This file was auto-generated using the `build_tests.py` script.
 // Please do not edit it manually.

-use Token;
 use tokenize;

 #[test]
@ -10,34 +9,34 @@ fn test_python_compat() {
    assert_eq!(
        tokenize("2018.5.15"),
        vec![
-            Token::Numeric("2018".to_owned()),
-            Token::Separator(".".to_owned()),
-            Token::Numeric("5".to_owned()),
-            Token::Separator(".".to_owned()),
-            Token::Numeric("15".to_owned()),
+            "2018".to_owned(),
+            ".".to_owned(),
+            "5".to_owned(),
+            ".".to_owned(),
+            "15".to_owned(),
        ]
    );
    assert_eq!(
        tokenize("May 5, 2018"),
        vec![
-            Token::Alpha("May".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("5".to_owned()),
-            Token::Separator(",".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("2018".to_owned()),
+            "May".to_owned(),
+            " ".to_owned(),
+            "5".to_owned(),
+            ",".to_owned(),
+            " ".to_owned(),
+            "2018".to_owned(),
        ]
    );
    assert_eq!(
        tokenize("Mar. 5, 2018"),
        vec![
-            Token::Alpha("Mar".to_owned()),
-            Token::Separator(".".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("5".to_owned()),
-            Token::Separator(",".to_owned()),
-            Token::Separator(" ".to_owned()),
-            Token::Numeric("2018".to_owned()),
+            "Mar".to_owned(),
+            ".".to_owned(),
+            " ".to_owned(),
+            "5".to_owned(),
+            ",".to_owned(),
+            " ".to_owned(),
+            "2018".to_owned(),
        ]
    );
 }