Mirror of https://github.com/bspeice/dtparse, synced 2025-10-27 23:50:32 -04:00

.gitignore (3 changed lines)
							| @ -2,4 +2,5 @@ | ||||
| /target | ||||
| **/*.rs.bk | ||||
| Cargo.lock | ||||
| .vscode | ||||
| .vscode | ||||
| *.pyc | ||||
| @ -17,8 +17,8 @@ Supported in v0.8 | ||||
| theoretically would provide support, but I'd also like some helper things available (e.g. "EST" is not a named zone in `chrono-tz`). | ||||
| Explicit time zones (i.e. "00:00:00 -0300") are working as expected. | ||||
|  | ||||
| 3. "Fuzzy" and "Fuzzy with tokens" modes haven't been tested. The code should work, but I need to get the | ||||
| test cases added to the auto-generation suite | ||||
| 3. ~~"Fuzzy" and "Fuzzy with tokens" modes haven't been tested. The code should work, but I need to get the | ||||
| test cases added to the auto-generation suite~~ | ||||
|  | ||||
| **Non-functional**: This library is intended to be a direct port from Python, and thus the code | ||||
| looks a lot more like Python than it does Rust. There are a ton of `TODO` comments in the code | ||||
|  | ||||
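As context for the fuzzy test strings added below, here is a minimal usage sketch (not part of this diff) of the two paths the notes above describe: explicit offsets through `dtparse::parse`, and fuzzy parsing through `Parser::parse`. The signatures are inferred from the test helpers elsewhere in this change, so treat it as illustrative rather than canonical.

```rust
// Sketch only: drives the explicit-offset and fuzzy paths covered by the
// generated tests. Signatures are inferred from the test helpers in this change.
extern crate dtparse;

use std::collections::HashMap;
use dtparse::{Parser, ParserInfo};

fn main() {
    // Explicit offsets ("-0300") are reported as working; `parse` returns the
    // naive timestamp plus an optional fixed offset.
    let parsed = dtparse::parse("Thu, 25 Sep 2003 10:49:41 -0300").unwrap();
    println!("{} {:?}", parsed.0, parsed.1);

    // Fuzzy mode, mirroring `parse_fuzzy_and_assert`: with `fuzzy_with_tokens`
    // set, the third tuple element holds the text skipped during parsing.
    let mut parser = Parser::new(ParserInfo::default());
    let fuzzy = parser
        .parse(
            "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
            None, None, true, true, None, false, HashMap::new(),
        )
        .unwrap();
    println!("{} {:?} {:?}", fuzzy.0, fuzzy.1, fuzzy.2);
}
```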
| @ -80,12 +80,26 @@ tests = { | ||||
|         '1994-11-05T08:15:30Z', '1976-07-04T00:01:02Z', | ||||
|         'Tue Apr 4 00:22:12 PDT 1995' | ||||
|     ], | ||||
|     'test_fuzzy_tzinfo': [ | ||||
|         'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.' | ||||
|     ], | ||||
|     'test_fuzzy_tokens_tzinfo': [ | ||||
|         'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.' | ||||
|     ], | ||||
|     'test_fuzzy_simple': [ | ||||
|         'I have a meeting on March 1, 1974', # testFuzzyAMPMProblem | ||||
|         'On June 8th, 2020, I am going to be the first man on Mars', # testFuzzyAMPMProblem | ||||
|         'Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003', # testFuzzyAMPMProblem | ||||
|         'Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset', # testFuzzyAMPMProblem | ||||
|         'Jan 29, 1945 14:45 AM I going to see you there?', # testFuzzyIgnoreAMPM | ||||
|         '2017-07-17 06:15:', # test_idx_check | ||||
|     ], | ||||
|     'test_parse_default_ignore': [ | ||||
|     ], | ||||
| } | ||||
|  | ||||
| def main(): | ||||
|     with open('tests/pycompat.rs', 'w+') as handle: | ||||
|     with open('src/tests/pycompat_parser.rs', 'w+') as handle: | ||||
|         handle.write(TEST_HEADER) | ||||
|  | ||||
|         for test_name, test_strings in tests.items(): | ||||
| @ -149,6 +163,30 @@ def test_parse_default_ignore(i, s): | ||||
|  | ||||
|     return TEST_PARSE_DEFAULT_IGNORE.format(i=i, d=d, s=s) | ||||
|  | ||||
|  | ||||
| def test_fuzzy_tzinfo(i, s): | ||||
|     d = parse(s, fuzzy=True) | ||||
|  | ||||
|     return TEST_FUZZY_TZINFO.format(i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds())) | ||||
|  | ||||
|  | ||||
| def test_fuzzy_tokens_tzinfo(i, s): | ||||
|     d, tokens = parse(s, fuzzy_with_tokens=True) | ||||
|  | ||||
|     r_tokens = ", ".join(list(map(lambda s: f'"{s}".to_owned()', tokens))) | ||||
|  | ||||
|     return TEST_FUZZY_TOKENS_TZINFO.format( | ||||
|         i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds()), | ||||
|         tokens=r_tokens | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_fuzzy_simple(i, s): | ||||
|     d = parse(s, fuzzy=True) | ||||
|  | ||||
|     return TEST_FUZZY_SIMPLE.format(i=i, d=d, s=s) | ||||
|  | ||||
|  | ||||
| # Here lies all the ugly junk. | ||||
| TEST_HEADER = ''' | ||||
| extern crate chrono; | ||||
| @ -159,10 +197,9 @@ use chrono::NaiveDateTime; | ||||
| use chrono::Timelike; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| extern crate dtparse; | ||||
|  | ||||
| use dtparse::Parser; | ||||
| use dtparse::ParserInfo; | ||||
| use Parser; | ||||
| use ParserInfo; | ||||
| use parse; | ||||
|  | ||||
| struct PyDateTime { | ||||
|     year: i32, | ||||
| @ -205,23 +242,59 @@ fn parse_and_assert( | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s); | ||||
| } | ||||
|  | ||||
| fn parse_and_assert_simple( | ||||
|     pdt: PyDateTime, | ||||
|     s: &str, | ||||
| ) { | ||||
|     let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s)); | ||||
|     assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for {}", s); | ||||
|     assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for {}", s); | ||||
|     assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for {}", s); | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for {}", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for {}", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for {}", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s); | ||||
|     let rs_parsed = parse(s).expect(&format!("Unable to parse date in Rust '{}'", s)); | ||||
|     assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s); | ||||
| } | ||||
|  | ||||
| fn parse_fuzzy_and_assert( | ||||
|     pdt: PyDateTime, | ||||
|     ptokens: Option<Vec<String>>, | ||||
|     info: ParserInfo, | ||||
|     s: &str, | ||||
|     dayfirst: Option<bool>, | ||||
|     yearfirst: Option<bool>, | ||||
|     fuzzy: bool, | ||||
|     fuzzy_with_tokens: bool, | ||||
|     default: Option<&NaiveDateTime>, | ||||
|     ignoretz: bool, | ||||
|     tzinfos: HashMap<String, i32>, | ||||
| ) { | ||||
|  | ||||
|     let mut parser = Parser::new(info); | ||||
|     let rs_parsed = parser.parse( | ||||
|         s, | ||||
|         dayfirst, | ||||
|         yearfirst, | ||||
|         fuzzy, | ||||
|         fuzzy_with_tokens, | ||||
|         default, | ||||
|         ignoretz, | ||||
|         tzinfos).expect(&format!("Unable to parse date in Rust '{}'", s)); | ||||
|  | ||||
|     assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s); | ||||
|     assert_eq!(ptokens, rs_parsed.2, "Tokens mismatch for '{}'", s); | ||||
| } | ||||
|  | ||||
| macro_rules! rs_tzinfo_map { | ||||
| @ -364,6 +437,46 @@ fn test_parse_default_ignore{i}() {{ | ||||
|                      Some(default_rsdate), false, HashMap::new()); | ||||
| }}\n''' | ||||
|  | ||||
| TEST_FUZZY_TZINFO = ''' | ||||
| #[test] | ||||
| fn test_fuzzy_tzinfo{i}() {{ | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime {{ | ||||
|         year: {d.year}, month: {d.month}, day: {d.day}, | ||||
|         hour: {d.hour}, minute: {d.minute}, second: {d.second}, | ||||
|         micros: {d.microsecond}, tzo: Some({offset}) | ||||
|     }}; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "{s}", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| }}\n''' | ||||
|  | ||||
| TEST_FUZZY_TOKENS_TZINFO = ''' | ||||
| #[test] | ||||
| fn test_fuzzy_tokens_tzinfo{i}() {{ | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime {{ | ||||
|         year: {d.year}, month: {d.month}, day: {d.day}, | ||||
|         hour: {d.hour}, minute: {d.minute}, second: {d.second}, | ||||
|         micros: {d.microsecond}, tzo: Some({offset}) | ||||
|     }}; | ||||
|     let tokens = vec![{tokens}]; | ||||
|     parse_fuzzy_and_assert(pdt, Some(tokens), info, "{s}", None, None, true, true, | ||||
|                            None, false, HashMap::new()); | ||||
| }}\n''' | ||||
|  | ||||
| TEST_FUZZY_SIMPLE = ''' | ||||
| #[test] | ||||
| fn test_fuzzy_simple{i}() {{ | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime {{ | ||||
|         year: {d.year}, month: {d.month}, day: {d.day}, | ||||
|         hour: {d.hour}, minute: {d.minute}, second: {d.second}, | ||||
|         micros: {d.microsecond}, tzo: None | ||||
|     }}; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "{s}", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| }}\n''' | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||

build_pycompat_tokenizer.py (new file, 35 lines)
| @ -0,0 +1,35 @@ | ||||
| from dateutil.parser import _timelex | ||||
|  | ||||
| from build_pycompat import tests | ||||
|  | ||||
| def main(): | ||||
|     with open('src/tests/pycompat_tokenizer.rs', 'w+') as handle: | ||||
|         handle.write(TEST_HEADER) | ||||
|  | ||||
|         counter = 0 | ||||
|         for _, test_strings in tests.items(): | ||||
|             for s in test_strings: | ||||
|                 handle.write(build_test(counter, s)) | ||||
|                 counter += 1 | ||||
|  | ||||
| def build_test(i, test_string): | ||||
|     python_tokens = list(_timelex(test_string)) | ||||
|     formatted_tokens = 'vec!["' + '", "'.join(python_tokens) + '"]' | ||||
|     return f''' | ||||
| #[test] | ||||
| fn test_tokenize{i}() {{ | ||||
|     let comp = {formatted_tokens}; | ||||
|     tokenize_assert("{test_string}", comp); | ||||
| }}\n''' | ||||
|  | ||||
|  | ||||
| TEST_HEADER = ''' | ||||
| use tokenize::Tokenizer; | ||||
|  | ||||
| fn tokenize_assert(test_str: &str, comparison: Vec<&str>) { | ||||
|     let tokens: Vec<String> = Tokenizer::new(test_str).collect(); | ||||
|     assert_eq!(tokens, comparison, "Tokenizing mismatch for `{}`", test_str); | ||||
| }\n''' | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
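The generated tokenizer tests drive `Tokenizer` directly through the `tokenize_assert` helper above. As a hedged sketch (not part of this diff), the same splitting is reachable through the public `tokenize` wrapper, whose `&str` signature appears in the src/lib.rs hunk below; the expected output comes from `test_tokenize0` in the generated file.

```rust
// Sketch only: the public wrapper around Tokenizer, per the src/lib.rs change below.
extern crate dtparse;

fn main() {
    // Per test_tokenize0, this yields:
    // ["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"]
    let tokens: Vec<String> = dtparse::tokenize("Thu Sep 25 10:36:28");
    println!("{:?}", tokens);
}
```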
							
								
								
									
src/lib.rs (119 changed lines)
| @ -1,6 +1,3 @@ | ||||
| #![allow(dead_code)] | ||||
| #![allow(unused)] | ||||
|  | ||||
| #[macro_use] | ||||
| extern crate lazy_static; | ||||
|  | ||||
| @ -8,7 +5,6 @@ extern crate chrono; | ||||
| extern crate num_traits; | ||||
| extern crate rust_decimal; | ||||
|  | ||||
| use chrono::DateTime; | ||||
| use chrono::Datelike; | ||||
| use chrono::Duration; | ||||
| use chrono::FixedOffset; | ||||
| @ -17,7 +13,6 @@ use chrono::NaiveDate; | ||||
| use chrono::NaiveDateTime; | ||||
| use chrono::NaiveTime; | ||||
| use chrono::Timelike; | ||||
| use chrono::Utc; | ||||
| use num_traits::cast::ToPrimitive; | ||||
| use rust_decimal::Decimal; | ||||
| use rust_decimal::Error as DecimalError; | ||||
| @ -33,7 +28,6 @@ mod weekday; | ||||
| #[cfg(test)] | ||||
| mod tests; | ||||
|  | ||||
| use tokenize::ParseState; | ||||
| use tokenize::Tokenizer; | ||||
| use weekday::day_of_week; | ||||
| use weekday::DayOfWeek; | ||||
| @ -59,13 +53,13 @@ pub enum ParseInternalError { | ||||
| } | ||||
|  | ||||
| impl From<DecimalError> for ParseInternalError { | ||||
|     fn from(err: DecimalError) -> Self { | ||||
|     fn from(_err: DecimalError) -> Self { | ||||
|         ParseInternalError::InvalidDecimal | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<ParseIntError> for ParseInternalError { | ||||
|     fn from(err: ParseIntError) -> Self { | ||||
|     fn from(_err: ParseIntError) -> Self { | ||||
|         ParseInternalError::InvalidInteger | ||||
|     } | ||||
| } | ||||
| @ -92,7 +86,7 @@ type ParseResult<I> = Result<I, ParseError>; | ||||
| type ParseIResult<I> = Result<I, ParseInternalError>; | ||||
|  | ||||
| pub fn tokenize(parse_string: &str) -> Vec<String> { | ||||
|     let tokenizer = Tokenizer::new(parse_string.to_owned()); | ||||
|     let tokenizer = Tokenizer::new(parse_string); | ||||
|     tokenizer.collect() | ||||
| } | ||||
|  | ||||
| @ -201,7 +195,9 @@ impl ParserInfo { | ||||
|  | ||||
|     fn get_ampm(&self, name: &str) -> Option<bool> { | ||||
|         if let Some(v) = self.ampm.get(&name.to_lowercase()) { | ||||
|             Some(v.to_owned() == 1) | ||||
|             // Python technically uses numbers here, but given that the numbers are | ||||
|             // only 0 and 1, it's easier to use booleans | ||||
|             Some(*v == 1) | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
| @ -293,11 +289,6 @@ struct YMD { | ||||
|     ystridx: Option<usize>, | ||||
| } | ||||
|  | ||||
| enum YMDAppendEither { | ||||
|     Number(i32), | ||||
|     Stringy(String), | ||||
| } | ||||
|  | ||||
| impl YMD { | ||||
|     fn len(&self) -> usize { | ||||
|         self._ymd.len() | ||||
| @ -387,7 +378,7 @@ impl YMD { | ||||
|                     Ok(()) | ||||
|                 } | ||||
|             } | ||||
|             None => Err(ParseInternalError::ValueError("Missing label.".to_owned())), | ||||
|             None => Ok(()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @ -565,7 +556,6 @@ impl Parser { | ||||
|  | ||||
|         let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms(0, 0, 0)); | ||||
|  | ||||
|         // TODO: What should be done with the tokens? | ||||
|         let (res, tokens) = | ||||
|             self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?; | ||||
|  | ||||
| @ -613,34 +603,34 @@ impl Parser { | ||||
|         while i < len_l { | ||||
|             let value_repr = l[i].clone(); | ||||
|  | ||||
|             if let Ok(v) = Decimal::from_str(&value_repr) { | ||||
|             if let Ok(_v) = Decimal::from_str(&value_repr) { | ||||
|                 i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?; | ||||
|             } else if let Some(value) = self.info.get_weekday(&l[i]) { | ||||
|                 res.weekday = Some(value); | ||||
|             } else if let Some(value) = self.info.get_month(&l[i]) { | ||||
|                 ymd.append(value as i32, &l[i], Some(YMDLabel::Month)); | ||||
|                 ymd.append(value as i32, &l[i], Some(YMDLabel::Month))?; | ||||
|  | ||||
|                 if i + 1 < len_l { | ||||
|                     if l[i + 1] == "-" || l[i + 1] == "/" { | ||||
|                         // Jan-01[-99] | ||||
|                         let sep = &l[i + 1]; | ||||
|                         // TODO: This seems like a very unsafe unwrap | ||||
|                         ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None); | ||||
|                         ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None)?; | ||||
|  | ||||
|                         if i + 3 < len_l && &l[i + 3] == sep { | ||||
|                             // Jan-01-99 | ||||
|                             ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None); | ||||
|                             ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None)?; | ||||
|                             i += 2; | ||||
|                         } | ||||
|  | ||||
|                         i += 2; | ||||
|                     } else if (i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " | ||||
|                         && self.info.get_pertain(&l[i + 2])) | ||||
|                     } else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " | ||||
|                         && self.info.get_pertain(&l[i + 2]) | ||||
|                     { | ||||
|                         // Jan of 01 | ||||
|                         if let Some(value) = l[i + 4].parse::<i32>().ok() { | ||||
|                             let year = self.info.convertyear(value, false); | ||||
|                             ymd.append(year, &l[i + 4], Some(YMDLabel::Year)); | ||||
|                             ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?; | ||||
|                         } | ||||
|  | ||||
|                         i += 4; | ||||
| @ -649,8 +639,8 @@ impl Parser { | ||||
|             } else if let Some(value) = self.info.get_ampm(&l[i]) { | ||||
|                 let is_ampm = self.ampm_valid(res.hour, res.ampm, fuzzy); | ||||
|  | ||||
|                 if is_ampm.is_ok() { | ||||
|                     res.hour = Some(self.adjust_ampm(res.hour.unwrap(), value)); | ||||
|                 if is_ampm == Ok(true) { | ||||
|                     res.hour = res.hour.map(|h| self.adjust_ampm(h, value)); | ||||
|                     res.ampm = Some(value); | ||||
|                 } else if fuzzy { | ||||
|                     skipped_idxs.push(i); | ||||
| @ -718,7 +708,7 @@ impl Parser { | ||||
|                 } | ||||
|  | ||||
|                 i += 1; | ||||
|             } else if !self.info.get_jump(&l[i]) || fuzzy { | ||||
|             } else if !(self.info.get_jump(&l[i]) || fuzzy) { | ||||
|                 return Err(ParseError::UnrecognizedToken(l[i].clone())); | ||||
|             } else { | ||||
|                 skipped_idxs.push(i); | ||||
| @ -737,7 +727,7 @@ impl Parser { | ||||
|         if !self.info.validate(&mut res) { | ||||
|             Err(ParseError::InvalidParseResult(res)) | ||||
|         } else if fuzzy_with_tokens { | ||||
|             let skipped_tokens = skipped_idxs.into_iter().map(|i| l[i].clone()).collect(); | ||||
|             let skipped_tokens = self.recombine_skipped(skipped_idxs, l); | ||||
|             Ok((res, Some(skipped_tokens))) | ||||
|         } else { | ||||
|             Ok((res, None)) | ||||
| @ -759,25 +749,27 @@ impl Parser { | ||||
|     } | ||||
|  | ||||
|     fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> { | ||||
|         if fuzzy && ampm == Some(true) { | ||||
|             return Ok(false); | ||||
|         let mut val_is_ampm = true; | ||||
|  | ||||
|         if fuzzy && ampm.is_some() { | ||||
|             val_is_ampm = false; | ||||
|         } | ||||
|  | ||||
|         if hour.is_none() { | ||||
|             if fuzzy { | ||||
|                 Ok(false) | ||||
|                 val_is_ampm = false; | ||||
|             } else { | ||||
|                 Err(ParseError::AmPmWithoutHour) | ||||
|                 return Err(ParseError::AmPmWithoutHour); | ||||
|             } | ||||
|         } else if !(0 <= hour.unwrap() && hour.unwrap() <= 12) { | ||||
|             if fuzzy { | ||||
|                 Ok(false) | ||||
|                 val_is_ampm = false; | ||||
|             } else { | ||||
|                 Err(ParseError::ImpossibleTimestamp("Invalid hour")) | ||||
|                 return Err(ParseError::ImpossibleTimestamp("Invalid hour")); | ||||
|             } | ||||
|         } else { | ||||
|             Ok(false) | ||||
|         } | ||||
|  | ||||
|         Ok(val_is_ampm) | ||||
|     } | ||||
|  | ||||
|     fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> { | ||||
| @ -797,7 +789,7 @@ impl Parser { | ||||
|         }; | ||||
|  | ||||
|         // TODO: Change month/day to u32 | ||||
|         let mut d = NaiveDate::from_ymd( | ||||
|         let d = NaiveDate::from_ymd( | ||||
|             y, | ||||
|             m, | ||||
|             min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?) | ||||
| @ -829,7 +821,7 @@ impl Parser { | ||||
|  | ||||
|     fn build_tzaware( | ||||
|         &self, | ||||
|         dt: &NaiveDateTime, | ||||
|         _dt: &NaiveDateTime, | ||||
|         res: &ParsingResult, | ||||
|         tzinfos: HashMap<String, i32>, | ||||
|     ) -> ParseResult<Option<FixedOffset>> { | ||||
| @ -888,9 +880,9 @@ impl Parser { | ||||
|             let s = &tokens[idx]; | ||||
|  | ||||
|             if ymd.len() == 0 && tokens[idx].find(".") == None { | ||||
|                 ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None); | ||||
|                 ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None); | ||||
|                 ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None); | ||||
|                 ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None)?; | ||||
|                 ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None)?; | ||||
|                 ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; | ||||
|             } else { | ||||
|                 // 19990101T235959[.59] | ||||
|                 res.hour = s[0..2].parse::<i32>().ok(); | ||||
| @ -903,9 +895,9 @@ impl Parser { | ||||
|         } else if vec![8, 12, 14].contains(&len_li) { | ||||
|             // YYMMDD | ||||
|             let s = &tokens[idx]; | ||||
|             ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year)); | ||||
|             ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None); | ||||
|             ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None); | ||||
|             ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?; | ||||
|             ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; | ||||
|             ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?; | ||||
|  | ||||
|             if len_li > 8 { | ||||
|                 res.hour = Some(s[8..10].parse::<i32>()?); | ||||
| @ -947,20 +939,20 @@ impl Parser { | ||||
|         { | ||||
|             // TODO: There's got to be a better way of handling the condition above | ||||
|             let sep = &tokens[idx + 1]; | ||||
|             ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None); | ||||
|             ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None)?; | ||||
|  | ||||
|             if idx + 2 < len_l && !info.get_jump(&tokens[idx + 2]) { | ||||
|                 if let Ok(val) = tokens[idx + 2].parse::<i32>() { | ||||
|                     ymd.append(val, &tokens[idx + 2], None); | ||||
|                     ymd.append(val, &tokens[idx + 2], None)?; | ||||
|                 } else if let Some(val) = info.get_month(&tokens[idx + 2]) { | ||||
|                     ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month)); | ||||
|                     ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month))?; | ||||
|                 } | ||||
|  | ||||
|                 if idx + 3 < len_l && &tokens[idx + 3] == sep { | ||||
|                     if let Some(value) = info.get_month(&tokens[idx + 4]) { | ||||
|                         ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month)); | ||||
|                         ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?; | ||||
|                     } else { | ||||
|                         ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None); | ||||
|                         ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?; | ||||
|                     } | ||||
|  | ||||
|                     idx += 2; | ||||
| @ -975,9 +967,12 @@ impl Parser { | ||||
|                 let hour = value.to_i64().unwrap() as i32; | ||||
|                 let ampm = info.get_ampm(&tokens[idx + 2]).unwrap(); | ||||
|                 res.hour = Some(self.adjust_ampm(hour, ampm)); | ||||
|                 idx += 1; | ||||
|             } else { | ||||
|                 ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None); | ||||
|                 ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None)?; | ||||
|             } | ||||
|  | ||||
|             idx += 1; | ||||
|         } else if info.get_ampm(&tokens[idx + 1]).is_some() | ||||
|             && (*ZERO <= value && value < *TWENTY_FOUR) | ||||
|         { | ||||
| @ -986,7 +981,7 @@ impl Parser { | ||||
|             res.hour = Some(self.adjust_ampm(hour, info.get_ampm(&tokens[idx + 1]).unwrap())); | ||||
|             idx += 1; | ||||
|         } else if ymd.could_be_day(value.to_i64().unwrap() as i32) { | ||||
|             ymd.append(value.to_i64().unwrap() as i32, &value_repr, None); | ||||
|             ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?; | ||||
|         } else if !fuzzy { | ||||
|             return Err(ParseInternalError::ValueError("".to_owned())); | ||||
|         } | ||||
| @ -1087,7 +1082,6 @@ impl Parser { | ||||
|         if hms == 0 { | ||||
|             res.hour = Some(value.to_i64().unwrap() as i32); | ||||
|             if !close_to_integer(&value) { | ||||
|                 // TODO: High probability of issues with rounding here. | ||||
|                 res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32); | ||||
|             } | ||||
|         } else if hms == 1 { | ||||
| @ -1107,6 +1101,7 @@ impl Parser { | ||||
|     } | ||||
|  | ||||
|     fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) { | ||||
|         // UNWRAP: i64 guaranteed to be fine because of preceding floor | ||||
|         let minute = value.floor().to_i64().unwrap() as i32; | ||||
|         let mut second = None; | ||||
|  | ||||
| @ -1117,6 +1112,26 @@ impl Parser { | ||||
|  | ||||
|         (minute, second) | ||||
|     } | ||||
|  | ||||
|     fn recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String> { | ||||
|         let mut skipped_tokens: Vec<String> = vec![]; | ||||
|  | ||||
|         let mut sorted_idxs = skipped_idxs.clone(); | ||||
|         sorted_idxs.sort(); | ||||
|  | ||||
|         for (i, idx) in sorted_idxs.iter().enumerate() { | ||||
|             if i > 0 && idx - 1 == skipped_idxs[i - 1] { | ||||
|                 // UNWRAP: Having an initial value and unconditional push at end guarantees value | ||||
|                 let mut t = skipped_tokens.pop().unwrap(); | ||||
|                 t.push_str(tokens[idx.clone()].as_ref()); | ||||
|                 skipped_tokens.push(t); | ||||
|             } else { | ||||
|                 skipped_tokens.push(tokens[idx.clone()].to_owned()); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         skipped_tokens | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn close_to_integer(value: &Decimal) -> bool { | ||||
|  | ||||

src/tests/mod.rs (new file, 3 lines)
| @ -0,0 +1,3 @@ | ||||
| mod fuzzing; | ||||
| mod pycompat_parser; | ||||
| mod pycompat_tokenizer; | ||||
| @ -7,10 +7,9 @@ use chrono::NaiveDateTime; | ||||
| use chrono::Timelike; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| extern crate dtparse; | ||||
|  | ||||
| use dtparse::Parser; | ||||
| use dtparse::ParserInfo; | ||||
| use Parser; | ||||
| use ParserInfo; | ||||
| use parse; | ||||
|  | ||||
| struct PyDateTime { | ||||
|     year: i32, | ||||
| @ -53,23 +52,59 @@ fn parse_and_assert( | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s); | ||||
| } | ||||
|  | ||||
| fn parse_and_assert_simple( | ||||
|     pdt: PyDateTime, | ||||
|     s: &str, | ||||
| ) { | ||||
|     let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s)); | ||||
|     assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for {}", s); | ||||
|     assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for {}", s); | ||||
|     assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for {}", s); | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for {}", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for {}", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for {}", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s); | ||||
|     let rs_parsed = parse(s).expect(&format!("Unable to parse date in Rust '{}'", s)); | ||||
|     assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s); | ||||
| } | ||||
|  | ||||
| fn parse_fuzzy_and_assert( | ||||
|     pdt: PyDateTime, | ||||
|     ptokens: Option<Vec<String>>, | ||||
|     info: ParserInfo, | ||||
|     s: &str, | ||||
|     dayfirst: Option<bool>, | ||||
|     yearfirst: Option<bool>, | ||||
|     fuzzy: bool, | ||||
|     fuzzy_with_tokens: bool, | ||||
|     default: Option<&NaiveDateTime>, | ||||
|     ignoretz: bool, | ||||
|     tzinfos: HashMap<String, i32>, | ||||
| ) { | ||||
|  | ||||
|     let mut parser = Parser::new(info); | ||||
|     let rs_parsed = parser.parse( | ||||
|         s, | ||||
|         dayfirst, | ||||
|         yearfirst, | ||||
|         fuzzy, | ||||
|         fuzzy_with_tokens, | ||||
|         default, | ||||
|         ignoretz, | ||||
|         tzinfos).expect(&format!("Unable to parse date in Rust '{}'", s)); | ||||
|  | ||||
|     assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s); | ||||
|     assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s); | ||||
|     assert_eq!(ptokens, rs_parsed.2, "Tokens mismatch for '{}'", s); | ||||
| } | ||||
|  | ||||
| macro_rules! rs_tzinfo_map { | ||||
| @ -1697,3 +1732,100 @@ fn test_parse_ignoretz7() { | ||||
|     parse_and_assert(pdt, info, "Tue Apr 4 00:22:12 PDT 1995", None, None, false, false, | ||||
|                      None, true, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_tzinfo0() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 2003, month: 9, day: 25, | ||||
|         hour: 10, minute: 49, second: 41, | ||||
|         micros: 0, tzo: Some(-10800) | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_tokens_tzinfo0() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 2003, month: 9, day: 25, | ||||
|         hour: 10, minute: 49, second: 41, | ||||
|         micros: 0, tzo: Some(-10800) | ||||
|     }; | ||||
|     let tokens = vec!["Today is ".to_owned(), "of ".to_owned(), ", exactly at ".to_owned(), " with timezone ".to_owned(), ".".to_owned()]; | ||||
|     parse_fuzzy_and_assert(pdt, Some(tokens), info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, true, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_simple0() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 1974, month: 3, day: 1, | ||||
|         hour: 0, minute: 0, second: 0, | ||||
|         micros: 0, tzo: None | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "I have a meeting on March 1, 1974", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_simple1() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 2020, month: 6, day: 8, | ||||
|         hour: 0, minute: 0, second: 0, | ||||
|         micros: 0, tzo: None | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "On June 8th, 2020, I am going to be the first man on Mars", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_simple2() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 2003, month: 12, day: 3, | ||||
|         hour: 3, minute: 0, second: 0, | ||||
|         micros: 0, tzo: None | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_simple3() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 2003, month: 12, day: 3, | ||||
|         hour: 3, minute: 0, second: 0, | ||||
|         micros: 0, tzo: None | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_simple4() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 1945, month: 1, day: 29, | ||||
|         hour: 14, minute: 45, second: 0, | ||||
|         micros: 0, tzo: None | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "Jan 29, 1945 14:45 AM I going to see you there?", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_fuzzy_simple5() { | ||||
|     let info = ParserInfo::default(); | ||||
|     let pdt = PyDateTime { | ||||
|         year: 2017, month: 7, day: 17, | ||||
|         hour: 6, minute: 15, second: 0, | ||||
|         micros: 0, tzo: None | ||||
|     }; | ||||
|     parse_fuzzy_and_assert(pdt, None, info, "2017-07-17 06:15:", None, None, true, false, | ||||
|                            None, false, HashMap::new()); | ||||
| } | ||||

src/tests/pycompat_tokenizer.rs (new file, 901 lines)
| @ -0,0 +1,901 @@ | ||||
|  | ||||
| use tokenize::Tokenizer; | ||||
|  | ||||
| fn tokenize_assert(test_str: &str, comparison: Vec<&str>) { | ||||
|     let tokens: Vec<String> = Tokenizer::new(test_str).collect(); | ||||
|     assert_eq!(tokens, comparison, "Tokenizing mismatch for `{}`", test_str); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize0() { | ||||
|     let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"]; | ||||
|     tokenize_assert("Thu Sep 25 10:36:28", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize1() { | ||||
|     let comp = vec!["Sep", " ", "10", ":", "36", ":", "28"]; | ||||
|     tokenize_assert("Sep 10:36:28", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize2() { | ||||
|     let comp = vec!["10", ":", "36", ":", "28"]; | ||||
|     tokenize_assert("10:36:28", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize3() { | ||||
|     let comp = vec!["10", ":", "36"]; | ||||
|     tokenize_assert("10:36", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize4() { | ||||
|     let comp = vec!["Sep", " ", "2003"]; | ||||
|     tokenize_assert("Sep 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize5() { | ||||
|     let comp = vec!["Sep"]; | ||||
|     tokenize_assert("Sep", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize6() { | ||||
|     let comp = vec!["2003"]; | ||||
|     tokenize_assert("2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize7() { | ||||
|     let comp = vec!["10", "h", "36", "m", "28.5", "s"]; | ||||
|     tokenize_assert("10h36m28.5s", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize8() { | ||||
|     let comp = vec!["10", "h", "36", "m", "28", "s"]; | ||||
|     tokenize_assert("10h36m28s", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize9() { | ||||
|     let comp = vec!["10", "h", "36", "m"]; | ||||
|     tokenize_assert("10h36m", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize10() { | ||||
|     let comp = vec!["10", "h"]; | ||||
|     tokenize_assert("10h", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize11() { | ||||
|     let comp = vec!["10", " ", "h", " ", "36"]; | ||||
|     tokenize_assert("10 h 36", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize12() { | ||||
|     let comp = vec!["10", " ", "h", " ", "36.5"]; | ||||
|     tokenize_assert("10 h 36.5", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize13() { | ||||
|     let comp = vec!["36", " ", "m", " ", "5"]; | ||||
|     tokenize_assert("36 m 5", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize14() { | ||||
|     let comp = vec!["36", " ", "m", " ", "5", " ", "s"]; | ||||
|     tokenize_assert("36 m 5 s", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize15() { | ||||
|     let comp = vec!["36", " ", "m", " ", "05"]; | ||||
|     tokenize_assert("36 m 05", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize16() { | ||||
|     let comp = vec!["36", " ", "m", " ", "05", " ", "s"]; | ||||
|     tokenize_assert("36 m 05 s", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize17() { | ||||
|     let comp = vec!["10", "h", " ", "am"]; | ||||
|     tokenize_assert("10h am", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize18() { | ||||
|     let comp = vec!["10", "h", " ", "pm"]; | ||||
|     tokenize_assert("10h pm", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize19() { | ||||
|     let comp = vec!["10", "am"]; | ||||
|     tokenize_assert("10am", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize20() { | ||||
|     let comp = vec!["10", "pm"]; | ||||
|     tokenize_assert("10pm", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize21() { | ||||
|     let comp = vec!["10", ":", "00", " ", "am"]; | ||||
|     tokenize_assert("10:00 am", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize22() { | ||||
|     let comp = vec!["10", ":", "00", " ", "pm"]; | ||||
|     tokenize_assert("10:00 pm", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize23() { | ||||
|     let comp = vec!["10", ":", "00", "am"]; | ||||
|     tokenize_assert("10:00am", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize24() { | ||||
|     let comp = vec!["10", ":", "00", "pm"]; | ||||
|     tokenize_assert("10:00pm", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize25() { | ||||
|     let comp = vec!["10", ":", "00", "a", ".", "m"]; | ||||
|     tokenize_assert("10:00a.m", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize26() { | ||||
|     let comp = vec!["10", ":", "00", "p", ".", "m"]; | ||||
|     tokenize_assert("10:00p.m", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize27() { | ||||
|     let comp = vec!["10", ":", "00", "a", ".", "m", "."]; | ||||
|     tokenize_assert("10:00a.m.", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize28() { | ||||
|     let comp = vec!["10", ":", "00", "p", ".", "m", "."]; | ||||
|     tokenize_assert("10:00p.m.", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize29() { | ||||
|     let comp = vec!["October"]; | ||||
|     tokenize_assert("October", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize30() { | ||||
|     let comp = vec!["31", "-", "Dec", "-", "00"]; | ||||
|     tokenize_assert("31-Dec-00", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize31() { | ||||
|     let comp = vec!["0", ":", "01", ":", "02"]; | ||||
|     tokenize_assert("0:01:02", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize32() { | ||||
|     let comp = vec!["12", "h", " ", "01", "m", "02", "s", " ", "am"]; | ||||
|     tokenize_assert("12h 01m02s am", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize33() { | ||||
|     let comp = vec!["12", ":", "08", " ", "PM"]; | ||||
|     tokenize_assert("12:08 PM", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize34() { | ||||
|     let comp = vec!["01", "h", "02", "m", "03"]; | ||||
|     tokenize_assert("01h02m03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize35() { | ||||
|     let comp = vec!["01", "h", "02"]; | ||||
|     tokenize_assert("01h02", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize36() { | ||||
|     let comp = vec!["01", "h", "02", "s"]; | ||||
|     tokenize_assert("01h02s", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize37() { | ||||
|     let comp = vec!["01", "m", "02"]; | ||||
|     tokenize_assert("01m02", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize38() { | ||||
|     let comp = vec!["01", "m", "02", "h"]; | ||||
|     tokenize_assert("01m02h", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize39() { | ||||
|     let comp = vec!["2004", " ", "10", " ", "Apr", " ", "11", "h", "30", "m"]; | ||||
|     tokenize_assert("2004 10 Apr 11h30m", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize40() { | ||||
|     let comp = vec!["Sep", " ", "03"]; | ||||
|     tokenize_assert("Sep 03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize41() { | ||||
|     let comp = vec!["Sep", " ", "of", " ", "03"]; | ||||
|     tokenize_assert("Sep of 03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize42() { | ||||
|     let comp = vec!["02", ":", "17", "NOV", "2017"]; | ||||
|     tokenize_assert("02:17NOV2017", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize43() { | ||||
|     let comp = vec!["Thu", " ", "Sep", " ", "10", ":", "36", ":", "28"]; | ||||
|     tokenize_assert("Thu Sep 10:36:28", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize44() { | ||||
|     let comp = vec!["Thu", " ", "10", ":", "36", ":", "28"]; | ||||
|     tokenize_assert("Thu 10:36:28", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize45() { | ||||
|     let comp = vec!["Wed"]; | ||||
|     tokenize_assert("Wed", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize46() { | ||||
|     let comp = vec!["Wednesday"]; | ||||
|     tokenize_assert("Wednesday", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize47() { | ||||
|     let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"]; | ||||
|     tokenize_assert("Thu Sep 25 10:36:28 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize48() { | ||||
|     let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "2003"]; | ||||
|     tokenize_assert("Thu Sep 25 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize49() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"]; | ||||
|     tokenize_assert("2003-09-25T10:49:41", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize50() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49"]; | ||||
|     tokenize_assert("2003-09-25T10:49", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize51() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25", "T", "10"]; | ||||
|     tokenize_assert("2003-09-25T10", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize52() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25"]; | ||||
|     tokenize_assert("2003-09-25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize53() { | ||||
|     let comp = vec!["20030925", "T", "104941"]; | ||||
|     tokenize_assert("20030925T104941", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize54() { | ||||
|     let comp = vec!["20030925", "T", "1049"]; | ||||
|     tokenize_assert("20030925T1049", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize55() { | ||||
|     let comp = vec!["20030925", "T", "10"]; | ||||
|     tokenize_assert("20030925T10", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize56() { | ||||
|     let comp = vec!["20030925"]; | ||||
|     tokenize_assert("20030925", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize57() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"]; | ||||
|     tokenize_assert("2003-09-25 10:49:41,502", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize58() { | ||||
|     let comp = vec!["199709020908"]; | ||||
|     tokenize_assert("199709020908", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize59() { | ||||
|     let comp = vec!["19970902090807"]; | ||||
|     tokenize_assert("19970902090807", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize60() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25"]; | ||||
|     tokenize_assert("2003-09-25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize61() { | ||||
|     let comp = vec!["09", "-", "25", "-", "2003"]; | ||||
|     tokenize_assert("09-25-2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize62() { | ||||
|     let comp = vec!["25", "-", "09", "-", "2003"]; | ||||
|     tokenize_assert("25-09-2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize63() { | ||||
|     let comp = vec!["10", "-", "09", "-", "2003"]; | ||||
|     tokenize_assert("10-09-2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize64() { | ||||
|     let comp = vec!["10", "-", "09", "-", "03"]; | ||||
|     tokenize_assert("10-09-03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize65() { | ||||
|     let comp = vec!["2003", ".", "09", ".", "25"]; | ||||
|     tokenize_assert("2003.09.25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize66() { | ||||
|     let comp = vec!["09", ".", "25", ".", "2003"]; | ||||
|     tokenize_assert("09.25.2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize67() { | ||||
|     let comp = vec!["25", ".", "09", ".", "2003"]; | ||||
|     tokenize_assert("25.09.2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize68() { | ||||
|     let comp = vec!["10", ".", "09", ".", "2003"]; | ||||
|     tokenize_assert("10.09.2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize69() { | ||||
|     let comp = vec!["10", ".", "09", ".", "03"]; | ||||
|     tokenize_assert("10.09.03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize70() { | ||||
|     let comp = vec!["2003", "/", "09", "/", "25"]; | ||||
|     tokenize_assert("2003/09/25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize71() { | ||||
|     let comp = vec!["09", "/", "25", "/", "2003"]; | ||||
|     tokenize_assert("09/25/2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize72() { | ||||
|     let comp = vec!["25", "/", "09", "/", "2003"]; | ||||
|     tokenize_assert("25/09/2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize73() { | ||||
|     let comp = vec!["10", "/", "09", "/", "2003"]; | ||||
|     tokenize_assert("10/09/2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize74() { | ||||
|     let comp = vec!["10", "/", "09", "/", "03"]; | ||||
|     tokenize_assert("10/09/03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize75() { | ||||
|     let comp = vec!["2003", " ", "09", " ", "25"]; | ||||
|     tokenize_assert("2003 09 25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize76() { | ||||
|     let comp = vec!["09", " ", "25", " ", "2003"]; | ||||
|     tokenize_assert("09 25 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize77() { | ||||
|     let comp = vec!["25", " ", "09", " ", "2003"]; | ||||
|     tokenize_assert("25 09 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize78() { | ||||
|     let comp = vec!["10", " ", "09", " ", "2003"]; | ||||
|     tokenize_assert("10 09 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize79() { | ||||
|     let comp = vec!["10", " ", "09", " ", "03"]; | ||||
|     tokenize_assert("10 09 03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize80() { | ||||
|     let comp = vec!["25", " ", "09", " ", "03"]; | ||||
|     tokenize_assert("25 09 03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize81() { | ||||
|     let comp = vec!["03", " ", "25", " ", "Sep"]; | ||||
|     tokenize_assert("03 25 Sep", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize82() { | ||||
|     let comp = vec!["25", " ", "03", " ", "Sep"]; | ||||
|     tokenize_assert("25 03 Sep", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize83() { | ||||
|     let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "]; | ||||
|     tokenize_assert("  July   4 ,  1976   12:01:02   am  ", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize84() { | ||||
|     let comp = vec!["Wed", ",", " ", "July", " ", "10", ",", " ", "'", "96"]; | ||||
|     tokenize_assert("Wed, July 10, '96", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize85() { | ||||
|     let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"]; | ||||
|     tokenize_assert("1996.July.10 AD 12:08 PM", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize86() { | ||||
|     let comp = vec!["July", " ", "4", ",", " ", "1976"]; | ||||
|     tokenize_assert("July 4, 1976", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize87() { | ||||
|     let comp = vec!["7", " ", "4", " ", "1976"]; | ||||
|     tokenize_assert("7 4 1976", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize88() { | ||||
|     let comp = vec!["4", " ", "jul", " ", "1976"]; | ||||
|     tokenize_assert("4 jul 1976", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize89() { | ||||
|     let comp = vec!["7", "-", "4", "-", "76"]; | ||||
|     tokenize_assert("7-4-76", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize90() { | ||||
|     let comp = vec!["19760704"]; | ||||
|     tokenize_assert("19760704", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize91() { | ||||
|     let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; | ||||
|     tokenize_assert("0:01:02 on July 4, 1976", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize92() { | ||||
|     let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; | ||||
|     tokenize_assert("0:01:02 on July 4, 1976", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize93() { | ||||
|     let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"]; | ||||
|     tokenize_assert("July 4, 1976 12:01:02 am", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize94() { | ||||
|     let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"]; | ||||
|     tokenize_assert("Mon Jan  2 04:24:27 1995", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize95() { | ||||
|     let comp = vec!["04", ".", "04", ".", "95", " ", "00", ":", "22"]; | ||||
|     tokenize_assert("04.04.95 00:22", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize96() { | ||||
|     let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"]; | ||||
|     tokenize_assert("Jan 1 1999 11:23:34.578", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize97() { | ||||
|     let comp = vec!["950404", " ", "122212"]; | ||||
|     tokenize_assert("950404 122212", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize98() { | ||||
|     let comp = vec!["3", "rd", " ", "of", " ", "May", " ", "2001"]; | ||||
|     tokenize_assert("3rd of May 2001", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize99() { | ||||
|     let comp = vec!["5", "th", " ", "of", " ", "March", " ", "2001"]; | ||||
|     tokenize_assert("5th of March 2001", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize100() { | ||||
|     let comp = vec!["1", "st", " ", "of", " ", "May", " ", "2003"]; | ||||
|     tokenize_assert("1st of May 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize101() { | ||||
|     let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; | ||||
|     tokenize_assert("0099-01-01T00:00:00", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize102() { | ||||
|     let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; | ||||
|     tokenize_assert("0031-01-01T00:00:00", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize103() { | ||||
|     let comp = vec!["20080227", "T", "21", ":", "26", ":", "01.123456789"]; | ||||
|     tokenize_assert("20080227T21:26:01.123456789", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize104() { | ||||
|     let comp = vec!["13", "NOV", "2017"]; | ||||
|     tokenize_assert("13NOV2017", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize105() { | ||||
|     let comp = vec!["0003", "-", "03", "-", "04"]; | ||||
|     tokenize_assert("0003-03-04", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize106() { | ||||
|     let comp = vec!["December", ".", "0031", ".", "30"]; | ||||
|     tokenize_assert("December.0031.30", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize107() { | ||||
|     let comp = vec!["090107"]; | ||||
|     tokenize_assert("090107", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize108() { | ||||
|     let comp = vec!["2015", "-", "15", "-", "May"]; | ||||
|     tokenize_assert("2015-15-May", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize109() { | ||||
|     let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; | ||||
|     tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize110() { | ||||
|     let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"]; | ||||
|     tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize111() { | ||||
|     let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"]; | ||||
|     tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize112() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"]; | ||||
|     tokenize_assert("2003-09-25T10:49:41.5-03:00", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize113() { | ||||
|     let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"]; | ||||
|     tokenize_assert("2003-09-25T10:49:41-03:00", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize114() { | ||||
|     let comp = vec!["20030925", "T", "104941.5", "-", "0300"]; | ||||
|     tokenize_assert("20030925T104941.5-0300", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize115() { | ||||
|     let comp = vec!["20030925", "T", "104941", "-", "0300"]; | ||||
|     tokenize_assert("20030925T104941-0300", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize116() { | ||||
|     let comp = vec!["10", "-", "09", "-", "2003"]; | ||||
|     tokenize_assert("10-09-2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize117() { | ||||
|     let comp = vec!["10", ".", "09", ".", "2003"]; | ||||
|     tokenize_assert("10.09.2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize118() { | ||||
|     let comp = vec!["10", "/", "09", "/", "2003"]; | ||||
|     tokenize_assert("10/09/2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize119() { | ||||
|     let comp = vec!["10", " ", "09", " ", "2003"]; | ||||
|     tokenize_assert("10 09 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize120() { | ||||
|     let comp = vec!["090107"]; | ||||
|     tokenize_assert("090107", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize121() { | ||||
|     let comp = vec!["2015", " ", "09", " ", "25"]; | ||||
|     tokenize_assert("2015 09 25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize122() { | ||||
|     let comp = vec!["10", "-", "09", "-", "03"]; | ||||
|     tokenize_assert("10-09-03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize123() { | ||||
|     let comp = vec!["10", ".", "09", ".", "03"]; | ||||
|     tokenize_assert("10.09.03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize124() { | ||||
|     let comp = vec!["10", "/", "09", "/", "03"]; | ||||
|     tokenize_assert("10/09/03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize125() { | ||||
|     let comp = vec!["10", " ", "09", " ", "03"]; | ||||
|     tokenize_assert("10 09 03", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize126() { | ||||
|     let comp = vec!["090107"]; | ||||
|     tokenize_assert("090107", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize127() { | ||||
|     let comp = vec!["2015", " ", "09", " ", "25"]; | ||||
|     tokenize_assert("2015 09 25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize128() { | ||||
|     let comp = vec!["090107"]; | ||||
|     tokenize_assert("090107", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize129() { | ||||
|     let comp = vec!["2015", " ", "09", " ", "25"]; | ||||
|     tokenize_assert("2015 09 25", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize130() { | ||||
|     let comp = vec!["April", " ", "2009"]; | ||||
|     tokenize_assert("April 2009", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize131() { | ||||
|     let comp = vec!["Feb", " ", "2007"]; | ||||
|     tokenize_assert("Feb 2007", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize132() { | ||||
|     let comp = vec!["Feb", " ", "2008"]; | ||||
|     tokenize_assert("Feb 2008", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize133() { | ||||
|     let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; | ||||
|     tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize134() { | ||||
|     let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"]; | ||||
|     tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize135() { | ||||
|     let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"]; | ||||
|     tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize136() { | ||||
|     let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"]; | ||||
|     tokenize_assert("November 5, 1994, 8:15:30 am EST", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize137() { | ||||
|     let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"]; | ||||
|     tokenize_assert("1994-11-05T08:15:30-05:00", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize138() { | ||||
|     let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"]; | ||||
|     tokenize_assert("1994-11-05T08:15:30Z", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize139() { | ||||
|     let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"]; | ||||
|     tokenize_assert("1976-07-04T00:01:02Z", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize140() { | ||||
|     let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"]; | ||||
|     tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize141() { | ||||
|     let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; | ||||
|     tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize142() { | ||||
|     let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; | ||||
|     tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize143() { | ||||
|     let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"]; | ||||
|     tokenize_assert("I have a meeting on March 1, 1974", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize144() { | ||||
|     let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"]; | ||||
|     tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize145() { | ||||
|     let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"]; | ||||
|     tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize146() { | ||||
|     let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"]; | ||||
|     tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize147() { | ||||
|     let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"]; | ||||
|     tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_tokenize148() { | ||||
|     let comp = vec!["2017", "-", "07", "-", "17", " ", "06", ":", "15", ":"]; | ||||
|     tokenize_assert("2017-07-17 06:15:", comp); | ||||
| } | ||||
							
								
								
									
259  src/tokenize.rs
							| @ -1,5 +1,6 @@ | ||||
| pub(crate) struct Tokenizer { | ||||
|     token_stack: Vec<String>, | ||||
|     // TODO: Should this be more generic? io::Read for example? | ||||
|     parse_string: String, | ||||
| } | ||||
|  | ||||
| @ -13,12 +14,49 @@ pub(crate) enum ParseState { | ||||
| } | ||||
|  | ||||
| impl Tokenizer { | ||||
|     pub(crate) fn new(parse_string: String) -> Self { | ||||
|  | ||||
|     pub(crate) fn new(parse_string: &str) -> Self { | ||||
|         Tokenizer { | ||||
|             token_stack: Vec::new(), | ||||
|             token_stack: vec![], | ||||
|             parse_string: parse_string.chars().rev().collect(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn isword(&self, c: char) -> bool { | ||||
|         c.is_alphabetic() | ||||
|     } | ||||
|  | ||||
|     fn isnum(&self, c: char) -> bool { | ||||
|         c.is_numeric() | ||||
|     } | ||||
|  | ||||
|     fn isspace(&self, c: char) -> bool { | ||||
|         c.is_whitespace() | ||||
|     } | ||||
|  | ||||
|     fn decimal_split(&self, s: &str) -> Vec<String> { | ||||
|         // Handles the same thing as Python's re.split() | ||||
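|         // e.g. (illustrative) decimal_split("04.04.95") yields ["04", ".", "04", ".", "95"]: | ||||
|         // the '.' and ',' separators are kept as their own tokens, like Python's | ||||
|         // re.split() with a capturing group. | ||||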
|         let mut tokens: Vec<String> = vec!["".to_owned()]; | ||||
|  | ||||
|         for c in s.chars() { | ||||
|             if c == '.' || c == ',' { | ||||
|                 tokens.push(c.to_string()); | ||||
|                 tokens.push("".to_owned()); | ||||
|             } else { | ||||
|                 // UNWRAP: Initial setup guarantees we always have an item | ||||
|                 let mut t = tokens.pop().unwrap(); | ||||
|                 t.push(c); | ||||
|                 tokens.push(t); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // TODO: Do I really have to use &String instead of &str? | ||||
|         if tokens.last() == Some(&"".to_owned()) { | ||||
|             tokens.pop(); | ||||
|         } | ||||
|  | ||||
|         tokens | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Iterator for Tokenizer { | ||||
| @ -26,178 +64,123 @@ impl Iterator for Tokenizer { | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         if !self.token_stack.is_empty() { | ||||
|             return Some(self.token_stack.pop().unwrap()); | ||||
|         }; | ||||
|         if self.parse_string.is_empty() { | ||||
|             return None; | ||||
|         }; | ||||
|             return Some(self.token_stack.remove(0)); | ||||
|         } | ||||
|  | ||||
|         let mut char_stack: Vec<char> = Vec::new(); | ||||
|         let mut seen_letters = false; | ||||
|         let mut seenletters = false; | ||||
|         let mut token: Option<String> = None; | ||||
|         let mut state = ParseState::Empty; | ||||
|  | ||||
|         while let Some(next) = self.parse_string.pop() { | ||||
|         while !self.parse_string.is_empty() { | ||||
|             // Dateutil uses a separate `charstack` to manage the incoming stream. | ||||
|             // Because parse_string can have things pushed back onto it, we skip | ||||
|             // a couple of steps related to the `charstack`. | ||||
|  | ||||
|             // UNWRAP: Just checked that parse_string isn't empty | ||||
|             let nextchar = self.parse_string.pop().unwrap(); | ||||
|  | ||||
|             match state { | ||||
|                 ParseState::Empty => { | ||||
|                     if next.is_numeric() { | ||||
|                         state = ParseState::Numeric; | ||||
|                         char_stack.push(next); | ||||
|                     } else if next.is_alphabetic() { | ||||
|                     token = Some(nextchar.to_string()); | ||||
|                     if self.isword(nextchar) { | ||||
|                         state = ParseState::Alpha; | ||||
|                         seen_letters = true; | ||||
|                         char_stack.push(next); | ||||
|                     } else if next.is_whitespace() { | ||||
|                         char_stack.push(' '); | ||||
|                     } else if self.isnum(nextchar) { | ||||
|                         state = ParseState::Numeric; | ||||
|                     } else if self.isspace(nextchar) { | ||||
|                         token = Some(" ".to_owned()); | ||||
|                         break; | ||||
|                     } else { | ||||
|                         char_stack.push(next); | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|                 }, | ||||
|                 ParseState::Alpha => { | ||||
|                     if next.is_alphabetic() { | ||||
|                         char_stack.push(next); | ||||
|                     } else if next == '.' { | ||||
|                     seenletters = true; | ||||
|                     if self.isword(nextchar) { | ||||
|                         // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                     } else if nextchar == '.' { | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                         state = ParseState::AlphaDecimal; | ||||
|                         char_stack.push(next); | ||||
|                     } else { | ||||
|                         // We don't recognize the character, so push it back | ||||
|                         // to be handled later. | ||||
|                         self.parse_string.push(next); | ||||
|                         self.parse_string.push(nextchar); | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|                 ParseState::AlphaDecimal => { | ||||
|                     if next == '.' || next.is_alphabetic() { | ||||
|                         char_stack.push(next); | ||||
|                     } else if next.is_numeric() && char_stack.last().unwrap().clone() == '.' { | ||||
|                         char_stack.push(next); | ||||
|                         state = ParseState::NumericDecimal; | ||||
|                     } else { | ||||
|                         self.parse_string.push(next); | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|                 }, | ||||
|                 ParseState::Numeric => { | ||||
|                     if next.is_numeric() { | ||||
|                         char_stack.push(next); | ||||
|                     } else if next == '.' || (next == ',' && char_stack.len() >= 2) { | ||||
|                         char_stack.push(next); | ||||
|                     if self.isnum(nextchar) { | ||||
|                         // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                     } else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) { | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                         state = ParseState::NumericDecimal; | ||||
|                     } else { | ||||
|                         // We don't recognize the character, so push it back | ||||
|                         // to be handled later | ||||
|                         self.parse_string.push(next); | ||||
|                         self.parse_string.push(nextchar); | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|                 }, | ||||
|                 ParseState::AlphaDecimal => { | ||||
|                     seenletters = true; | ||||
|                     if nextchar == '.' || self.isword(nextchar) { | ||||
|                         // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                     } else if self.isnum(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') { | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                         state = ParseState::NumericDecimal; | ||||
|                     } else { | ||||
|                         self.parse_string.push(nextchar); | ||||
|                         break; | ||||
|                     } | ||||
|                 }, | ||||
|                 ParseState::NumericDecimal => { | ||||
|                     if next == '.' || next.is_numeric() { | ||||
|                         char_stack.push(next); | ||||
|                     } else if next.is_alphabetic() && char_stack.last().unwrap().clone() == '.' { | ||||
|                         char_stack.push(next); | ||||
|                     if nextchar == '.' || self.isnum(nextchar) { | ||||
|                         // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                     } else if self.isword(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') { | ||||
|                         token.as_mut().unwrap().push(nextchar); | ||||
|                         state = ParseState::AlphaDecimal; | ||||
|                     } else { | ||||
|                         self.parse_string.push(next); | ||||
|                         self.parse_string.push(nextchar); | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // I like Python's version of this much better: | ||||
|         // needs_split = seen_letters or char_stack.count('.') > 1 or char_stack[-1] in '.,' | ||||
|         let dot_count = char_stack.iter().fold(0, |count, character| { | ||||
|             count + (if character == &'.' { 1 } else { 0 }) | ||||
|         }); | ||||
|         let needs_split = seen_letters || dot_count > 1 || char_stack.last().unwrap() == &'.' | ||||
|             || char_stack.last().unwrap() == &','; | ||||
|         let final_string: String = char_stack.into_iter().collect(); | ||||
|  | ||||
|         let mut tokens = match state { | ||||
|             ParseState::Empty => vec![final_string], | ||||
|             ParseState::Alpha => vec![final_string], | ||||
|             ParseState::Numeric => vec![final_string], | ||||
|             ParseState::AlphaDecimal => { | ||||
|                 if needs_split { | ||||
|                     decimal_split(&final_string, false) | ||||
|                 } else { | ||||
|                     vec![final_string] | ||||
|         // Python uses the state to short-circuit and make sure it doesn't run into issues with None | ||||
|         // We do something slightly different to express the same logic | ||||
|         if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal { | ||||
|             // UNWRAP: The state check guarantees that we have a value | ||||
|             let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count(); | ||||
|             let last_char = token.as_ref().unwrap().chars().last(); | ||||
|             let last_splittable = last_char == Some('.') || last_char == Some(','); | ||||
|      | ||||
|             if seenletters || dot_count > 1 || last_splittable { | ||||
|                 let mut l = self.decimal_split(token.as_ref().unwrap()); | ||||
|                 let remaining = l.split_off(1); | ||||
|      | ||||
|                 token = Some(l[0].clone()); | ||||
|                 for t in remaining { | ||||
|                     self.token_stack.push(t); | ||||
|                 } | ||||
|             } | ||||
|             ParseState::NumericDecimal => { | ||||
|                 if needs_split { | ||||
|                     decimal_split(&final_string, dot_count == 0) | ||||
|                 } else { | ||||
|                     vec![final_string] | ||||
|                 } | ||||
|      | ||||
|             if state == ParseState::NumericDecimal && dot_count == 0 { | ||||
|                 token = Some(token.unwrap().replace(',', ".")); | ||||
|             } | ||||
|         }.into_iter() | ||||
|             .rev() | ||||
|             .collect(); | ||||
|  | ||||
|         self.token_stack.append(&mut tokens); | ||||
|         // UNWRAP: Previous match guaranteed that at least one token was added | ||||
|         let token = self.token_stack.pop().unwrap(); | ||||
|         if state == ParseState::NumericDecimal && !token.contains(".") { | ||||
|             Some(token.replace(",", ".")) | ||||
|         } else { | ||||
|             Some(token) | ||||
|         } | ||||
|  | ||||
|         token | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn decimal_split(characters: &str, cast_period: bool) -> Vec<String> { | ||||
|     let mut token_stack: Vec<String> = Vec::new(); | ||||
|     let mut char_stack: Vec<char> = Vec::new(); | ||||
|     let mut state = ParseState::Empty; | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|  | ||||
|     for c in characters.chars() { | ||||
|         match state { | ||||
|             ParseState::Empty => { | ||||
|                 if c.is_alphabetic() { | ||||
|                     char_stack.push(c); | ||||
|                     state = ParseState::Alpha; | ||||
|                 } else if c.is_numeric() { | ||||
|                     char_stack.push(c); | ||||
|                     state = ParseState::Numeric; | ||||
|                 } else { | ||||
|                     let character = if cast_period { '.' } else { c }; | ||||
|                     token_stack.push(character.to_string()); | ||||
|                 } | ||||
|             } | ||||
|             ParseState::Alpha => { | ||||
|                 if c.is_alphabetic() { | ||||
|                     char_stack.push(c); | ||||
|                 } else { | ||||
|                     token_stack.push(char_stack.iter().collect()); | ||||
|                     char_stack.clear(); | ||||
|                     let character = if cast_period { '.' } else { c }; | ||||
|                     token_stack.push(character.to_string()); | ||||
|                     state = ParseState::Empty; | ||||
|                 } | ||||
|             } | ||||
|             ParseState::Numeric => { | ||||
|                 if c.is_numeric() { | ||||
|                     char_stack.push(c); | ||||
|                 } else { | ||||
|                     token_stack.push(char_stack.iter().collect()); | ||||
|                     char_stack.clear(); | ||||
|                     let character = if cast_period { '.' } else { c }; | ||||
|                     token_stack.push(character.to_string()); | ||||
|                     state = ParseState::Empty; | ||||
|                 } | ||||
|             } | ||||
|             _ => panic!("Invalid parse state during decimal_split()"), | ||||
|         } | ||||
|     use Tokenizer; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_basic() { | ||||
|         let tokens: Vec<String> = Tokenizer::new("September of 2003,").collect(); | ||||
|         assert_eq!(tokens, vec!["September", " ", "of", " ", "2003", ","]); | ||||
|     } | ||||
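|  | ||||
|     // Illustrative companion to test_basic (test name is ours): mirrors the | ||||
|     // generated pycompat case "04.04.95 00:22" above, showing that decimal | ||||
|     // separators and whitespace come through as individual tokens. | ||||
|     #[test] | ||||
|     fn test_split_decimal_and_time() { | ||||
|         let tokens: Vec<String> = Tokenizer::new("04.04.95 00:22").collect(); | ||||
|         assert_eq!(tokens, vec!["04", ".", "04", ".", "95", " ", "00", ":", "22"]); | ||||
|     } | ||||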
|  | ||||
|     match state { | ||||
|         ParseState::Alpha => token_stack.push(char_stack.iter().collect()), | ||||
|         ParseState::Numeric => token_stack.push(char_stack.iter().collect()), | ||||
|         ParseState::Empty => (), | ||||
|         _ => panic!("Invalid parse state during decimal_split()"), | ||||
|     } | ||||
|  | ||||
|     token_stack | ||||
| } | ||||
|  | ||||
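src/weekday.rs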
| @ -1,5 +1,3 @@ | ||||
| use std::cmp::max; | ||||
|  | ||||
| use ParseResult; | ||||
| use ParseError; | ||||
|  | ||||
| @ -101,6 +99,8 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Rust warns about unused imports here, but they're definitely used. | ||||
| #[allow(unused_imports)] | ||||
| mod test { | ||||
|  | ||||
|     use weekday::day_of_week; | ||||
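|     // Illustrative expectation: given the signature above, day_of_week(2003, 9, 25) | ||||
|     // should identify a Thursday, matching the "Thu Sep 25 ... 2003" strings | ||||
|     // exercised by the tokenizer tests. | ||||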
|  | ||||