Mirror of https://github.com/bspeice/dtparse (synced 2024-12-22 04:18:09 -05:00)
Commit fe0a0ea3a7

.gitignore (vendored)
@@ -2,4 +2,5 @@
 /target
 **/*.rs.bk
 Cargo.lock
 .vscode
+*.pyc
README.md
@@ -17,8 +17,8 @@ Supported in v0.8
 theoretically would provide support, but I'd also like some helper things available (e.g. "EST" is not a named zone in `chrono-tz`).
 Explicit time zones (i.e. "00:00:00 -0300") are working as expected.

-3. "Fuzzy" and "Fuzzy with tokens" modes haven't been tested. The code should work, but I need to get the
-test cases added to the auto-generation suite
+3. ~~"Fuzzy" and "Fuzzy with tokens" modes haven't been tested. The code should work, but I need to get the
+test cases added to the auto-generation suite~~

 **Non-functional**: This library is intended to be a direct port from Python, and thus the code
 looks a lot more like Python than it does Rust. There are a ton of `TODO` comments in the code
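As a quick illustration of the "explicit time zones" point above: the public `dtparse::parse` entry point (the same call `parse_and_assert_simple` uses later in this diff) returns the naive timestamp plus an optional `FixedOffset`. A minimal sketch; the input string here is an assumed example, not one taken from the test suite:

```rust
// Sketch only: exercises the "explicit time zones work" claim from the README.
// The input string is an assumed example; dtparse::parse and
// FixedOffset::local_minus_utc are the same calls the generated tests rely on.
extern crate dtparse;

fn main() {
    let (dt, offset) = dtparse::parse("2003-09-25 10:49:41 -0300")
        .expect("explicit-offset timestamps should parse");
    // FixedOffset reports seconds east of UTC, so "-0300" comes back as -10800.
    assert_eq!(offset.map(|o| o.local_minus_utc()), Some(-10800));
    println!("{} (offset: {:?})", dt, offset);
}
```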
build_pycompat.py
@@ -80,12 +80,26 @@ tests = {
         '1994-11-05T08:15:30Z', '1976-07-04T00:01:02Z',
         'Tue Apr 4 00:22:12 PDT 1995'
     ],
+    'test_fuzzy_tzinfo': [
+        'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
+    ],
+    'test_fuzzy_tokens_tzinfo': [
+        'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
+    ],
+    'test_fuzzy_simple': [
+        'I have a meeting on March 1, 1974', # testFuzzyAMPMProblem
+        'On June 8th, 2020, I am going to be the first man on Mars', # testFuzzyAMPMProblem
+        'Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003', # testFuzzyAMPMProblem
+        'Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset', # testFuzzyAMPMProblem
+        'Jan 29, 1945 14:45 AM I going to see you there?', # testFuzzyIgnoreAMPM
+        '2017-07-17 06:15:', # test_idx_check
+    ],
     'test_parse_default_ignore': [
     ],
 }

 def main():
-    with open('tests/pycompat.rs', 'w+') as handle:
+    with open('src/tests/pycompat_parser.rs', 'w+') as handle:
         handle.write(TEST_HEADER)

         for test_name, test_strings in tests.items():
@@ -149,6 +163,30 @@ def test_parse_default_ignore(i, s):

     return TEST_PARSE_DEFAULT_IGNORE.format(i=i, d=d, s=s)


+def test_fuzzy_tzinfo(i, s):
+    d = parse(s, fuzzy=True)
+
+    return TEST_FUZZY_TZINFO.format(i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds()))
+
+
+def test_fuzzy_tokens_tzinfo(i, s):
+    d, tokens = parse(s, fuzzy_with_tokens=True)
+
+    r_tokens = ", ".join(list(map(lambda s: f'"{s}".to_owned()', tokens)))
+
+    return TEST_FUZZY_TOKENS_TZINFO.format(
+        i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds()),
+        tokens=r_tokens
+    )
+
+
+def test_fuzzy_simple(i, s):
+    d = parse(s, fuzzy=True)
+
+    return TEST_FUZZY_SIMPLE.format(i=i, d=d, s=s)
+
+
 # Here lies all the ugly junk.
 TEST_HEADER = '''
 extern crate chrono;
@@ -159,10 +197,9 @@ use chrono::NaiveDateTime;
 use chrono::Timelike;
 use std::collections::HashMap;

-extern crate dtparse;
-
-use dtparse::Parser;
-use dtparse::ParserInfo;
+use Parser;
+use ParserInfo;
+use parse;

 struct PyDateTime {
     year: i32,
@@ -205,23 +242,59 @@ fn parse_and_assert(
     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
-    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
-    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
+    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
+    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
 }

 fn parse_and_assert_simple(
     pdt: PyDateTime,
     s: &str,
 ) {
-    let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
-    assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for {}", s);
-    assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for {}", s);
-    assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for {}", s);
-    assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for {}", s);
-    assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for {}", s);
-    assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for {}", s);
-    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
-    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
+    let rs_parsed = parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
+    assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s);
+    assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s);
+    assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s);
+    assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
+    assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s);
+    assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
+    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
+    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
+}
+
+fn parse_fuzzy_and_assert(
+    pdt: PyDateTime,
+    ptokens: Option<Vec<String>>,
+    info: ParserInfo,
+    s: &str,
+    dayfirst: Option<bool>,
+    yearfirst: Option<bool>,
+    fuzzy: bool,
+    fuzzy_with_tokens: bool,
+    default: Option<&NaiveDateTime>,
+    ignoretz: bool,
+    tzinfos: HashMap<String, i32>,
+) {
+    let mut parser = Parser::new(info);
+    let rs_parsed = parser.parse(
+        s,
+        dayfirst,
+        yearfirst,
+        fuzzy,
+        fuzzy_with_tokens,
+        default,
+        ignoretz,
+        tzinfos).expect(&format!("Unable to parse date in Rust '{}'", s));
+
+    assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s);
+    assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s);
+    assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s);
+    assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
+    assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
+    assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
+    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
+    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
+    assert_eq!(ptokens, rs_parsed.2, "Tokens mismatch for '{}'", s);
 }

 macro_rules! rs_tzinfo_map {
@@ -364,6 +437,46 @@ fn test_parse_default_ignore{i}() {{
                      Some(default_rsdate), false, HashMap::new());
 }}\n'''

+TEST_FUZZY_TZINFO = '''
+#[test]
+fn test_fuzzy_tzinfo{i}() {{
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {{
+        year: {d.year}, month: {d.month}, day: {d.day},
+        hour: {d.hour}, minute: {d.minute}, second: {d.second},
+        micros: {d.microsecond}, tzo: Some({offset})
+    }};
+    parse_fuzzy_and_assert(pdt, None, info, "{s}", None, None, true, false,
+                           None, false, HashMap::new());
+}}\n'''
+
+TEST_FUZZY_TOKENS_TZINFO = '''
+#[test]
+fn test_fuzzy_tokens_tzinfo{i}() {{
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {{
+        year: {d.year}, month: {d.month}, day: {d.day},
+        hour: {d.hour}, minute: {d.minute}, second: {d.second},
+        micros: {d.microsecond}, tzo: Some({offset})
+    }};
+    let tokens = vec![{tokens}];
+    parse_fuzzy_and_assert(pdt, Some(tokens), info, "{s}", None, None, true, true,
+                           None, false, HashMap::new());
+}}\n'''
+
+TEST_FUZZY_SIMPLE = '''
+#[test]
+fn test_fuzzy_simple{i}() {{
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {{
+        year: {d.year}, month: {d.month}, day: {d.day},
+        hour: {d.hour}, minute: {d.minute}, second: {d.second},
+        micros: {d.microsecond}, tzo: None
+    }};
+    parse_fuzzy_and_assert(pdt, None, info, "{s}", None, None, true, false,
+                           None, false, HashMap::new());
+}}\n'''


 if __name__ == '__main__':
     main()
build_pycompat_tokenizer.py (new file)
@@ -0,0 +1,35 @@
+from dateutil.parser import _timelex
+
+from build_pycompat import tests
+
+def main():
+    with open('src/tests/pycompat_tokenizer.rs', 'w+') as handle:
+        handle.write(TEST_HEADER)
+
+        counter = 0
+        for _, test_strings in tests.items():
+            for s in test_strings:
+                handle.write(build_test(counter, s))
+                counter += 1
+
+def build_test(i, test_string):
+    python_tokens = list(_timelex(test_string))
+    formatted_tokens = 'vec!["' + '", "'.join(python_tokens) + '"]'
+    return f'''
+#[test]
+fn test_tokenize{i}() {{
+    let comp = {formatted_tokens};
+    tokenize_assert("{test_string}", comp);
+}}\n'''
+
+
+TEST_HEADER = '''
+use tokenize::Tokenizer;
+
+fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
+    let tokens: Vec<String> = Tokenizer::new(test_str).collect();
+    assert_eq!(tokens, comparison, "Tokenizing mismatch for `{}`", test_str);
+}\n'''
+
+if __name__ == '__main__':
+    main()
src/lib.rs
@@ -1,6 +1,3 @@
-#![allow(dead_code)]
-#![allow(unused)]
-
 #[macro_use]
 extern crate lazy_static;

@@ -8,7 +5,6 @@ extern crate chrono;
 extern crate num_traits;
 extern crate rust_decimal;

-use chrono::DateTime;
 use chrono::Datelike;
 use chrono::Duration;
 use chrono::FixedOffset;
@@ -17,7 +13,6 @@ use chrono::NaiveDate;
 use chrono::NaiveDateTime;
 use chrono::NaiveTime;
 use chrono::Timelike;
-use chrono::Utc;
 use num_traits::cast::ToPrimitive;
 use rust_decimal::Decimal;
 use rust_decimal::Error as DecimalError;
@@ -33,7 +28,6 @@ mod weekday;
 #[cfg(test)]
 mod tests;

-use tokenize::ParseState;
 use tokenize::Tokenizer;
 use weekday::day_of_week;
 use weekday::DayOfWeek;
@@ -59,13 +53,13 @@ pub enum ParseInternalError {
 }

 impl From<DecimalError> for ParseInternalError {
-    fn from(err: DecimalError) -> Self {
+    fn from(_err: DecimalError) -> Self {
         ParseInternalError::InvalidDecimal
     }
 }

 impl From<ParseIntError> for ParseInternalError {
-    fn from(err: ParseIntError) -> Self {
+    fn from(_err: ParseIntError) -> Self {
         ParseInternalError::InvalidInteger
     }
 }
@@ -92,7 +86,7 @@ type ParseResult<I> = Result<I, ParseError>;
 type ParseIResult<I> = Result<I, ParseInternalError>;

 pub fn tokenize(parse_string: &str) -> Vec<String> {
-    let tokenizer = Tokenizer::new(parse_string.to_owned());
+    let tokenizer = Tokenizer::new(parse_string);
     tokenizer.collect()
 }

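For reference, the `tokenize` helper changed above splits raw input the same way dateutil's `_timelex` does, which is exactly what the generated `pycompat_tokenizer` tests assert. A minimal sketch; the expected token list is copied from `test_tokenize48` later in this commit:

```rust
// Sketch of the public tokenize() function shown above; the expected tokens
// match the generated test for "Thu Sep 25 2003" added in this commit.
extern crate dtparse;

fn main() {
    let tokens = dtparse::tokenize("Thu Sep 25 2003");
    assert_eq!(tokens, vec!["Thu", " ", "Sep", " ", "25", " ", "2003"]);
}
```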
@@ -201,7 +195,9 @@ impl ParserInfo {

     fn get_ampm(&self, name: &str) -> Option<bool> {
         if let Some(v) = self.ampm.get(&name.to_lowercase()) {
-            Some(v.to_owned() == 1)
+            // Python technically uses numbers here, but given that the numbers are
+            // only 0 and 1, it's easier to use booleans
+            Some(*v == 1)
         } else {
             None
         }
@@ -293,11 +289,6 @@ struct YMD {
     ystridx: Option<usize>,
 }

-enum YMDAppendEither {
-    Number(i32),
-    Stringy(String),
-}
-
 impl YMD {
     fn len(&self) -> usize {
         self._ymd.len()
@@ -387,7 +378,7 @@ impl YMD {
                     Ok(())
                 }
             }
-            None => Err(ParseInternalError::ValueError("Missing label.".to_owned())),
+            None => Ok(()),
         }
     }

@@ -565,7 +556,6 @@ impl Parser {

         let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms(0, 0, 0));

-        // TODO: What should be done with the tokens?
         let (res, tokens) =
             self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;

@@ -613,34 +603,34 @@ impl Parser {
         while i < len_l {
             let value_repr = l[i].clone();

-            if let Ok(v) = Decimal::from_str(&value_repr) {
+            if let Ok(_v) = Decimal::from_str(&value_repr) {
                 i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?;
             } else if let Some(value) = self.info.get_weekday(&l[i]) {
                 res.weekday = Some(value);
             } else if let Some(value) = self.info.get_month(&l[i]) {
-                ymd.append(value as i32, &l[i], Some(YMDLabel::Month));
+                ymd.append(value as i32, &l[i], Some(YMDLabel::Month))?;

                 if i + 1 < len_l {
                     if l[i + 1] == "-" || l[i + 1] == "/" {
                         // Jan-01[-99]
                         let sep = &l[i + 1];
                         // TODO: This seems like a very unsafe unwrap
-                        ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None);
+                        ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None)?;

                         if i + 3 < len_l && &l[i + 3] == sep {
                             // Jan-01-99
-                            ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None);
+                            ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None)?;
                             i += 2;
                         }

                         i += 2;
-                    } else if (i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
-                        && self.info.get_pertain(&l[i + 2]))
+                    } else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
+                        && self.info.get_pertain(&l[i + 2])
                     {
                         // Jan of 01
                         if let Some(value) = l[i + 4].parse::<i32>().ok() {
                             let year = self.info.convertyear(value, false);
-                            ymd.append(year, &l[i + 4], Some(YMDLabel::Year));
+                            ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?;
                         }

                         i += 4;
@@ -649,8 +639,8 @@
             } else if let Some(value) = self.info.get_ampm(&l[i]) {
                 let is_ampm = self.ampm_valid(res.hour, res.ampm, fuzzy);

-                if is_ampm.is_ok() {
-                    res.hour = Some(self.adjust_ampm(res.hour.unwrap(), value));
+                if is_ampm == Ok(true) {
+                    res.hour = res.hour.map(|h| self.adjust_ampm(h, value));
                     res.ampm = Some(value);
                 } else if fuzzy {
                     skipped_idxs.push(i);
@@ -718,7 +708,7 @@
                 }

                 i += 1;
-            } else if !self.info.get_jump(&l[i]) || fuzzy {
+            } else if !(self.info.get_jump(&l[i]) || fuzzy) {
                 return Err(ParseError::UnrecognizedToken(l[i].clone()));
             } else {
                 skipped_idxs.push(i);
@@ -737,7 +727,7 @@
         if !self.info.validate(&mut res) {
             Err(ParseError::InvalidParseResult(res))
         } else if fuzzy_with_tokens {
-            let skipped_tokens = skipped_idxs.into_iter().map(|i| l[i].clone()).collect();
+            let skipped_tokens = self.recombine_skipped(skipped_idxs, l);
             Ok((res, Some(skipped_tokens)))
         } else {
             Ok((res, None))
@@ -759,25 +749,27 @@
     }

     fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> {
-        if fuzzy && ampm == Some(true) {
-            return Ok(false);
+        let mut val_is_ampm = true;
+
+        if fuzzy && ampm.is_some() {
+            val_is_ampm = false;
         }

         if hour.is_none() {
             if fuzzy {
-                Ok(false)
+                val_is_ampm = false;
             } else {
-                Err(ParseError::AmPmWithoutHour)
+                return Err(ParseError::AmPmWithoutHour);
             }
         } else if !(0 <= hour.unwrap() && hour.unwrap() <= 12) {
             if fuzzy {
-                Ok(false)
+                val_is_ampm = false;
             } else {
-                Err(ParseError::ImpossibleTimestamp("Invalid hour"))
+                return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
             }
-        } else {
-            Ok(false)
         }

+        Ok(val_is_ampm)
     }

     fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> {
@@ -797,7 +789,7 @@ impl Parser {
         };

         // TODO: Change month/day to u32
-        let mut d = NaiveDate::from_ymd(
+        let d = NaiveDate::from_ymd(
             y,
             m,
             min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?)
@@ -829,7 +821,7 @@

     fn build_tzaware(
         &self,
-        dt: &NaiveDateTime,
+        _dt: &NaiveDateTime,
         res: &ParsingResult,
         tzinfos: HashMap<String, i32>,
     ) -> ParseResult<Option<FixedOffset>> {
@@ -888,9 +880,9 @@
             let s = &tokens[idx];

             if ymd.len() == 0 && tokens[idx].find(".") == None {
-                ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None);
-                ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None);
-                ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None);
+                ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None)?;
+                ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None)?;
+                ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
             } else {
                 // 19990101T235959[.59]
                 res.hour = s[0..2].parse::<i32>().ok();
@@ -903,9 +895,9 @@
         } else if vec![8, 12, 14].contains(&len_li) {
             // YYMMDD
             let s = &tokens[idx];
-            ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year));
-            ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None);
-            ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None);
+            ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?;
+            ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
+            ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;

             if len_li > 8 {
                 res.hour = Some(s[8..10].parse::<i32>()?);
@@ -947,20 +939,20 @@
             {
                 // TODO: There's got to be a better way of handling the condition above
                 let sep = &tokens[idx + 1];
-                ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None);
+                ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None)?;

                 if idx + 2 < len_l && !info.get_jump(&tokens[idx + 2]) {
                     if let Ok(val) = tokens[idx + 2].parse::<i32>() {
-                        ymd.append(val, &tokens[idx + 2], None);
+                        ymd.append(val, &tokens[idx + 2], None)?;
                     } else if let Some(val) = info.get_month(&tokens[idx + 2]) {
-                        ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month));
+                        ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month))?;
                     }

                     if idx + 3 < len_l && &tokens[idx + 3] == sep {
                         if let Some(value) = info.get_month(&tokens[idx + 4]) {
-                            ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month));
+                            ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
                         } else {
-                            ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None);
+                            ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?;
                         }

                         idx += 2;
@@ -975,9 +967,12 @@
                 let hour = value.to_i64().unwrap() as i32;
                 let ampm = info.get_ampm(&tokens[idx + 2]).unwrap();
                 res.hour = Some(self.adjust_ampm(hour, ampm));
+                idx += 1;
             } else {
-                ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None);
+                ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None)?;
             }
+
+            idx += 1;
         } else if info.get_ampm(&tokens[idx + 1]).is_some()
             && (*ZERO <= value && value < *TWENTY_FOUR)
         {
@@ -986,7 +981,7 @@
             res.hour = Some(self.adjust_ampm(hour, info.get_ampm(&tokens[idx + 1]).unwrap()));
             idx += 1;
         } else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
-            ymd.append(value.to_i64().unwrap() as i32, &value_repr, None);
+            ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
         } else if !fuzzy {
             return Err(ParseInternalError::ValueError("".to_owned()));
         }
@@ -1087,7 +1082,6 @@
         if hms == 0 {
             res.hour = Some(value.to_i64().unwrap() as i32);
             if !close_to_integer(&value) {
-                // TODO: High probability of issues with rounding here.
                 res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32);
             }
         } else if hms == 1 {
@@ -1107,6 +1101,7 @@
     }

     fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) {
+        // UNWRAP: i64 guaranteed to be fine because of preceding floor
         let minute = value.floor().to_i64().unwrap() as i32;
         let mut second = None;

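The next hunk adds `recombine_skipped`, which glues runs of adjacent skipped token indices back into single strings; that is why the fuzzy tests later in this commit expect a token like "Today is " rather than "Today", " ", "is", " ". A standalone sketch of the same merge idea (not the dtparse method itself; names and signature here are illustrative):

```rust
// Standalone illustration of the adjacency merge that recombine_skipped performs.
fn recombine(skipped_idxs: &[usize], tokens: &[&str]) -> Vec<String> {
    let mut out: Vec<String> = vec![];
    for (i, &idx) in skipped_idxs.iter().enumerate() {
        if i > 0 && idx == skipped_idxs[i - 1] + 1 {
            // Consecutive skipped indices extend the previous run.
            out.last_mut().unwrap().push_str(tokens[idx]);
        } else {
            out.push(tokens[idx].to_owned());
        }
    }
    out
}

fn main() {
    // Tokens 0..=3 of "Today is 25 ..." are skipped, so they collapse to "Today is ".
    let tokens = ["Today", " ", "is", " ", "25"];
    assert_eq!(recombine(&[0, 1, 2, 3], &tokens), vec!["Today is ".to_owned()]);
}
```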
@@ -1117,6 +1112,26 @@

         (minute, second)
     }
+
+    fn recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String> {
+        let mut skipped_tokens: Vec<String> = vec![];
+
+        let mut sorted_idxs = skipped_idxs.clone();
+        sorted_idxs.sort();
+
+        for (i, idx) in sorted_idxs.iter().enumerate() {
+            if i > 0 && idx - 1 == skipped_idxs[i - 1] {
+                // UNWRAP: Having an initial value and unconditional push at end guarantees value
+                let mut t = skipped_tokens.pop().unwrap();
+                t.push_str(tokens[idx.clone()].as_ref());
+                skipped_tokens.push(t);
+            } else {
+                skipped_tokens.push(tokens[idx.clone()].to_owned());
+            }
+        }
+
+        skipped_tokens
+    }
 }

 fn close_to_integer(value: &Decimal) -> bool {

src/tests/mod.rs (new file)
@@ -0,0 +1,3 @@
+mod fuzzing;
+mod pycompat_parser;
+mod pycompat_tokenizer;

src/tests/pycompat_parser.rs
@@ -7,10 +7,9 @@ use chrono::NaiveDateTime;
 use chrono::Timelike;
 use std::collections::HashMap;

-extern crate dtparse;
-
-use dtparse::Parser;
-use dtparse::ParserInfo;
+use Parser;
+use ParserInfo;
+use parse;

 struct PyDateTime {
     year: i32,
@@ -53,23 +52,59 @@ fn parse_and_assert(
     assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
     assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
     assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
-    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
-    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
+    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
+    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
 }

 fn parse_and_assert_simple(
     pdt: PyDateTime,
     s: &str,
 ) {
-    let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
-    assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for {}", s);
-    assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for {}", s);
-    assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for {}", s);
-    assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for {}", s);
-    assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for {}", s);
-    assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for {}", s);
-    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
-    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
+    let rs_parsed = parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
+    assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s);
+    assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s);
+    assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s);
+    assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
+    assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s);
+    assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
+    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
+    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
+}
+
+fn parse_fuzzy_and_assert(
+    pdt: PyDateTime,
+    ptokens: Option<Vec<String>>,
+    info: ParserInfo,
+    s: &str,
+    dayfirst: Option<bool>,
+    yearfirst: Option<bool>,
+    fuzzy: bool,
+    fuzzy_with_tokens: bool,
+    default: Option<&NaiveDateTime>,
+    ignoretz: bool,
+    tzinfos: HashMap<String, i32>,
+) {
+    let mut parser = Parser::new(info);
+    let rs_parsed = parser.parse(
+        s,
+        dayfirst,
+        yearfirst,
+        fuzzy,
+        fuzzy_with_tokens,
+        default,
+        ignoretz,
+        tzinfos).expect(&format!("Unable to parse date in Rust '{}'", s));
+
+    assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s);
+    assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s);
+    assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s);
+    assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
+    assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
+    assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
+    assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
+    assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
+    assert_eq!(ptokens, rs_parsed.2, "Tokens mismatch for '{}'", s);
 }

 macro_rules! rs_tzinfo_map {
@@ -1697,3 +1732,100 @@ fn test_parse_ignoretz7() {
     parse_and_assert(pdt, info, "Tue Apr 4 00:22:12 PDT 1995", None, None, false, false,
                      None, true, HashMap::new());
 }
+
+#[test]
+fn test_fuzzy_tzinfo0() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 2003, month: 9, day: 25,
+        hour: 10, minute: 49, second: 41,
+        micros: 0, tzo: Some(-10800)
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, false,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_tokens_tzinfo0() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 2003, month: 9, day: 25,
+        hour: 10, minute: 49, second: 41,
+        micros: 0, tzo: Some(-10800)
+    };
+    let tokens = vec!["Today is ".to_owned(), "of ".to_owned(), ", exactly at ".to_owned(), " with timezone ".to_owned(), ".".to_owned()];
+    parse_fuzzy_and_assert(pdt, Some(tokens), info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, true,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_simple0() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 1974, month: 3, day: 1,
+        hour: 0, minute: 0, second: 0,
+        micros: 0, tzo: None
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "I have a meeting on March 1, 1974", None, None, true, false,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_simple1() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 2020, month: 6, day: 8,
+        hour: 0, minute: 0, second: 0,
+        micros: 0, tzo: None
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "On June 8th, 2020, I am going to be the first man on Mars", None, None, true, false,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_simple2() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 2003, month: 12, day: 3,
+        hour: 3, minute: 0, second: 0,
+        micros: 0, tzo: None
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", None, None, true, false,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_simple3() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 2003, month: 12, day: 3,
+        hour: 3, minute: 0, second: 0,
+        micros: 0, tzo: None
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", None, None, true, false,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_simple4() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 1945, month: 1, day: 29,
+        hour: 14, minute: 45, second: 0,
+        micros: 0, tzo: None
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "Jan 29, 1945 14:45 AM I going to see you there?", None, None, true, false,
+                           None, false, HashMap::new());
+}
+
+#[test]
+fn test_fuzzy_simple5() {
+    let info = ParserInfo::default();
+    let pdt = PyDateTime {
+        year: 2017, month: 7, day: 17,
+        hour: 6, minute: 15, second: 0,
+        micros: 0, tzo: None
+    };
+    parse_fuzzy_and_assert(pdt, None, info, "2017-07-17 06:15:", None, None, true, false,
+                           None, false, HashMap::new());
+}

src/tests/pycompat_tokenizer.rs (new file)
@@ -0,0 +1,901 @@
|
|||||||
|
|
||||||
|
use tokenize::Tokenizer;
|
||||||
|
|
||||||
|
fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
|
||||||
|
let tokens: Vec<String> = Tokenizer::new(test_str).collect();
|
||||||
|
assert_eq!(tokens, comparison, "Tokenizing mismatch for `{}`", test_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize0() {
|
||||||
|
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"];
|
||||||
|
tokenize_assert("Thu Sep 25 10:36:28", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize1() {
|
||||||
|
let comp = vec!["Sep", " ", "10", ":", "36", ":", "28"];
|
||||||
|
tokenize_assert("Sep 10:36:28", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize2() {
|
||||||
|
let comp = vec!["10", ":", "36", ":", "28"];
|
||||||
|
tokenize_assert("10:36:28", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize3() {
|
||||||
|
let comp = vec!["10", ":", "36"];
|
||||||
|
tokenize_assert("10:36", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize4() {
|
||||||
|
let comp = vec!["Sep", " ", "2003"];
|
||||||
|
tokenize_assert("Sep 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize5() {
|
||||||
|
let comp = vec!["Sep"];
|
||||||
|
tokenize_assert("Sep", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize6() {
|
||||||
|
let comp = vec!["2003"];
|
||||||
|
tokenize_assert("2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize7() {
|
||||||
|
let comp = vec!["10", "h", "36", "m", "28.5", "s"];
|
||||||
|
tokenize_assert("10h36m28.5s", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize8() {
|
||||||
|
let comp = vec!["10", "h", "36", "m", "28", "s"];
|
||||||
|
tokenize_assert("10h36m28s", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize9() {
|
||||||
|
let comp = vec!["10", "h", "36", "m"];
|
||||||
|
tokenize_assert("10h36m", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize10() {
|
||||||
|
let comp = vec!["10", "h"];
|
||||||
|
tokenize_assert("10h", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize11() {
|
||||||
|
let comp = vec!["10", " ", "h", " ", "36"];
|
||||||
|
tokenize_assert("10 h 36", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize12() {
|
||||||
|
let comp = vec!["10", " ", "h", " ", "36.5"];
|
||||||
|
tokenize_assert("10 h 36.5", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize13() {
|
||||||
|
let comp = vec!["36", " ", "m", " ", "5"];
|
||||||
|
tokenize_assert("36 m 5", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize14() {
|
||||||
|
let comp = vec!["36", " ", "m", " ", "5", " ", "s"];
|
||||||
|
tokenize_assert("36 m 5 s", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize15() {
|
||||||
|
let comp = vec!["36", " ", "m", " ", "05"];
|
||||||
|
tokenize_assert("36 m 05", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize16() {
|
||||||
|
let comp = vec!["36", " ", "m", " ", "05", " ", "s"];
|
||||||
|
tokenize_assert("36 m 05 s", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize17() {
|
||||||
|
let comp = vec!["10", "h", " ", "am"];
|
||||||
|
tokenize_assert("10h am", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize18() {
|
||||||
|
let comp = vec!["10", "h", " ", "pm"];
|
||||||
|
tokenize_assert("10h pm", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize19() {
|
||||||
|
let comp = vec!["10", "am"];
|
||||||
|
tokenize_assert("10am", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize20() {
|
||||||
|
let comp = vec!["10", "pm"];
|
||||||
|
tokenize_assert("10pm", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize21() {
|
||||||
|
let comp = vec!["10", ":", "00", " ", "am"];
|
||||||
|
tokenize_assert("10:00 am", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize22() {
|
||||||
|
let comp = vec!["10", ":", "00", " ", "pm"];
|
||||||
|
tokenize_assert("10:00 pm", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize23() {
|
||||||
|
let comp = vec!["10", ":", "00", "am"];
|
||||||
|
tokenize_assert("10:00am", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize24() {
|
||||||
|
let comp = vec!["10", ":", "00", "pm"];
|
||||||
|
tokenize_assert("10:00pm", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize25() {
|
||||||
|
let comp = vec!["10", ":", "00", "a", ".", "m"];
|
||||||
|
tokenize_assert("10:00a.m", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize26() {
|
||||||
|
let comp = vec!["10", ":", "00", "p", ".", "m"];
|
||||||
|
tokenize_assert("10:00p.m", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize27() {
|
||||||
|
let comp = vec!["10", ":", "00", "a", ".", "m", "."];
|
||||||
|
tokenize_assert("10:00a.m.", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize28() {
|
||||||
|
let comp = vec!["10", ":", "00", "p", ".", "m", "."];
|
||||||
|
tokenize_assert("10:00p.m.", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize29() {
|
||||||
|
let comp = vec!["October"];
|
||||||
|
tokenize_assert("October", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize30() {
|
||||||
|
let comp = vec!["31", "-", "Dec", "-", "00"];
|
||||||
|
tokenize_assert("31-Dec-00", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize31() {
|
||||||
|
let comp = vec!["0", ":", "01", ":", "02"];
|
||||||
|
tokenize_assert("0:01:02", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize32() {
|
||||||
|
let comp = vec!["12", "h", " ", "01", "m", "02", "s", " ", "am"];
|
||||||
|
tokenize_assert("12h 01m02s am", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize33() {
|
||||||
|
let comp = vec!["12", ":", "08", " ", "PM"];
|
||||||
|
tokenize_assert("12:08 PM", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize34() {
|
||||||
|
let comp = vec!["01", "h", "02", "m", "03"];
|
||||||
|
tokenize_assert("01h02m03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize35() {
|
||||||
|
let comp = vec!["01", "h", "02"];
|
||||||
|
tokenize_assert("01h02", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize36() {
|
||||||
|
let comp = vec!["01", "h", "02", "s"];
|
||||||
|
tokenize_assert("01h02s", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize37() {
|
||||||
|
let comp = vec!["01", "m", "02"];
|
||||||
|
tokenize_assert("01m02", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize38() {
|
||||||
|
let comp = vec!["01", "m", "02", "h"];
|
||||||
|
tokenize_assert("01m02h", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize39() {
|
||||||
|
let comp = vec!["2004", " ", "10", " ", "Apr", " ", "11", "h", "30", "m"];
|
||||||
|
tokenize_assert("2004 10 Apr 11h30m", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize40() {
|
||||||
|
let comp = vec!["Sep", " ", "03"];
|
||||||
|
tokenize_assert("Sep 03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize41() {
|
||||||
|
let comp = vec!["Sep", " ", "of", " ", "03"];
|
||||||
|
tokenize_assert("Sep of 03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize42() {
|
||||||
|
let comp = vec!["02", ":", "17", "NOV", "2017"];
|
||||||
|
tokenize_assert("02:17NOV2017", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize43() {
|
||||||
|
let comp = vec!["Thu", " ", "Sep", " ", "10", ":", "36", ":", "28"];
|
||||||
|
tokenize_assert("Thu Sep 10:36:28", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize44() {
|
||||||
|
let comp = vec!["Thu", " ", "10", ":", "36", ":", "28"];
|
||||||
|
tokenize_assert("Thu 10:36:28", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize45() {
|
||||||
|
let comp = vec!["Wed"];
|
||||||
|
tokenize_assert("Wed", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize46() {
|
||||||
|
let comp = vec!["Wednesday"];
|
||||||
|
tokenize_assert("Wednesday", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize47() {
|
||||||
|
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"];
|
||||||
|
tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize48() {
|
||||||
|
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "2003"];
|
||||||
|
tokenize_assert("Thu Sep 25 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize49() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"];
|
||||||
|
tokenize_assert("2003-09-25T10:49:41", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize50() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49"];
|
||||||
|
tokenize_assert("2003-09-25T10:49", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize51() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25", "T", "10"];
|
||||||
|
tokenize_assert("2003-09-25T10", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize52() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25"];
|
||||||
|
tokenize_assert("2003-09-25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize53() {
|
||||||
|
let comp = vec!["20030925", "T", "104941"];
|
||||||
|
tokenize_assert("20030925T104941", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize54() {
|
||||||
|
let comp = vec!["20030925", "T", "1049"];
|
||||||
|
tokenize_assert("20030925T1049", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize55() {
|
||||||
|
let comp = vec!["20030925", "T", "10"];
|
||||||
|
tokenize_assert("20030925T10", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize56() {
|
||||||
|
let comp = vec!["20030925"];
|
||||||
|
tokenize_assert("20030925", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize57() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"];
|
||||||
|
tokenize_assert("2003-09-25 10:49:41,502", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize58() {
|
||||||
|
let comp = vec!["199709020908"];
|
||||||
|
tokenize_assert("199709020908", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize59() {
|
||||||
|
let comp = vec!["19970902090807"];
|
||||||
|
tokenize_assert("19970902090807", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize60() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25"];
|
||||||
|
tokenize_assert("2003-09-25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize61() {
|
||||||
|
let comp = vec!["09", "-", "25", "-", "2003"];
|
||||||
|
tokenize_assert("09-25-2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize62() {
|
||||||
|
let comp = vec!["25", "-", "09", "-", "2003"];
|
||||||
|
tokenize_assert("25-09-2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize63() {
|
||||||
|
let comp = vec!["10", "-", "09", "-", "2003"];
|
||||||
|
tokenize_assert("10-09-2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize64() {
|
||||||
|
let comp = vec!["10", "-", "09", "-", "03"];
|
||||||
|
tokenize_assert("10-09-03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize65() {
|
||||||
|
let comp = vec!["2003", ".", "09", ".", "25"];
|
||||||
|
tokenize_assert("2003.09.25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize66() {
|
||||||
|
let comp = vec!["09", ".", "25", ".", "2003"];
|
||||||
|
tokenize_assert("09.25.2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize67() {
|
||||||
|
let comp = vec!["25", ".", "09", ".", "2003"];
|
||||||
|
tokenize_assert("25.09.2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize68() {
|
||||||
|
let comp = vec!["10", ".", "09", ".", "2003"];
|
||||||
|
tokenize_assert("10.09.2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize69() {
|
||||||
|
let comp = vec!["10", ".", "09", ".", "03"];
|
||||||
|
tokenize_assert("10.09.03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize70() {
|
||||||
|
let comp = vec!["2003", "/", "09", "/", "25"];
|
||||||
|
tokenize_assert("2003/09/25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize71() {
|
||||||
|
let comp = vec!["09", "/", "25", "/", "2003"];
|
||||||
|
tokenize_assert("09/25/2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize72() {
|
||||||
|
let comp = vec!["25", "/", "09", "/", "2003"];
|
||||||
|
tokenize_assert("25/09/2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize73() {
|
||||||
|
let comp = vec!["10", "/", "09", "/", "2003"];
|
||||||
|
tokenize_assert("10/09/2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize74() {
|
||||||
|
let comp = vec!["10", "/", "09", "/", "03"];
|
||||||
|
tokenize_assert("10/09/03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize75() {
|
||||||
|
let comp = vec!["2003", " ", "09", " ", "25"];
|
||||||
|
tokenize_assert("2003 09 25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize76() {
|
||||||
|
let comp = vec!["09", " ", "25", " ", "2003"];
|
||||||
|
tokenize_assert("09 25 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize77() {
|
||||||
|
let comp = vec!["25", " ", "09", " ", "2003"];
|
||||||
|
tokenize_assert("25 09 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize78() {
|
||||||
|
let comp = vec!["10", " ", "09", " ", "2003"];
|
||||||
|
tokenize_assert("10 09 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize79() {
|
||||||
|
let comp = vec!["10", " ", "09", " ", "03"];
|
||||||
|
tokenize_assert("10 09 03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize80() {
|
||||||
|
let comp = vec!["25", " ", "09", " ", "03"];
|
||||||
|
tokenize_assert("25 09 03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize81() {
|
||||||
|
let comp = vec!["03", " ", "25", " ", "Sep"];
|
||||||
|
tokenize_assert("03 25 Sep", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize82() {
|
||||||
|
let comp = vec!["25", " ", "03", " ", "Sep"];
|
||||||
|
tokenize_assert("25 03 Sep", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize83() {
|
||||||
|
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "];
|
||||||
|
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize84() {
|
||||||
|
let comp = vec!["Wed", ",", " ", "July", " ", "10", ",", " ", "'", "96"];
|
||||||
|
tokenize_assert("Wed, July 10, '96", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize85() {
|
||||||
|
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"];
|
||||||
|
tokenize_assert("1996.July.10 AD 12:08 PM", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize86() {
|
||||||
|
let comp = vec!["July", " ", "4", ",", " ", "1976"];
|
||||||
|
tokenize_assert("July 4, 1976", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize87() {
|
||||||
|
let comp = vec!["7", " ", "4", " ", "1976"];
|
||||||
|
tokenize_assert("7 4 1976", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize88() {
|
||||||
|
let comp = vec!["4", " ", "jul", " ", "1976"];
|
||||||
|
tokenize_assert("4 jul 1976", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize89() {
|
||||||
|
let comp = vec!["7", "-", "4", "-", "76"];
|
||||||
|
tokenize_assert("7-4-76", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize90() {
|
||||||
|
let comp = vec!["19760704"];
|
||||||
|
tokenize_assert("19760704", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize91() {
|
||||||
|
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"];
|
||||||
|
tokenize_assert("0:01:02 on July 4, 1976", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize92() {
|
||||||
|
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"];
|
||||||
|
tokenize_assert("0:01:02 on July 4, 1976", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize93() {
|
||||||
|
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"];
|
||||||
|
tokenize_assert("July 4, 1976 12:01:02 am", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize94() {
|
||||||
|
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"];
|
||||||
|
tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize95() {
|
||||||
|
let comp = vec!["04", ".", "04", ".", "95", " ", "00", ":", "22"];
|
||||||
|
tokenize_assert("04.04.95 00:22", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize96() {
|
||||||
|
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"];
|
||||||
|
tokenize_assert("Jan 1 1999 11:23:34.578", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize97() {
|
||||||
|
let comp = vec!["950404", " ", "122212"];
|
||||||
|
tokenize_assert("950404 122212", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize98() {
|
||||||
|
let comp = vec!["3", "rd", " ", "of", " ", "May", " ", "2001"];
|
||||||
|
tokenize_assert("3rd of May 2001", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize99() {
|
||||||
|
let comp = vec!["5", "th", " ", "of", " ", "March", " ", "2001"];
|
||||||
|
tokenize_assert("5th of March 2001", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize100() {
|
||||||
|
let comp = vec!["1", "st", " ", "of", " ", "May", " ", "2003"];
|
||||||
|
tokenize_assert("1st of May 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize101() {
|
||||||
|
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"];
|
||||||
|
tokenize_assert("0099-01-01T00:00:00", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize102() {
|
||||||
|
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"];
|
||||||
|
tokenize_assert("0031-01-01T00:00:00", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize103() {
|
||||||
|
let comp = vec!["20080227", "T", "21", ":", "26", ":", "01.123456789"];
|
||||||
|
tokenize_assert("20080227T21:26:01.123456789", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize104() {
|
||||||
|
let comp = vec!["13", "NOV", "2017"];
|
||||||
|
tokenize_assert("13NOV2017", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize105() {
|
||||||
|
let comp = vec!["0003", "-", "03", "-", "04"];
|
||||||
|
tokenize_assert("0003-03-04", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize106() {
|
||||||
|
let comp = vec!["December", ".", "0031", ".", "30"];
|
||||||
|
tokenize_assert("December.0031.30", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize107() {
|
||||||
|
let comp = vec!["090107"];
|
||||||
|
tokenize_assert("090107", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize108() {
|
||||||
|
let comp = vec!["2015", "-", "15", "-", "May"];
|
||||||
|
tokenize_assert("2015-15-May", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize109() {
|
||||||
|
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"];
|
||||||
|
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize110() {
|
||||||
|
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"];
|
||||||
|
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize111() {
|
||||||
|
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"];
|
||||||
|
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize112() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"];
|
||||||
|
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize113() {
|
||||||
|
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"];
|
||||||
|
tokenize_assert("2003-09-25T10:49:41-03:00", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize114() {
|
||||||
|
let comp = vec!["20030925", "T", "104941.5", "-", "0300"];
|
||||||
|
tokenize_assert("20030925T104941.5-0300", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize115() {
|
||||||
|
let comp = vec!["20030925", "T", "104941", "-", "0300"];
|
||||||
|
tokenize_assert("20030925T104941-0300", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize116() {
|
||||||
|
let comp = vec!["10", "-", "09", "-", "2003"];
|
||||||
|
tokenize_assert("10-09-2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize117() {
|
||||||
|
let comp = vec!["10", ".", "09", ".", "2003"];
|
||||||
|
tokenize_assert("10.09.2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize118() {
|
||||||
|
let comp = vec!["10", "/", "09", "/", "2003"];
|
||||||
|
tokenize_assert("10/09/2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize119() {
|
||||||
|
let comp = vec!["10", " ", "09", " ", "2003"];
|
||||||
|
tokenize_assert("10 09 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize120() {
|
||||||
|
let comp = vec!["090107"];
|
||||||
|
tokenize_assert("090107", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize121() {
|
||||||
|
let comp = vec!["2015", " ", "09", " ", "25"];
|
||||||
|
tokenize_assert("2015 09 25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize122() {
|
||||||
|
let comp = vec!["10", "-", "09", "-", "03"];
|
||||||
|
tokenize_assert("10-09-03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize123() {
|
||||||
|
let comp = vec!["10", ".", "09", ".", "03"];
|
||||||
|
tokenize_assert("10.09.03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize124() {
|
||||||
|
let comp = vec!["10", "/", "09", "/", "03"];
|
||||||
|
tokenize_assert("10/09/03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize125() {
|
||||||
|
let comp = vec!["10", " ", "09", " ", "03"];
|
||||||
|
tokenize_assert("10 09 03", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize126() {
|
||||||
|
let comp = vec!["090107"];
|
||||||
|
tokenize_assert("090107", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize127() {
|
||||||
|
let comp = vec!["2015", " ", "09", " ", "25"];
|
||||||
|
tokenize_assert("2015 09 25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize128() {
|
||||||
|
let comp = vec!["090107"];
|
||||||
|
tokenize_assert("090107", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize129() {
|
||||||
|
let comp = vec!["2015", " ", "09", " ", "25"];
|
||||||
|
tokenize_assert("2015 09 25", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize130() {
|
||||||
|
let comp = vec!["April", " ", "2009"];
|
||||||
|
tokenize_assert("April 2009", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize131() {
|
||||||
|
let comp = vec!["Feb", " ", "2007"];
|
||||||
|
tokenize_assert("Feb 2007", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize132() {
|
||||||
|
let comp = vec!["Feb", " ", "2008"];
|
||||||
|
tokenize_assert("Feb 2008", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize133() {
|
||||||
|
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"];
|
||||||
|
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize134() {
|
||||||
|
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"];
|
||||||
|
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize135() {
|
||||||
|
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"];
|
||||||
|
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize136() {
|
||||||
|
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"];
|
||||||
|
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize137() {
|
||||||
|
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"];
|
||||||
|
tokenize_assert("1994-11-05T08:15:30-05:00", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize138() {
|
||||||
|
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"];
|
||||||
|
tokenize_assert("1994-11-05T08:15:30Z", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize139() {
|
||||||
|
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"];
|
||||||
|
tokenize_assert("1976-07-04T00:01:02Z", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize140() {
|
||||||
|
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"];
|
||||||
|
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize141() {
|
||||||
|
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
|
||||||
|
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize142() {
|
||||||
|
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
|
||||||
|
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize143() {
|
||||||
|
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"];
|
||||||
|
tokenize_assert("I have a meeting on March 1, 1974", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize144() {
|
||||||
|
let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"];
|
||||||
|
tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize145() {
|
||||||
|
let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"];
|
||||||
|
tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize146() {
|
||||||
|
let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"];
|
||||||
|
tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize147() {
|
||||||
|
let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"];
|
||||||
|
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize148() {
|
||||||
|
let comp = vec!["2017", "-", "07", "-", "17", " ", "06", ":", "15", ":"];
|
||||||
|
tokenize_assert("2017-07-17 06:15:", comp);
|
||||||
|
}
|
259
src/tokenize.rs
259
src/tokenize.rs
@ -1,5 +1,6 @@
|
|||||||
pub(crate) struct Tokenizer {
|
pub(crate) struct Tokenizer {
|
||||||
token_stack: Vec<String>,
|
token_stack: Vec<String>,
|
||||||
|
// TODO: Should this be more generic? io::Read for example?
|
||||||
parse_string: String,
|
parse_string: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -13,12 +14,49 @@ pub(crate) enum ParseState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Tokenizer {
|
impl Tokenizer {
|
||||||
pub(crate) fn new(parse_string: String) -> Self {
|
|
||||||
|
pub(crate) fn new(parse_string: &str) -> Self {
|
||||||
Tokenizer {
|
Tokenizer {
|
||||||
token_stack: Vec::new(),
|
token_stack: vec![],
|
||||||
parse_string: parse_string.chars().rev().collect(),
|
parse_string: parse_string.chars().rev().collect(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn isword(&self, c: char) -> bool {
|
||||||
|
c.is_alphabetic()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn isnum(&self, c: char) -> bool {
|
||||||
|
c.is_numeric()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn isspace(&self, c: char) -> bool {
|
||||||
|
c.is_whitespace()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decimal_split(&self, s: &str) -> Vec<String> {
|
||||||
|
// Handles the same thing as Python's re.split()
|
||||||
|
let mut tokens: Vec<String> = vec!["".to_owned()];
|
||||||
|
|
||||||
|
for c in s.chars() {
|
||||||
|
if c == '.' || c == ',' {
|
||||||
|
tokens.push(c.to_string());
|
||||||
|
tokens.push("".to_owned());
|
||||||
|
} else {
|
||||||
|
// UNWRAP: Initial setup guarantees we always have an item
|
||||||
|
let mut t = tokens.pop().unwrap();
|
||||||
|
t.push(c);
|
||||||
|
tokens.push(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Do I really have to use &String instead of &str?
|
||||||
|
if tokens.last() == Some(&"".to_owned()) {
|
||||||
|
tokens.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for Tokenizer {
|
impl Iterator for Tokenizer {
|
||||||
@ -26,178 +64,123 @@ impl Iterator for Tokenizer {
|
|||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if !self.token_stack.is_empty() {
|
if !self.token_stack.is_empty() {
|
||||||
return Some(self.token_stack.pop().unwrap());
|
return Some(self.token_stack.remove(0));
|
||||||
};
|
}
|
||||||
if self.parse_string.is_empty() {
|
|
||||||
return None;
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut char_stack: Vec<char> = Vec::new();
|
let mut seenletters = false;
|
||||||
let mut seen_letters = false;
|
let mut token: Option<String> = None;
|
||||||
let mut state = ParseState::Empty;
|
let mut state = ParseState::Empty;
|
||||||
|
|
||||||
while let Some(next) = self.parse_string.pop() {
|
while !self.parse_string.is_empty() {
|
||||||
|
// Dateutil uses a separate `charstack` to manage the incoming stream.
|
||||||
|
// Because parse_string can have things pushed back onto it, we skip
|
||||||
|
// a couple of steps related to the `charstack`.
|
||||||
|
|
||||||
|
// UNWRAP: Just checked that parse_string isn't empty
|
||||||
|
let nextchar = self.parse_string.pop().unwrap();
|
||||||
|
|
||||||
match state {
|
match state {
|
||||||
ParseState::Empty => {
|
ParseState::Empty => {
|
||||||
if next.is_numeric() {
|
token = Some(nextchar.to_string());
|
||||||
state = ParseState::Numeric;
|
if self.isword(nextchar) {
|
||||||
char_stack.push(next);
|
|
||||||
} else if next.is_alphabetic() {
|
|
||||||
state = ParseState::Alpha;
|
state = ParseState::Alpha;
|
||||||
seen_letters = true;
|
} else if self.isnum(nextchar) {
|
||||||
char_stack.push(next);
|
state = ParseState::Numeric;
|
||||||
} else if next.is_whitespace() {
|
} else if self.isspace(nextchar) {
|
||||||
char_stack.push(' ');
|
token = Some(" ".to_owned());
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
char_stack.push(next);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
ParseState::Alpha => {
|
ParseState::Alpha => {
|
||||||
if next.is_alphabetic() {
|
seenletters = true;
|
||||||
char_stack.push(next);
|
if self.isword(nextchar) {
|
||||||
} else if next == '.' {
|
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
|
||||||
|
token.as_mut().unwrap().push(nextchar);
|
||||||
|
} else if nextchar == '.' {
|
||||||
|
token.as_mut().unwrap().push(nextchar);
|
||||||
state = ParseState::AlphaDecimal;
|
state = ParseState::AlphaDecimal;
|
||||||
char_stack.push(next);
|
|
||||||
} else {
|
} else {
|
||||||
// We don't recognize the character, so push it back
|
self.parse_string.push(nextchar);
|
||||||
// to be handled later.
|
|
||||||
self.parse_string.push(next);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
ParseState::AlphaDecimal => {
|
|
||||||
if next == '.' || next.is_alphabetic() {
|
|
||||||
char_stack.push(next);
|
|
||||||
} else if next.is_numeric() && char_stack.last().unwrap().clone() == '.' {
|
|
||||||
char_stack.push(next);
|
|
||||||
state = ParseState::NumericDecimal;
|
|
||||||
} else {
|
|
||||||
self.parse_string.push(next);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ParseState::Numeric => {
|
ParseState::Numeric => {
|
||||||
if next.is_numeric() {
|
if self.isnum(nextchar) {
|
||||||
char_stack.push(next);
|
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
|
||||||
} else if next == '.' || (next == ',' && char_stack.len() >= 2) {
|
token.as_mut().unwrap().push(nextchar);
|
||||||
char_stack.push(next);
|
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) {
|
||||||
|
token.as_mut().unwrap().push(nextchar);
|
||||||
state = ParseState::NumericDecimal;
|
state = ParseState::NumericDecimal;
|
||||||
} else {
|
} else {
|
||||||
// We don't recognize the character, so push it back
|
self.parse_string.push(nextchar);
|
||||||
// to be handled later
|
|
||||||
self.parse_string.push(next);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
ParseState::AlphaDecimal => {
|
||||||
|
seenletters = true;
|
||||||
|
if nextchar == '.' || self.isword(nextchar) {
|
||||||
|
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
|
||||||
|
token.as_mut().unwrap().push(nextchar);
|
||||||
|
} else if self.isnum(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') {
|
||||||
|
token.as_mut().unwrap().push(nextchar);
|
||||||
|
state = ParseState::NumericDecimal;
|
||||||
|
} else {
|
||||||
|
self.parse_string.push(nextchar);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
},
|
||||||
ParseState::NumericDecimal => {
|
ParseState::NumericDecimal => {
|
||||||
if next == '.' || next.is_numeric() {
|
if nextchar == '.' || self.isnum(nextchar) {
|
||||||
char_stack.push(next);
|
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
|
||||||
} else if next.is_alphabetic() && char_stack.last().unwrap().clone() == '.' {
|
token.as_mut().unwrap().push(nextchar);
|
||||||
char_stack.push(next);
|
} else if self.isword(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') {
|
||||||
|
token.as_mut().unwrap().push(nextchar);
|
||||||
state = ParseState::AlphaDecimal;
|
state = ParseState::AlphaDecimal;
|
||||||
} else {
|
} else {
|
||||||
self.parse_string.push(next);
|
self.parse_string.push(nextchar);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// I like Python's version of this much better:
|
// Python uses the state to short-circuit and make sure it doesn't run into issues with None
|
||||||
// needs_split = seen_letters or char_stack.count('.') > 1 or char_stack[-1] in '.,'
|
// We do something slightly different to express the same logic
|
||||||
let dot_count = char_stack.iter().fold(0, |count, character| {
|
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
|
||||||
count + (if character == &'.' { 1 } else { 0 })
|
// UNWRAP: The state check guarantees that we have a value
|
||||||
});
|
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count();
|
||||||
let needs_split = seen_letters || dot_count > 1 || char_stack.last().unwrap() == &'.'
|
let last_char = token.as_ref().unwrap().chars().last();
|
||||||
|| char_stack.last().unwrap() == &',';
|
let last_splittable = last_char == Some('.') || last_char == Some(',');
|
||||||
let final_string: String = char_stack.into_iter().collect();
|
|
||||||
|
if seenletters || dot_count > 1 || last_splittable {
|
||||||
let mut tokens = match state {
|
let mut l = self.decimal_split(token.as_ref().unwrap());
|
||||||
ParseState::Empty => vec![final_string],
|
let remaining = l.split_off(1);
|
||||||
ParseState::Alpha => vec![final_string],
|
|
||||||
ParseState::Numeric => vec![final_string],
|
token = Some(l[0].clone());
|
||||||
ParseState::AlphaDecimal => {
|
for t in remaining {
|
||||||
if needs_split {
|
self.token_stack.push(t);
|
||||||
decimal_split(&final_string, false)
|
|
||||||
} else {
|
|
||||||
vec![final_string]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ParseState::NumericDecimal => {
|
|
||||||
if needs_split {
|
if state == ParseState::NumericDecimal && dot_count == 0 {
|
||||||
decimal_split(&final_string, dot_count == 0)
|
token = Some(token.unwrap().replace(',', "."));
|
||||||
} else {
|
|
||||||
vec![final_string]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}.into_iter()
|
|
||||||
.rev()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
self.token_stack.append(&mut tokens);
|
|
||||||
// UNWRAP: Previous match guaranteed that at least one token was added
|
|
||||||
let token = self.token_stack.pop().unwrap();
|
|
||||||
if state == ParseState::NumericDecimal && !token.contains(".") {
|
|
||||||
Some(token.replace(",", "."))
|
|
||||||
} else {
|
|
||||||
Some(token)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
token
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decimal_split(characters: &str, cast_period: bool) -> Vec<String> {
|
#[cfg(test)]
|
||||||
let mut token_stack: Vec<String> = Vec::new();
|
mod tests {
|
||||||
let mut char_stack: Vec<char> = Vec::new();
|
|
||||||
let mut state = ParseState::Empty;
|
|
||||||
|
|
||||||
for c in characters.chars() {
|
use Tokenizer;
|
||||||
match state {
|
|
||||||
ParseState::Empty => {
|
#[test]
|
||||||
if c.is_alphabetic() {
|
fn test_basic() {
|
||||||
char_stack.push(c);
|
let tokens: Vec<String> = Tokenizer::new("September of 2003,").collect();
|
||||||
state = ParseState::Alpha;
|
assert_eq!(tokens, vec!["September", " ", "of", " ", "2003", ","]);
|
||||||
} else if c.is_numeric() {
|
|
||||||
char_stack.push(c);
|
|
||||||
state = ParseState::Numeric;
|
|
||||||
} else {
|
|
||||||
let character = if cast_period { '.' } else { c };
|
|
||||||
token_stack.push(character.to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ParseState::Alpha => {
|
|
||||||
if c.is_alphabetic() {
|
|
||||||
char_stack.push(c);
|
|
||||||
} else {
|
|
||||||
token_stack.push(char_stack.iter().collect());
|
|
||||||
char_stack.clear();
|
|
||||||
let character = if cast_period { '.' } else { c };
|
|
||||||
token_stack.push(character.to_string());
|
|
||||||
state = ParseState::Empty;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ParseState::Numeric => {
|
|
||||||
if c.is_numeric() {
|
|
||||||
char_stack.push(c);
|
|
||||||
} else {
|
|
||||||
token_stack.push(char_stack.iter().collect());
|
|
||||||
char_stack.clear();
|
|
||||||
let character = if cast_period { '.' } else { c };
|
|
||||||
token_stack.push(character.to_string());
|
|
||||||
state = ParseState::Empty;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => panic!("Invalid parse state during decimal_split()"),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match state {
|
|
||||||
ParseState::Alpha => token_stack.push(char_stack.iter().collect()),
|
|
||||||
ParseState::Numeric => token_stack.push(char_stack.iter().collect()),
|
|
||||||
ParseState::Empty => (),
|
|
||||||
_ => panic!("Invalid parse state during decimal_split()"),
|
|
||||||
}
|
|
||||||
|
|
||||||
token_stack
|
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
use std::cmp::max;
|
|
||||||
|
|
||||||
use ParseResult;
|
use ParseResult;
|
||||||
use ParseError;
|
use ParseError;
|
||||||
|
|
||||||
@ -101,6 +99,8 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rust warns about unused imports here, but they're definitely used.
|
||||||
|
#[allow(unused_imports)]
|
||||||
mod test {
|
mod test {
|
||||||
|
|
||||||
use weekday::day_of_week;
|
use weekday::day_of_week;
|
||||||
|
Loading…
Reference in New Issue
Block a user