From de47318411f429448c40ea09d191608754257489 Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Fri, 25 May 2018 00:00:15 -0400 Subject: [PATCH] Baby steps --- Cargo.toml | 3 +- build_tests.py | 2 + src/lib.rs | 166 +++++++++++++++++++++++-------- src/tests/compat_parse.rs | 12 +++ src/tests/compat_split_string.rs | 16 +++ 5 files changed, 156 insertions(+), 43 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4445eee..28a4c68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,4 +4,5 @@ version = "0.1.0" authors = ["Bradlee Speice "] [dependencies] -chrono = "0.4" \ No newline at end of file +chrono = "0.4" +rust_decimal = "0.8" \ No newline at end of file diff --git a/build_tests.py b/build_tests.py index f838531..5d85fdc 100644 --- a/build_tests.py +++ b/build_tests.py @@ -8,6 +8,8 @@ TEST_STRINGS = [ '2018.5.15', 'May 5, 2018', 'Mar. 5, 2018', + '19990101T23', + '19990101T2359', ] AUTOGEN_HEADER = ''' diff --git a/src/lib.rs b/src/lib.rs index 8bdf97a..128ebf3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ #![allow(unused)] extern crate chrono; +extern crate rust_decimal; use chrono::DateTime; use chrono::Datelike; @@ -10,26 +11,48 @@ use chrono::Local; use chrono::NaiveDateTime; use chrono::NaiveTime; use chrono::Utc; +use rust_decimal::Decimal; +use rust_decimal::Error as DecimalError; use std::collections::HashMap; +use std::num::ParseIntError; +use std::str::FromStr; use std::vec::Vec; #[cfg(test)] mod tests; -#[derive(Debug, PartialEq)] -pub enum ParseError { - InvalidMonth, -} -enum ParseInternalError { +#[derive(Debug, PartialEq)] +pub enum ParseInternalError { // Errors that indicate internal bugs YMDEarlyResolve, YMDValueUnset, + ParseIndexError, + InvalidDecimal, + InvalidInteger, // Python-style errors ValueError(String), } +impl From for ParseInternalError { + fn from(err: DecimalError) -> Self { ParseInternalError::InvalidDecimal } +} + +impl From for ParseInternalError { + fn from(err: ParseIntError) -> Self { 
ParseInternalError::InvalidInteger } +} + +#[derive(Debug, PartialEq)] +pub enum ParseError { + InternalError(ParseInternalError), + InvalidMonth, +} + +impl From for ParseError { + fn from(err: ParseInternalError) -> Self { ParseError::InternalError(err) } +} + type ParseResult = Result; type ParseIResult = Result; @@ -267,8 +290,8 @@ struct ParserInfo { tzoffset: HashMap, dayfirst: bool, yearfirst: bool, - year: u32, - century: u32, + year: i32, + century: i32, } impl Default for ParserInfo { @@ -317,8 +340,8 @@ impl Default for ParserInfo { tzoffset: parse_info(vec![vec![]]), dayfirst: false, yearfirst: false, - year: year as u32, - century: century as u32, + year: year, + century: century, } } } @@ -360,7 +383,7 @@ impl ParserInfo { } } - fn convertyear(&self, year: u32, century_specified: bool) -> u32 { + fn convertyear(&self, year: i32, century_specified: bool) -> i32 { let mut year = year; if year < 100 && !century_specified { @@ -374,6 +397,20 @@ impl ParserInfo { year } + + // TODO: Should this be moved elsewhere? 
+ fn validate(&self, res: &mut ParsingResult) -> bool { + if let Some(y) = res.year { res.year = Some(self.convertyear(y, res.century_specified)) }; + + if res.tzoffset == 0 && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { + res.tzname = Some("UTC".to_owned()); + res.tzoffset = 0; + } else if res.tzoffset != 0 && res.tzname.is_some() && self.get_utczone(res.tzname.as_ref().unwrap()) { + res.tzoffset = 0; + } + + true + } } fn days_in_month(year: i32, month: i32) -> Result { @@ -383,11 +420,7 @@ fn days_in_month(year: i32, month: i32) -> Result { }; match month { - 2 => if leap_year { - Ok(29) - } else { - Ok(28) - }, + 2 => if leap_year { Ok(29) } else { Ok(28) }, 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), 4 | 6 | 9 | 11 => Ok(30), _ => Err(ParseError::InvalidMonth), @@ -401,6 +434,7 @@ enum YMDLabel { Day, } +#[derive(Debug, Default)] struct YMD { _ymd: Vec, // TODO: This seems like a super weird way to store things century_specified: bool, @@ -410,6 +444,9 @@ struct YMD { } impl YMD { + + fn len(&self) -> usize { self._ymd.len() } + fn could_be_day(&self, val: i32) -> ParseResult { if self.dstridx.is_some() { Ok(false) @@ -640,40 +677,32 @@ impl YMD { } } +#[derive(Default)] struct ParsingResult { - year: i32, - month: i32, - day: i32, - weekday: bool, - hour: i32, - minute: i32, - second: i32, - microsecond: i32, - tzname: i32, + year: Option, + month: Option, + day: Option, + weekday: Option, + hour: Option, + minute: Option, + second: Option, + microsecond: Option, + tzname: Option, tzoffset: i32, - ampm: bool, + ampm: Option, + century_specified: bool, any_unused_tokens: Vec, } +#[derive(Default)] struct Parser { info: ParserInfo, } -impl Default for Parser { - fn default() -> Self { - Parser { - info: ParserInfo::default(), - } - } -} - impl Parser { - pub fn new(info: ParserInfo) -> Self { - Parser { info: info } - } pub fn parse( - &self, + &mut self, timestr: String, default: Option, ignoretz: bool, @@ -686,20 +715,46 @@ impl Parser { // TODO: 
What should be done with the tokens? let (res, tokens) = - self.parse_with_tokens(timestr, self.info.dayfirst, self.info.yearfirst, true, true)?; + self.parse_with_tokens(timestr, None, None, false, false)?; let naive = self.build_naive(&res, default_ts); Ok(self.build_tzaware(naive, &res, default_ts)) } fn parse_with_tokens( - &self, + &mut self, timestr: String, - dayfirst: bool, - yearfirst: bool, + dayfirst: Option, + yearfirst: Option, fuzzy: bool, fuzzy_with_tokens: bool, ) -> Result<(ParsingResult, Vec), ParseError> { + let fuzzy = if fuzzy_with_tokens { true } else { fuzzy }; + // This is probably a stylistic abomination + let dayfirst = if let Some(dayfirst) = dayfirst { dayfirst } else { self.info.dayfirst }; + let yearfirst = if let Some(yearfirst) = yearfirst { yearfirst } else { self.info.yearfirst }; + + let mut res = ParsingResult::default(); + + let l = tokenize(&timestr); + let skipped_idxs: Vec = Vec::new(); + + let ymd = YMD::default(); + + let len_l = l.len(); + let mut i = 0; + + while i < len_l { + + let value_repr = l.get(i).ok_or(ParseInternalError::ParseIndexError)?; + + let value = value_repr.parse::(); + + if let Ok(v) = value { + i = self.parse_numeric_token(&l, i, &self.info, &ymd, &mut res, fuzzy)?; + } + } + Err(ParseError::InvalidMonth) } @@ -716,10 +771,37 @@ impl Parser { Local::now().with_timezone(&FixedOffset::east(0)) } + + fn parse_numeric_token(&self, tokens: &Vec, idx: usize, info: &ParserInfo, ymd: &YMD, res: &mut ParsingResult, fuzzy: bool) -> Result { + let value_repr = &tokens[idx]; + let value = Decimal::from_str(&value_repr)?; + + let len_li = value_repr.len(); + let len_l = tokens.len(); + + let mut s: Option<&str> = None; + + // TODO: I miss the `x in y` syntax + // TODO: Decompose this logic a bit + if ymd.len() == 3 && (len_li == 2 || len_li == 4) && + res.hour.is_none() && ( + idx + 1 >= len_l || + (tokens[idx + 1] != ":" && info.get_hms(&tokens[idx + 1]).is_none())) { + + // 1990101T32[59] + s = Some(&tokens[idx]); + 
res.hour = Some(s.unwrap()[0..2].parse::()?); + + if len_li == 4 { res.minute = Some(s.unwrap()[2..4].parse::()?) } + } + + Ok(idx) + } } fn parse_with_info(timestr: String, info: ParserInfo) -> Result, ParseError> { - let parser = Parser::new(info); + // TODO: Is `::new()` more stylistic? + let mut parser = Parser { info: info }; parser.parse(timestr, None, false, vec![]) } diff --git a/src/tests/compat_parse.rs b/src/tests/compat_parse.rs index 1a78068..cae86ed 100644 --- a/src/tests/compat_parse.rs +++ b/src/tests/compat_parse.rs @@ -27,4 +27,16 @@ fn test_python_compat() { .to_rfc3339_opts(SecondsFormat::Micros, false), "2018-03-05 05:00:00+00:00" ); + assert_eq!( + parse("19990101T23".to_owned()) + .unwrap() + .to_rfc3339_opts(SecondsFormat::Micros, false), + "1999-01-02 04:00:00+00:00" + ); + assert_eq!( + parse("19990101T2359".to_owned()) + .unwrap() + .to_rfc3339_opts(SecondsFormat::Micros, false), + "1999-01-02 04:59:00+00:00" + ); } diff --git a/src/tests/compat_split_string.rs b/src/tests/compat_split_string.rs index cdcc3e7..2bbd059 100644 --- a/src/tests/compat_split_string.rs +++ b/src/tests/compat_split_string.rs @@ -40,4 +40,20 @@ fn test_python_compat() { "2018".to_owned(), ] ); + assert_eq!( + tokenize("19990101T23"), + vec![ + "19990101".to_owned(), + "T".to_owned(), + "23".to_owned(), + ] + ); + assert_eq!( + tokenize("19990101T2359"), + vec![ + "19990101".to_owned(), + "T".to_owned(), + "2359".to_owned(), + ] + ); }