diff --git a/Cargo.toml b/Cargo.toml index 28a4c68..08d86f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,6 @@ authors = ["Bradlee Speice "] [dependencies] chrono = "0.4" +lazy_static = "*" +num-traits = "0.2" rust_decimal = "0.8" \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 128ebf3..b5a5252 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,11 @@ #![allow(dead_code)] #![allow(unused)] +#[macro_use] +extern crate lazy_static; + extern crate chrono; +extern crate num_traits; extern crate rust_decimal; use chrono::DateTime; @@ -11,6 +15,7 @@ use chrono::Local; use chrono::NaiveDateTime; use chrono::NaiveTime; use chrono::Utc; +use num_traits::cast::ToPrimitive; use rust_decimal::Decimal; use rust_decimal::Error as DecimalError; use std::collections::HashMap; @@ -21,6 +26,12 @@ use std::vec::Vec; #[cfg(test)] mod tests; +lazy_static! { + static ref ZERO: Decimal = Decimal::new(0, 0); + static ref ONE: Decimal = Decimal::new(1, 0); + static ref TWENTY_FOUR: Decimal = Decimal::new(24, 0); + static ref SIXTY: Decimal = Decimal::new(60, 0); +} #[derive(Debug, PartialEq)] pub enum ParseInternalError { @@ -36,21 +47,29 @@ pub enum ParseInternalError { } impl From for ParseInternalError { - fn from(err: DecimalError) -> Self { ParseInternalError::InvalidDecimal } + fn from(err: DecimalError) -> Self { + ParseInternalError::InvalidDecimal + } } impl From for ParseInternalError { - fn from(err: ParseIntError) -> Self { ParseInternalError::InvalidInteger } + fn from(err: ParseIntError) -> Self { + ParseInternalError::InvalidInteger + } } -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub enum ParseError { InternalError(ParseInternalError), InvalidMonth, + UnrecognizedToken(String), + InvalidParseResult(ParsingResult), } impl From for ParseError { - fn from(err: ParseInternalError) -> Self { ParseError::InternalError(err) } + fn from(err: ParseInternalError) -> Self { + ParseError::InternalError(err) + } } type ParseResult = Result; @@ -279,6 +298,7 @@ fn parse_info(vec: Vec>) -> HashMap { m } +#[derive(Debug, PartialEq)] struct ParserInfo { jump: HashMap, weekday: HashMap, @@ -400,13 +420,17 @@ impl ParserInfo { // TODO: Should this be moved elsewhere? fn validate(&self, res: &mut ParsingResult) -> bool { - if let Some(y) = res.year { res.year = Some(self.convertyear(y, res.century_specified)) }; - - if res.tzoffset == 0 && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { + if let Some(y) = res.year { + res.year = Some(self.convertyear(y, res.century_specified)) + }; + + if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { res.tzname = Some("UTC".to_owned()); - res.tzoffset = 0; - } else if res.tzoffset != 0 && res.tzname.is_some() && self.get_utczone(res.tzname.as_ref().unwrap()) { - res.tzoffset = 0; + res.tzoffset = Some(0); + } else if res.tzoffset != Some(0) && res.tzname.is_some() + && self.get_utczone(res.tzname.as_ref().unwrap()) + { + res.tzoffset = Some(0); } true @@ -420,7 +444,11 @@ fn days_in_month(year: i32, month: i32) -> Result { }; match month { - 2 => if leap_year { Ok(29) } else { Ok(28) }, + 2 => if leap_year { + Ok(29) + } else { + Ok(28) + }, 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), 4 | 6 | 9 | 11 => Ok(30), _ => Err(ParseError::InvalidMonth), @@ -444,23 +472,24 @@ struct YMD { } impl YMD { + fn len(&self) -> usize { + self._ymd.len() + } - fn len(&self) -> usize { self._ymd.len() } - - fn could_be_day(&self, val: i32) -> ParseResult { + fn could_be_day(&self, val: i32) -> bool { if self.dstridx.is_some() { - Ok(false) + false } else if self.mstridx.is_none() { - Ok((1 <= val) && (val <= 31)) + (1 <= val) && (val <= 31) } else if self.ystridx.is_none() { // UNWRAP: mstridx guaranteed to have a value // TODO: Justify unwrap for self._ymd let month = self._ymd[self.mstridx.unwrap()]; - Ok(1 <= val && (val <= days_in_month(2000, month)?)) + 1 <= val && (val <= days_in_month(2000, month).unwrap()) } else { let month = self._ymd[self.mstridx.unwrap()]; let year = self._ymd[self.ystridx.unwrap()]; - Ok(1 <= val && (val <= days_in_month(year, month)?)) + 1 <= val && (val <= days_in_month(year, month).unwrap()) } } @@ -677,8 +706,8 @@ impl YMD { } } -#[derive(Default)] -struct ParsingResult { +#[derive(Default, Debug)] +pub struct ParsingResult { year: Option, month: Option, day: Option, @@ -688,8 +717,8 @@ struct ParsingResult { second: Option, microsecond: Option, tzname: Option, - tzoffset: i32, - ampm: Option, + tzoffset: Option, + ampm: Option, century_specified: bool, any_unused_tokens: Vec, } @@ -700,25 +729,29 @@ struct Parser { } impl Parser { - pub fn parse( &mut self, timestr: String, default: Option, ignoretz: bool, tzinfos: Vec, - ) -> Result, ParseError> { + ) -> Result<(NaiveDateTime, Option, Option>), ParseError> { let now = Local::now().naive_local(); let default_date = default.unwrap_or(now).date(); let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms(0, 0, 0)); // TODO: What should be done with the tokens? - let (res, tokens) = - self.parse_with_tokens(timestr, None, None, false, false)?; + let (res, tokens) = self.parse_with_tokens(timestr, None, None, false, false)?; let naive = self.build_naive(&res, default_ts); - Ok(self.build_tzaware(naive, &res, default_ts)) + + if !ignoretz { + let offset = self.build_tzaware(&naive, &res, default_ts); + Ok((naive, Some(offset.unwrap()), tokens)) + } else { + Ok((naive, None, tokens)) + } } fn parse_with_tokens( @@ -728,34 +761,178 @@ impl Parser { yearfirst: Option, fuzzy: bool, fuzzy_with_tokens: bool, - ) -> Result<(ParsingResult, Vec), ParseError> { + ) -> Result<(ParsingResult, Option>), ParseError> { let fuzzy = if fuzzy_with_tokens { true } else { fuzzy }; // This is probably a stylistic abomination - let dayfirst = if let Some(dayfirst) = dayfirst { dayfirst } else { self.info.dayfirst }; - let yearfirst = if let Some(yearfirst) = yearfirst { yearfirst } else { self.info.yearfirst }; + let dayfirst = if let Some(dayfirst) = dayfirst { + dayfirst + } else { + self.info.dayfirst + }; + let yearfirst = if let Some(yearfirst) = yearfirst { + yearfirst + } else { + self.info.yearfirst + }; let mut res = ParsingResult::default(); - - let l = tokenize(×tr); - let skipped_idxs: Vec = Vec::new(); - let ymd = YMD::default(); + let mut l = tokenize(×tr); + let mut skipped_idxs: Vec = Vec::new(); + + let mut ymd = YMD::default(); let len_l = l.len(); let mut i = 0; while i < len_l { - - let value_repr = l.get(i).ok_or(ParseInternalError::ParseIndexError)?; + let value_repr = l[i].clone(); let value = value_repr.parse::(); if let Ok(v) = value { - i = self.parse_numeric_token(&l, i, &self.info, &ymd, &mut res, fuzzy)?; + i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?; + } else if let Some(value) = self.info.get_weekday(&l[i]) { + res.weekday = Some(value != 0); + } else if let Some(value) = self.info.get_month(&l[i]) { + ymd.append(value as i32, Some(YMDLabel::Month)); + + if i + 1 < len_l { + if l[i + 1] == "-" || l[i + 1] == "/" { + // Jan-01[-99] + let sep = &l[i + 1]; + // TODO: This seems like a very unsafe unwrap + ymd.append(l[i + 2].parse::().unwrap(), None); + + if i + 3 < len_l && &l[i + 3] == sep { + // Jan-01-99 + ymd.append(l[i + 4].parse::().unwrap(), None); + i += 2; + } + + i += 2; + } else if (i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " + && self.info.get_pertain(&l[i + 2])) + { + // Jan of 01 + if let Some(value) = l[i + 4].parse::().ok() { + let year = self.info.convertyear(value, false); + ymd.append(year, Some(YMDLabel::Year)); + } + + i += 4; + } + } + } else if let Some(value) = self.info.get_ampm(&l[i]) { + let is_ampm = self.ampm_valid(res.hour, res.ampm, fuzzy); + + if is_ampm { + res.hour = Some(self.adjust_ampm(res.hour.unwrap(), value)); + res.ampm = Some(value); + } else if fuzzy { + skipped_idxs.push(i); + } + } else if self.could_be_tzname(res.hour, res.tzname.clone(), res.tzoffset, &l[i]) { + res.tzname = Some(l[i].clone()); + + let tzname = res.tzname.clone().unwrap(); + res.tzoffset = self.info.get_tzoffset(&tzname).map(|t| t as i32); + + if i + 1 < len_l && (l[i + 1] == "+" || l[i + 1] == "-") { + // GMT+3 + // According to dateutil docs - reverse the size, as GMT+3 means + // "my time +3 is GMT" not "GMT +3 is my time" + + // TODO: Is there a better way of in-place modifying a vector? + let item = if l[i + 1] == "+" { + "-".to_owned() + } else { + "-".to_owned() + }; + l.remove(i + 1); + l.insert(i + 1, item); + + res.tzoffset = None; + + if self.info.get_utczone(&tzname) { + res.tzname = None; + } + } + } else if res.hour.is_some() && (l[i] == "+" || l[i] == "-") { + let signal = if l[i] == "+" { 1 } else { -1 }; + let len_li = l[i].len(); + + let mut hour_offset: Option = None; + let mut min_offset: Option = None; + + // TODO: check that l[i + 1] is integer? + if len_li == 4 { + // -0300 + hour_offset = Some(l[i + 1][..2].parse::().unwrap()); + min_offset = Some(l[i + 1][2..4].parse::().unwrap()); + } else if i + 2 < len_l && l[i + 2] == ":" { + // -03:00 + hour_offset = Some(l[i + 1].parse::().unwrap()); + min_offset = Some(l[i + 3].parse::().unwrap()); + i += 2; + } else if len_li <= 2 { + // -[0]3 + hour_offset = Some(l[i + 1][..2].parse::().unwrap()); + min_offset = Some(0); + } + + res.tzoffset = + Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60)); + + let tzname = res.tzname.clone(); + if i + 5 < len_l && self.info.get_jump(&l[i + 2]) && l[i + 3] == "(" + && l[i + 5] == ")" && 3 <= l[i + 4].len() + && self.could_be_tzname(res.hour, tzname, None, &l[i + 4]) + { + // (GMT) + res.tzname = Some(l[i + 4].clone()); + i += 4; + } + + i += 1; + } else if !self.info.get_jump(&l[i]) || fuzzy { + return Err(ParseError::UnrecognizedToken(l[i].clone())); + } else { + skipped_idxs.push(i); } + + i += 1; } - Err(ParseError::InvalidMonth) + let (year, month, day) = ymd.resolve_ymd(yearfirst, dayfirst)?; + + res.century_specified = ymd.century_specified; + res.year = Some(year); + res.month = Some(month); + res.day = Some(day); + + if !self.info.validate(&mut res) { + Err(ParseError::InvalidParseResult(res)) + } else if fuzzy_with_tokens { + let skipped_tokens = skipped_idxs.into_iter().map(|i| l[i].clone()).collect(); + Ok((res, Some(skipped_tokens))) + } else { + Ok((res, None)) + } + } + + fn could_be_tzname( + &self, + hour: Option, + tzname: Option, + tzoffset: Option, + token: &str, + ) -> bool { + false + } + + fn ampm_valid(&self, hour: Option, ampm: Option, fuzzy: bool) -> bool { + false } fn build_naive(&self, res: &ParsingResult, default: NaiveDateTime) -> NaiveDateTime { @@ -764,47 +941,280 @@ impl Parser { fn build_tzaware( &self, - dt: NaiveDateTime, + dt: &NaiveDateTime, res: &ParsingResult, default: NaiveDateTime, - ) -> DateTime { - - Local::now().with_timezone(&FixedOffset::east(0)) + ) -> Result { + Ok(FixedOffset::east(0)) } - fn parse_numeric_token(&self, tokens: &Vec, idx: usize, info: &ParserInfo, ymd: &YMD, res: &mut ParsingResult, fuzzy: bool) -> Result { + fn parse_numeric_token( + &self, + tokens: &Vec, + idx: usize, + info: &ParserInfo, + ymd: &mut YMD, + res: &mut ParsingResult, + fuzzy: bool, + ) -> Result { + let mut idx = idx; let value_repr = &tokens[idx]; - let value = Decimal::from_str(&value_repr)?; + let mut value = Decimal::from_str(&value_repr).unwrap(); let len_li = value_repr.len(); let len_l = tokens.len(); - let mut s: Option<&str> = None; - // TODO: I miss the `x in y` syntax // TODO: Decompose this logic a bit - if ymd.len() == 3 && (len_li == 2 || len_li == 4) && - res.hour.is_none() && ( - idx + 1 >= len_l || - (tokens[idx + 1] != ":" && info.get_hms(&tokens[idx + 1]).is_none())) { - + if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none() + && (idx + 1 >= len_l + || (tokens[idx + 1] != ":" && info.get_hms(&tokens[idx + 1]).is_none())) + { // 1990101T32[59] - s = Some(&tokens[idx]); - res.hour = Some(s.unwrap()[0..2].parse::()?); + let s = &tokens[idx]; + res.hour = s[0..2].parse::().ok(); - if len_li == 4 { res.minute = Some(s.unwrap()[2..4].parse::()?) } + if len_li == 4 { + res.minute = Some(s[2..4].parse::()?) + } + } else if len_li == 6 || (len_li > 6 && tokens[idx].find(".") == Some(6)) { + // YYMMDD or HHMMSS[.ss] + let s = &tokens[idx]; + + if ymd.len() == 0 && tokens[idx].find(".") == None { + ymd.append(s[0..2].parse::().unwrap(), None); + ymd.append(s[2..4].parse::().unwrap(), None); + ymd.append(s[4..6].parse::().unwrap(), None); + } else { + // 19990101T235959[.59] + res.hour = s[0..2].parse::().ok(); + res.minute = s[2..4].parse::().ok(); + + let t = self.parsems(&s[4..])?; + res.second = Some(t.0); + res.microsecond = Some(t.1); + } + } else if vec![8, 12, 14].contains(&len_li) { + // YYMMDD + let s = &tokens[idx]; + ymd.append(s[..4].parse::().unwrap(), Some(YMDLabel::Year)); + ymd.append(s[4..6].parse::().unwrap(), None); + ymd.append(s[6..8].parse::().unwrap(), None); + + if len_li > 8 { + res.hour = Some(s[8..10].parse::()?); + res.minute = Some(s[10..12].parse::()?); + + if len_li > 12 { + res.second = Some(s[12..].parse::()?); + } + } + } else if let Some(hms_idx) = self.find_hms_index(idx, tokens, info, true) { + // HH[ ]h or MM[ ]m or SS[.ss][ ]s + let (idx, hms) = self.parse_hms(idx, tokens, info, Some(hms_idx)); + if hms.is_some() { + // TODO: This unwrap is unjustified. + self.assign_hms(res, value_repr, hms.unwrap()); + } + } else if idx + 2 < len_l && tokens[idx + 1] == ":" { + // HH:MM[:SS[.ss]] + // TODO: Better story around Decimal handling + res.hour = Some(value.floor().to_i64().unwrap() as i32); + // TODO: Rescope `value` here? + value = self.to_decimal(&tokens[idx + 2]); + let min_sec = self.parse_min_sec(value); + res.minute = Some(min_sec.0); + res.second = min_sec.1; + + if idx + 4 < len_l && tokens[idx + 3] == ":" { + // TODO: (x, y) = (a, b) syntax? + let ms = self.parsems(&tokens[idx + 4]).unwrap(); + res.second = Some(ms.0); + res.microsecond = Some(ms.1); + + idx += 2; + } + idx += 2; + } else if idx + 1 < len_l + && (tokens[idx + 1] == "-" || tokens[idx + 1] == "/" || tokens[idx + 1] == ".") + { + // TODO: There's got to be a better way of handling the condition above + let sep = &tokens[idx + 1]; + ymd.append(value_repr.parse::().unwrap(), None); + + if idx + 2 < len_l && !info.get_jump(&tokens[idx + 2]) { + if let Ok(val) = tokens[idx + 2].parse::() { + ymd.append(val, None); + } else if let Some(val) = info.get_month(&tokens[idx + 2]) { + ymd.append(val as i32, Some(YMDLabel::Month)); + } + + if idx + 3 < len_l && &tokens[idx + 3] == sep { + if let Some(value) = info.get_month(&tokens[idx + 4]) { + ymd.append(value as i32, Some(YMDLabel::Month)); + } else { + ymd.append(tokens[idx + 4].parse::().unwrap(), None); + } + + idx += 2; + } + + idx += 1; + } + + idx += 1 + } else if idx + 1 >= len_l || info.get_jump(&tokens[idx + 1]) { + if idx + 2 < len_l && info.get_ampm(&tokens[idx + 2]).is_some() { + let hour = value.to_i64().unwrap() as i32; + let ampm = info.get_ampm(&tokens[idx + 2]).unwrap(); + res.hour = Some(self.adjust_ampm(hour, ampm)); + } + } else if info.get_ampm(&tokens[idx + 1]).is_some() + && (*ZERO <= value && value < *TWENTY_FOUR) + { + // 12am + let hour = value.to_i64().unwrap() as i32; + res.hour = Some(self.adjust_ampm(hour, info.get_ampm(&tokens[idx + 1]).unwrap())); + idx += 1; + } else if ymd.could_be_day(value.to_i64().unwrap() as i32) { + ymd.append(value.to_i64().unwrap() as i32, None); + } else if !fuzzy { + return Err(ParseInternalError::ValueError("".to_owned())); } Ok(idx) } + + fn adjust_ampm(&self, hour: i32, ampm: usize) -> i32 { + if hour < 12 && ampm == 1 { + hour + 12 + } else if hour == 12 && ampm == 0 { + 0 + } else { + hour + } + } + + fn parsems(&self, seconds_str: &str) -> Result<(i32, i32), ParseInternalError> { + if seconds_str.contains(".") { + let split: Vec<&str> = seconds_str.split(".").collect(); + let (i, f): (&str, &str) = (split[0], split[1]); + + let i_parse = i.parse::()?; + let f_parse = ljust(f, 6, '0').parse::()?; + Ok((i_parse, f_parse)) + } else { + Ok((seconds_str.parse::()?, 0)) + } + } + + fn find_hms_index( + &self, + idx: usize, + tokens: &Vec, + info: &ParserInfo, + allow_jump: bool, + ) -> Option { + let len_l = tokens.len(); + let mut hms_idx = None; + + if idx + 1 < len_l && info.get_hms(&tokens[idx + 1]).is_some() { + hms_idx = Some(idx + 1) + } else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " " + && info.get_hms(&tokens[idx + 2]).is_some() + { + hms_idx = Some(idx + 2) + } else if idx > 0 && info.get_hms(&tokens[idx - 1]).is_some() { + hms_idx = Some(idx - 1) + } + // TODO: The condition for this in Python seems a bit ambiguous + else if idx == len_l - 1 && tokens[idx - 1] == " " + && info.get_hms(&tokens[idx - 2]).is_some() + { + hms_idx = Some(idx - 2) + } + + hms_idx + } + + fn parse_hms( + &self, + idx: usize, + tokens: &Vec, + info: &ParserInfo, + hms_index: Option, + ) -> (usize, Option) { + if hms_index.is_none() { + (idx, None) + } else if hms_index.unwrap() > idx { + ( + hms_index.unwrap(), + info.get_hms(&tokens[hms_index.unwrap()]), + ) + } else { + ( + idx, + info.get_hms(&tokens[hms_index.unwrap()]).map(|u| u + 1), + ) + } + } + + fn assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize) { + let value = self.to_decimal(value_repr); + + if hms == 0 { + res.hour = Some(value.to_i64().unwrap() as i32); + if close_to_integer(&value) { + // TODO: High probability of issues with rounding here. + res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32); + } + } else if hms == 1 { + let (min, sec) = self.parse_min_sec(value); + } else if hms == 2 { + let (sec, micro) = self.parsems(value_repr).unwrap(); + } + } + + fn to_decimal(&self, value: &str) -> Decimal { + // TODO: Actual decimals, and handling infinity + Decimal::from_str(value).unwrap() + } + + fn parse_min_sec(&self, value: Decimal) -> (i32, Option) { + let minute = value.floor().to_i64().unwrap() as i32; + let mut second = None; + + let sec_remainder = value % *ONE; + if sec_remainder != *ZERO { + second = Some((*SIXTY * sec_remainder).floor().to_i64().unwrap() as i32); + } + + (minute, second) + } } -fn parse_with_info(timestr: String, info: ParserInfo) -> Result, ParseError> { +fn close_to_integer(value: &Decimal) -> bool { + value % *ONE == *ZERO +} + +fn ljust(s: &str, chars: usize, replace: char) -> String { + if s.len() >= chars { + s[..chars].to_owned() + } else { + format!("{}{}", s, replace.to_string().repeat(chars - s.len())) + } +} + +fn parse_with_info( + timestr: String, + info: ParserInfo, +) -> ParseResult<(NaiveDateTime, Option, Option>)> { // TODO: Is `::new()` more stylistic? let mut parser = Parser { info: info }; parser.parse(timestr, None, false, vec![]) } -fn parse(timestr: String) -> Result, ParseError> { - parse_with_info(timestr, ParserInfo::default()) +fn parse(timestr: String) -> ParseResult<(NaiveDateTime, Option)> { + let parse_result = parse_with_info(timestr, ParserInfo::default())?; + Ok((parse_result.0, parse_result.1)) } diff --git a/src/tests/compat_parse.rs b/src/tests/compat_parse.rs index cae86ed..be886f3 100644 --- a/src/tests/compat_parse.rs +++ b/src/tests/compat_parse.rs @@ -1,4 +1,3 @@ - // WARNING // This file was auto-generated using the `build_tests.py` script. // Please do not edit it manually. diff --git a/src/tests/compat_split_string.rs b/src/tests/compat_split_string.rs index 2bbd059..cd3c9a5 100644 --- a/src/tests/compat_split_string.rs +++ b/src/tests/compat_split_string.rs @@ -1,4 +1,3 @@ - // WARNING // This file was auto-generated using the `build_tests.py` script. // Please do not edit it manually. @@ -42,18 +41,10 @@ fn test_python_compat() { ); assert_eq!( tokenize("19990101T23"), - vec![ - "19990101".to_owned(), - "T".to_owned(), - "23".to_owned(), - ] + vec!["19990101".to_owned(), "T".to_owned(), "23".to_owned()] ); assert_eq!( tokenize("19990101T2359"), - vec![ - "19990101".to_owned(), - "T".to_owned(), - "2359".to_owned(), - ] + vec!["19990101".to_owned(), "T".to_owned(), "2359".to_owned()] ); }