1
0
mirror of https://github.com/bspeice/dtparse synced 2024-12-22 12:28:08 -05:00
This commit is contained in:
007gzs 2024-09-03 15:10:46 +08:00
parent 081cd7bea0
commit 6953afb5e2
6 changed files with 2738 additions and 812 deletions

View File

@ -43,6 +43,9 @@ fn main() {
) )
.unwrap() .unwrap()
.0, .0,
NaiveDate::from_ymd_opt(2015, 9, 10).unwrap().and_hms_opt(10, 20, 0).unwrap() NaiveDate::from_ymd_opt(2015, 9, 10)
.unwrap()
.and_hms_opt(10, 20, 0)
.unwrap()
); );
} }

View File

@ -151,6 +151,8 @@ pub enum ParseError {
YearMonthDayError(&'static str), YearMonthDayError(&'static str),
/// Parser unable to find any date/time-related content in the supplied string /// Parser unable to find any date/time-related content in the supplied string
NoDate, NoDate,
/// An internal conversion expected `Some` but encountered `None`
System,
} }
impl fmt::Display for ParseError { impl fmt::Display for ParseError {
@ -305,13 +307,7 @@ impl ParserInfo {
} }
fn ampm_index(&self, name: &str) -> Option<bool> { fn ampm_index(&self, name: &str) -> Option<bool> {
if let Some(v) = self.ampm.get(&name.to_lowercase()) { self.ampm.get(&name.to_lowercase()).map(|v| *v == 1)
// Python technically uses numbers here, but given that the numbers are
// only 0 and 1, it's easier to use booleans
Some(*v == 1)
} else {
None
}
} }
fn pertain_index(&self, name: &str) -> bool { fn pertain_index(&self, name: &str) -> bool {
@ -367,6 +363,12 @@ impl ParserInfo {
} }
} }
/// Converts an `Option` into a `Result`, mapping `None` to `ParseError::System`.
///
/// This lets callers propagate an unexpectedly missing value with `?`
/// rather than panicking on it.
///
/// # Errors
///
/// Returns `ParseError::System` when `opt` is `None`.
fn option_to_result<T>(opt: Option<T>) -> Result<T, ParseError> {
    // `ParseError::System` is a unit variant, so eager construction is free.
    opt.ok_or(ParseError::System)
}
fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> { fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
let leap_year = match year % 4 { let leap_year = match year % 4 {
0 => year % 400 != 0, 0 => year % 400 != 0,
@ -412,7 +414,7 @@ impl YMD {
if self.dstridx.is_some() { if self.dstridx.is_some() {
false false
} else if self.mstridx.is_none() { } else if self.mstridx.is_none() {
(1 <= val) && (val <= 31) (1..=31).contains(&val)
} else if self.ystridx.is_none() { } else if self.ystridx.is_none() {
// UNWRAP: Earlier condition catches mstridx missing // UNWRAP: Earlier condition catches mstridx missing
let month = self._ymd[self.mstridx.unwrap()]; let month = self._ymd[self.mstridx.unwrap()];
@ -715,7 +717,8 @@ impl Parser {
) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)> { ) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)> {
let default_date = default.unwrap_or(&Local::now().naive_local()).date(); let default_date = default.unwrap_or(&Local::now().naive_local()).date();
let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms_opt(0, 0, 0).unwrap()); let default_ts =
NaiveDateTime::new(default_date, NaiveTime::from_hms_opt(0, 0, 0).unwrap());
let (res, tokens) = let (res, tokens) =
self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?; self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;
@ -758,7 +761,7 @@ impl Parser {
let mut res = ParsingResult::default(); let mut res = ParsingResult::default();
let mut l = tokenize(&timestr); let mut l = tokenize(timestr);
let mut skipped_idxs: Vec<usize> = Vec::new(); let mut skipped_idxs: Vec<usize> = Vec::new();
let mut ymd = YMD::default(); let mut ymd = YMD::default();
@ -814,9 +817,8 @@ impl Parser {
skipped_idxs.push(i); skipped_idxs.push(i);
} }
} else if self.could_be_tzname(res.hour, &res.tzname, res.tzoffset, &l[i]) { } else if self.could_be_tzname(res.hour, &res.tzname, res.tzoffset, &l[i]) {
res.tzname = Some(l[i].clone()); let tzname = l[i].clone();
res.tzname = Some(tzname.clone());
let tzname = res.tzname.clone().unwrap();
res.tzoffset = self.info.tzoffset_index(&tzname).map(|t| t as i32); res.tzoffset = self.info.tzoffset_index(&tzname).map(|t| t as i32);
if i + 1 < len_l && (l[i + 1] == "+" || l[i + 1] == "-") { if i + 1 < len_l && (l[i + 1] == "+" || l[i + 1] == "-") {
@ -861,9 +863,9 @@ impl Parser {
hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?); hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
min_offset = Some(0); min_offset = Some(0);
} }
if let (Some(hour_offset), Some(min_offset)) = (hour_offset, min_offset) {
res.tzoffset = res.tzoffset = Some(signal * (hour_offset * 3600 + min_offset * 60));
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60)); }
let tzname = res.tzname.clone(); let tzname = res.tzname.clone();
if i + 5 < len_l if i + 5 < len_l
@ -923,21 +925,24 @@ impl Parser {
&& (all_ascii_upper || self.info.utczone.contains_key(token)) && (all_ascii_upper || self.info.utczone.contains_key(token))
} }
#[allow(clippy::unnecessary_unwrap)]
fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> { fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> {
let mut val_is_ampm = !(fuzzy && ampm.is_some()); let mut val_is_ampm = !(fuzzy && ampm.is_some());
match hour {
if hour.is_none() { Some(hour) => {
if !(0..=12).contains(&hour) {
if fuzzy {
val_is_ampm = false;
} else {
return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
}
}
}
None => {
if fuzzy { if fuzzy {
val_is_ampm = false; val_is_ampm = false;
} else { } else {
return Err(ParseError::AmPmWithoutHour); return Err(ParseError::AmPmWithoutHour);
} }
} else if !(0 <= hour.unwrap() && hour.unwrap() <= 12) {
if fuzzy {
val_is_ampm = false;
} else {
return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
} }
} }
@ -951,12 +956,9 @@ impl Parser {
) -> ParseResult<NaiveDateTime> { ) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year()); let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32; let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
let d_offset = if let (Some(weekday), None) = (res.weekday, res.day) {
let d_offset = if res.weekday.is_some() && res.day.is_none() {
let dow = day_of_week(y as u32, m, default.day())?; let dow = day_of_week(y as u32, m, default.day())?;
let actual_weekday = (weekday + 1) % 7;
// UNWRAP: We've already check res.weekday() is some
let actual_weekday = (res.weekday.unwrap() + 1) % 7;
let other = DayOfWeek::from_numeral(actual_weekday as u32); let other = DayOfWeek::from_numeral(actual_weekday as u32);
Duration::days(i64::from(dow.difference(&other))) Duration::days(i64::from(dow.difference(&other)))
} else { } else {
@ -972,16 +974,16 @@ impl Parser {
days_in_month(y, m as i32)?, days_in_month(y, m as i32)?,
), ),
) )
.ok_or_else(|| ParseError::ImpossibleTimestamp("Invalid date range given"))?; .ok_or(ParseError::ImpossibleTimestamp("Invalid date range given"))?;
let d = d + d_offset; let d = d + d_offset;
let hour = res.hour.unwrap_or(default.hour() as i32) as u32; let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32; let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32; let second = res.second.unwrap_or(default.second() as i32) as u32;
let nanosecond = res let nanosecond =
.nanosecond res.nanosecond
.unwrap_or(default.timestamp_subsec_nanos() as i64) as u32; .unwrap_or(default.and_utc().timestamp_subsec_nanos() as i64) as u32;
let t = let t =
NaiveTime::from_hms_nano_opt(hour, minute, second, nanosecond).ok_or_else(|| { NaiveTime::from_hms_nano_opt(hour, minute, second, nanosecond).ok_or_else(|| {
if hour >= 24 { if hour >= 24 {
@ -1008,11 +1010,11 @@ impl Parser {
) -> ParseResult<Option<FixedOffset>> { ) -> ParseResult<Option<FixedOffset>> {
if let Some(offset) = res.tzoffset { if let Some(offset) = res.tzoffset {
Ok(FixedOffset::east_opt(offset)) Ok(FixedOffset::east_opt(offset))
} else if res.tzoffset == None } else if res.tzoffset.is_none()
&& (res.tzname == Some(" ".to_owned()) && (res.tzname == Some(" ".to_owned())
|| res.tzname == Some(".".to_owned()) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == Some("-".to_owned())
|| res.tzname == None) || res.tzname.is_none())
{ {
Ok(None) Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) { } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -1027,7 +1029,6 @@ impl Parser {
} }
} }
#[allow(clippy::unnecessary_unwrap)]
fn parse_numeric_token( fn parse_numeric_token(
&self, &self,
tokens: &[String], tokens: &[String],
@ -1039,7 +1040,7 @@ impl Parser {
) -> ParseResult<usize> { ) -> ParseResult<usize> {
let mut idx = idx; let mut idx = idx;
let value_repr = &tokens[idx]; let value_repr = &tokens[idx];
let mut value = Decimal::from_str(&value_repr).unwrap(); let mut value = Decimal::from_str(value_repr)?;
let len_li = value_repr.len(); let len_li = value_repr.len();
let len_l = tokens.len(); let len_l = tokens.len();
@ -1063,7 +1064,7 @@ impl Parser {
// YYMMDD or HHMMSS[.ss] // YYMMDD or HHMMSS[.ss]
let s = &tokens[idx]; let s = &tokens[idx];
if ymd.len() == 0 && tokens[idx].find('.') == None { if ymd.len() == 0 && tokens[idx].find('.').is_none() {
ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?; ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?;
ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?; ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?;
ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?; ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
@ -1076,7 +1077,7 @@ impl Parser {
res.second = Some(t.0); res.second = Some(t.0);
res.nanosecond = Some(t.1); res.nanosecond = Some(t.1);
} }
} else if vec![8, 12, 14].contains(&len_li) { } else if [8, 12, 14].contains(&len_li) {
// YYYYMMDD // YYYYMMDD
let s = &tokens[idx]; let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?; ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?;
@ -1101,7 +1102,9 @@ impl Parser {
} else if idx + 2 < len_l && tokens[idx + 1] == ":" { } else if idx + 2 < len_l && tokens[idx + 1] == ":" {
// HH:MM[:SS[.ss]] // HH:MM[:SS[.ss]]
// TODO: Better story around Decimal handling // TODO: Better story around Decimal handling
res.hour = Some(value.floor().to_i64().unwrap() as i32); if let Some(v) = value.floor().to_i64() {
res.hour = Some(v as i32);
}
// TODO: Rescope `value` here? // TODO: Rescope `value` here?
value = self.to_decimal(&tokens[idx + 2])?; value = self.to_decimal(&tokens[idx + 2])?;
let min_sec = self.parse_min_sec(value); let min_sec = self.parse_min_sec(value);
@ -1110,7 +1113,7 @@ impl Parser {
if idx + 4 < len_l && tokens[idx + 3] == ":" { if idx + 4 < len_l && tokens[idx + 3] == ":" {
// TODO: (x, y) = (a, b) syntax? // TODO: (x, y) = (a, b) syntax?
let ms = self.parsems(&tokens[idx + 4]).unwrap(); let ms = self.parsems(&tokens[idx + 4])?;
res.second = Some(ms.0); res.second = Some(ms.0);
res.nanosecond = Some(ms.1); res.nanosecond = Some(ms.1);
@ -1122,7 +1125,7 @@ impl Parser {
{ {
// TODO: There's got to be a better way of handling the condition above // TODO: There's got to be a better way of handling the condition above
let sep = &tokens[idx + 1]; let sep = &tokens[idx + 1];
ymd.append(value_repr.parse::<i32>()?, &value_repr, None)?; ymd.append(value_repr.parse::<i32>()?, value_repr, None)?;
if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) { if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) {
if let Ok(val) = tokens[idx + 2].parse::<i32>() { if let Ok(val) = tokens[idx + 2].parse::<i32>() {
@ -1151,14 +1154,17 @@ impl Parser {
idx += 1 idx += 1
} else if idx + 1 >= len_l || info.jump_index(&tokens[idx + 1]) { } else if idx + 1 >= len_l || info.jump_index(&tokens[idx + 1]) {
if idx + 2 < len_l && info.ampm_index(&tokens[idx + 2]).is_some() { if idx + 2 < len_l && info.ampm_index(&tokens[idx + 2]).is_some() {
let hour = value.to_i64().unwrap() as i32; let hour = option_to_result(value.to_i64())? as i32;
let ampm = info.ampm_index(&tokens[idx + 2]).unwrap(); let ampm = option_to_result(info.ampm_index(&tokens[idx + 2]))?;
res.hour = Some(self.adjust_ampm(hour, ampm)); res.hour = Some(self.adjust_ampm(hour, ampm));
idx += 1; idx += 1;
} else { } else {
//let value = value.floor().to_i32().ok_or(Err(ParseError::InvalidNumeric())) //let value = value.floor().to_i32().ok_or(Err(ParseError::InvalidNumeric()))
let value = value.floor().to_i32().ok_or_else(|| ParseError::InvalidNumeric(value_repr.to_owned()))?; let value = value
ymd.append(value, &value_repr, None)?; .floor()
.to_i32()
.ok_or_else(|| ParseError::InvalidNumeric(value_repr.to_owned()))?;
ymd.append(value, value_repr, None)?;
} }
idx += 1; idx += 1;
@ -1166,11 +1172,12 @@ impl Parser {
&& (*ZERO <= value && value < *TWENTY_FOUR) && (*ZERO <= value && value < *TWENTY_FOUR)
{ {
// 12am // 12am
let hour = value.to_i64().unwrap() as i32; let hour = option_to_result(value.to_i64())? as i32;
res.hour = Some(self.adjust_ampm(hour, info.ampm_index(&tokens[idx + 1]).unwrap())); res.hour =
Some(self.adjust_ampm(hour, option_to_result(info.ampm_index(&tokens[idx + 1]))?));
idx += 1; idx += 1;
} else if ymd.could_be_day(value.to_i64().unwrap() as i32) { } else if ymd.could_be_day(option_to_result(value.to_i32())? as i32) {
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?; ymd.append(option_to_result(value.to_i64())? as i32, value_repr, None)?;
} else if !fuzzy { } else if !fuzzy {
return Err(ParseError::UnrecognizedFormat); return Err(ParseError::UnrecognizedFormat);
} }
@ -1256,13 +1263,10 @@ impl Parser {
info: &ParserInfo, info: &ParserInfo,
hms_index: Option<usize>, hms_index: Option<usize>,
) -> (usize, Option<usize>) { ) -> (usize, Option<usize>) {
if hms_index.is_none() { match hms_index {
(idx, None) Some(index) => {
} else if hms_index.unwrap() > idx { if index > idx {
( (index, info.hms_index(&tokens[hms_index.unwrap()]))
hms_index.unwrap(),
info.hms_index(&tokens[hms_index.unwrap()]),
)
} else { } else {
( (
idx, idx,
@ -1270,6 +1274,9 @@ impl Parser {
) )
} }
} }
None => (idx, None),
}
}
fn assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize) -> ParseResult<()> { fn assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize) -> ParseResult<()> {
let value = self.to_decimal(value_repr)?; let value = self.to_decimal(value_repr)?;
@ -1277,14 +1284,14 @@ impl Parser {
if hms == 0 { if hms == 0 {
res.hour = value.to_i32(); res.hour = value.to_i32();
if !close_to_integer(&value) { if !close_to_integer(&value) {
res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32); res.minute = Some(option_to_result((*SIXTY * (value % *ONE)).to_i64())? as i32);
} }
} else if hms == 1 { } else if hms == 1 {
let (min, sec) = self.parse_min_sec(value); let (min, sec) = self.parse_min_sec(value);
res.minute = Some(min); res.minute = Some(min);
res.second = sec; res.second = sec;
} else if hms == 2 { } else if hms == 2 {
let (sec, micro) = self.parsems(value_repr).unwrap(); let (sec, micro) = self.parsems(value_repr)?;
res.second = Some(sec); res.second = Some(sec);
res.nanosecond = Some(micro); res.nanosecond = Some(micro);
} }
@ -1293,7 +1300,7 @@ impl Parser {
} }
fn to_decimal(&self, value: &str) -> ParseResult<Decimal> { fn to_decimal(&self, value: &str) -> ParseResult<Decimal> {
Decimal::from_str(value).or_else(|_| Err(ParseError::InvalidNumeric(value.to_owned()))) Decimal::from_str(value).map_err(|_| ParseError::InvalidNumeric(value.to_owned()))
} }
fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) { fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) {

View File

@ -1,5 +1,5 @@
use base64::Engine;
use base64::engine::general_purpose::STANDARD; use base64::engine::general_purpose::STANDARD;
use base64::Engine;
use chrono::NaiveDate; use chrono::NaiveDate;
use std::collections::HashMap; use std::collections::HashMap;
use std::str; use std::str;
@ -21,7 +21,10 @@ fn test_fuzz() {
Err(ParseError::UnrecognizedFormat) Err(ParseError::UnrecognizedFormat)
); );
let default = NaiveDate::from_ymd_opt(2016, 6, 29).unwrap().and_hms_opt(0, 0, 0).unwrap(); let default = NaiveDate::from_ymd_opt(2016, 6, 29)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap();
let p = Parser::default(); let p = Parser::default();
let res = p.parse( let res = p.parse(
"\x0D\x31", "\x0D\x31",
@ -66,12 +69,20 @@ fn empty_string() {
#[test] #[test]
fn github_33() { fn github_33() {
assert_eq!(parse("66:'"), Err(ParseError::InvalidNumeric("'".to_owned()))) assert_eq!(
parse("66:'"),
Err(ParseError::InvalidNumeric("'".to_owned()))
)
} }
#[test] #[test]
fn github_32() { fn github_32() {
assert_eq!(parse("99999999999999999999999"), Err(ParseError::InvalidNumeric("99999999999999999999999".to_owned()))) assert_eq!(
parse("99999999999999999999999"),
Err(ParseError::InvalidNumeric(
"99999999999999999999999".to_owned()
))
)
} }
#[test] #[test]

View File

@ -2,13 +2,19 @@ mod fuzzing;
mod pycompat_parser; mod pycompat_parser;
mod pycompat_tokenizer; mod pycompat_tokenizer;
use chrono::NaiveDate;
use crate::parse; use crate::parse;
use chrono::NaiveDate;
#[test] #[test]
fn nanosecond_precision() { fn nanosecond_precision() {
assert_eq!( assert_eq!(
parse("2008.12.29T08:09:10.123456789").unwrap(), parse("2008.12.29T08:09:10.123456789").unwrap(),
(NaiveDate::from_ymd_opt(2008, 12, 29).unwrap().and_hms_nano_opt(8, 9, 10, 123_456_789).unwrap(), None) (
NaiveDate::from_ymd_opt(2008, 12, 29)
.unwrap()
.and_hms_nano_opt(8, 9, 10, 123_456_789)
.unwrap(),
None
)
) )
} }

File diff suppressed because it is too large Load Diff

View File

@ -55,7 +55,7 @@ impl DayOfWeek {
pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> { pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
// From https://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week#Schwerdtfeger's_method // From https://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week#Schwerdtfeger's_method
let (c, g) = match month { let (c, g) = match month {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => { 3..=12 => {
let c = year / 100; let c = year / 100;
(c, year - 100 * c) (c, year - 100 * c)
} }