1
0
mirror of https://github.com/bspeice/dtparse synced 2024-11-14 09:58:09 -05:00
This commit is contained in:
007gzs 2024-09-03 15:10:46 +08:00
parent 081cd7bea0
commit 6953afb5e2
6 changed files with 2738 additions and 812 deletions

View File

@ -43,6 +43,9 @@ fn main() {
)
.unwrap()
.0,
NaiveDate::from_ymd_opt(2015, 9, 10).unwrap().and_hms_opt(10, 20, 0).unwrap()
NaiveDate::from_ymd_opt(2015, 9, 10)
.unwrap()
.and_hms_opt(10, 20, 0)
.unwrap()
);
}

View File

@ -151,6 +151,8 @@ pub enum ParseError {
YearMonthDayError(&'static str),
/// Parser unable to find any date/time-related content in the supplied string
NoDate,
/// need Some but got None
System,
}
impl fmt::Display for ParseError {
@ -305,13 +307,7 @@ impl ParserInfo {
}
fn ampm_index(&self, name: &str) -> Option<bool> {
if let Some(v) = self.ampm.get(&name.to_lowercase()) {
// Python technically uses numbers here, but given that the numbers are
// only 0 and 1, it's easier to use booleans
Some(*v == 1)
} else {
None
}
self.ampm.get(&name.to_lowercase()).map(|v| *v == 1)
}
fn pertain_index(&self, name: &str) -> bool {
@ -367,6 +363,12 @@ impl ParserInfo {
}
}
fn option_to_result<T>(opt: Option<T>) -> Result<T, ParseError> {
match opt {
Some(v) => Ok(v),
None => Err(ParseError::System),
}
}
fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
let leap_year = match year % 4 {
0 => year % 400 != 0,
@ -412,7 +414,7 @@ impl YMD {
if self.dstridx.is_some() {
false
} else if self.mstridx.is_none() {
(1 <= val) && (val <= 31)
(1..=31).contains(&val)
} else if self.ystridx.is_none() {
// UNWRAP: Earlier condition catches mstridx missing
let month = self._ymd[self.mstridx.unwrap()];
@ -715,7 +717,8 @@ impl Parser {
) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)> {
let default_date = default.unwrap_or(&Local::now().naive_local()).date();
let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms_opt(0, 0, 0).unwrap());
let default_ts =
NaiveDateTime::new(default_date, NaiveTime::from_hms_opt(0, 0, 0).unwrap());
let (res, tokens) =
self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;
@ -758,7 +761,7 @@ impl Parser {
let mut res = ParsingResult::default();
let mut l = tokenize(&timestr);
let mut l = tokenize(timestr);
let mut skipped_idxs: Vec<usize> = Vec::new();
let mut ymd = YMD::default();
@ -814,9 +817,8 @@ impl Parser {
skipped_idxs.push(i);
}
} else if self.could_be_tzname(res.hour, &res.tzname, res.tzoffset, &l[i]) {
res.tzname = Some(l[i].clone());
let tzname = res.tzname.clone().unwrap();
let tzname = l[i].clone();
res.tzname = Some(tzname.clone());
res.tzoffset = self.info.tzoffset_index(&tzname).map(|t| t as i32);
if i + 1 < len_l && (l[i + 1] == "+" || l[i + 1] == "-") {
@ -861,9 +863,9 @@ impl Parser {
hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
min_offset = Some(0);
}
res.tzoffset =
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
if let (Some(hour_offset), Some(min_offset)) = (hour_offset, min_offset) {
res.tzoffset = Some(signal * (hour_offset * 3600 + min_offset * 60));
}
let tzname = res.tzname.clone();
if i + 5 < len_l
@ -923,21 +925,24 @@ impl Parser {
&& (all_ascii_upper || self.info.utczone.contains_key(token))
}
#[allow(clippy::unnecessary_unwrap)]
fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> {
let mut val_is_ampm = !(fuzzy && ampm.is_some());
if hour.is_none() {
if fuzzy {
val_is_ampm = false;
} else {
return Err(ParseError::AmPmWithoutHour);
match hour {
Some(hour) => {
if !(0..=12).contains(&hour) {
if fuzzy {
val_is_ampm = false;
} else {
return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
}
}
}
} else if !(0 <= hour.unwrap() && hour.unwrap() <= 12) {
if fuzzy {
val_is_ampm = false;
} else {
return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
None => {
if fuzzy {
val_is_ampm = false;
} else {
return Err(ParseError::AmPmWithoutHour);
}
}
}
@ -951,12 +956,9 @@ impl Parser {
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
let d_offset = if res.weekday.is_some() && res.day.is_none() {
let d_offset = if let (Some(weekday), None) = (res.weekday, res.day) {
let dow = day_of_week(y as u32, m, default.day())?;
// UNWRAP: We've already check res.weekday() is some
let actual_weekday = (res.weekday.unwrap() + 1) % 7;
let actual_weekday = (weekday + 1) % 7;
let other = DayOfWeek::from_numeral(actual_weekday as u32);
Duration::days(i64::from(dow.difference(&other)))
} else {
@ -972,16 +974,16 @@ impl Parser {
days_in_month(y, m as i32)?,
),
)
.ok_or_else(|| ParseError::ImpossibleTimestamp("Invalid date range given"))?;
.ok_or(ParseError::ImpossibleTimestamp("Invalid date range given"))?;
let d = d + d_offset;
let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32;
let nanosecond = res
.nanosecond
.unwrap_or(default.timestamp_subsec_nanos() as i64) as u32;
let nanosecond =
res.nanosecond
.unwrap_or(default.and_utc().timestamp_subsec_nanos() as i64) as u32;
let t =
NaiveTime::from_hms_nano_opt(hour, minute, second, nanosecond).ok_or_else(|| {
if hour >= 24 {
@ -1008,11 +1010,11 @@ impl Parser {
) -> ParseResult<Option<FixedOffset>> {
if let Some(offset) = res.tzoffset {
Ok(FixedOffset::east_opt(offset))
} else if res.tzoffset == None
} else if res.tzoffset.is_none()
&& (res.tzname == Some(" ".to_owned())
|| res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
|| res.tzname.is_none())
{
Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -1027,7 +1029,6 @@ impl Parser {
}
}
#[allow(clippy::unnecessary_unwrap)]
fn parse_numeric_token(
&self,
tokens: &[String],
@ -1039,7 +1040,7 @@ impl Parser {
) -> ParseResult<usize> {
let mut idx = idx;
let value_repr = &tokens[idx];
let mut value = Decimal::from_str(&value_repr).unwrap();
let mut value = Decimal::from_str(value_repr)?;
let len_li = value_repr.len();
let len_l = tokens.len();
@ -1063,7 +1064,7 @@ impl Parser {
// YYMMDD or HHMMSS[.ss]
let s = &tokens[idx];
if ymd.len() == 0 && tokens[idx].find('.') == None {
if ymd.len() == 0 && tokens[idx].find('.').is_none() {
ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?;
ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?;
ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
@ -1076,7 +1077,7 @@ impl Parser {
res.second = Some(t.0);
res.nanosecond = Some(t.1);
}
} else if vec![8, 12, 14].contains(&len_li) {
} else if [8, 12, 14].contains(&len_li) {
// YYMMDD
let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?;
@ -1101,7 +1102,9 @@ impl Parser {
} else if idx + 2 < len_l && tokens[idx + 1] == ":" {
// HH:MM[:SS[.ss]]
// TODO: Better story around Decimal handling
res.hour = Some(value.floor().to_i64().unwrap() as i32);
if let Some(v) = value.floor().to_i64() {
res.hour = Some(v as i32);
}
// TODO: Rescope `value` here?
value = self.to_decimal(&tokens[idx + 2])?;
let min_sec = self.parse_min_sec(value);
@ -1110,7 +1113,7 @@ impl Parser {
if idx + 4 < len_l && tokens[idx + 3] == ":" {
// TODO: (x, y) = (a, b) syntax?
let ms = self.parsems(&tokens[idx + 4]).unwrap();
let ms = self.parsems(&tokens[idx + 4])?;
res.second = Some(ms.0);
res.nanosecond = Some(ms.1);
@ -1122,7 +1125,7 @@ impl Parser {
{
// TODO: There's got to be a better way of handling the condition above
let sep = &tokens[idx + 1];
ymd.append(value_repr.parse::<i32>()?, &value_repr, None)?;
ymd.append(value_repr.parse::<i32>()?, value_repr, None)?;
if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) {
if let Ok(val) = tokens[idx + 2].parse::<i32>() {
@ -1151,14 +1154,17 @@ impl Parser {
idx += 1
} else if idx + 1 >= len_l || info.jump_index(&tokens[idx + 1]) {
if idx + 2 < len_l && info.ampm_index(&tokens[idx + 2]).is_some() {
let hour = value.to_i64().unwrap() as i32;
let ampm = info.ampm_index(&tokens[idx + 2]).unwrap();
let hour = option_to_result(value.to_i64())? as i32;
let ampm = option_to_result(info.ampm_index(&tokens[idx + 2]))?;
res.hour = Some(self.adjust_ampm(hour, ampm));
idx += 1;
} else {
//let value = value.floor().to_i32().ok_or(Err(ParseError::InvalidNumeric()))
let value = value.floor().to_i32().ok_or_else(|| ParseError::InvalidNumeric(value_repr.to_owned()))?;
ymd.append(value, &value_repr, None)?;
let value = value
.floor()
.to_i32()
.ok_or_else(|| ParseError::InvalidNumeric(value_repr.to_owned()))?;
ymd.append(value, value_repr, None)?;
}
idx += 1;
@ -1166,11 +1172,12 @@ impl Parser {
&& (*ZERO <= value && value < *TWENTY_FOUR)
{
// 12am
let hour = value.to_i64().unwrap() as i32;
res.hour = Some(self.adjust_ampm(hour, info.ampm_index(&tokens[idx + 1]).unwrap()));
let hour = option_to_result(value.to_i64())? as i32;
res.hour =
Some(self.adjust_ampm(hour, option_to_result(info.ampm_index(&tokens[idx + 1]))?));
idx += 1;
} else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
} else if ymd.could_be_day(option_to_result(value.to_i32())? as i32) {
ymd.append(option_to_result(value.to_i64())? as i32, value_repr, None)?;
} else if !fuzzy {
return Err(ParseError::UnrecognizedFormat);
}
@ -1256,18 +1263,18 @@ impl Parser {
info: &ParserInfo,
hms_index: Option<usize>,
) -> (usize, Option<usize>) {
if hms_index.is_none() {
(idx, None)
} else if hms_index.unwrap() > idx {
(
hms_index.unwrap(),
info.hms_index(&tokens[hms_index.unwrap()]),
)
} else {
(
idx,
info.hms_index(&tokens[hms_index.unwrap()]).map(|u| u + 1),
)
match hms_index {
Some(index) => {
if index > idx {
(index, info.hms_index(&tokens[hms_index.unwrap()]))
} else {
(
idx,
info.hms_index(&tokens[hms_index.unwrap()]).map(|u| u + 1),
)
}
}
None => (idx, None),
}
}
@ -1277,14 +1284,14 @@ impl Parser {
if hms == 0 {
res.hour = value.to_i32();
if !close_to_integer(&value) {
res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32);
res.minute = Some(option_to_result((*SIXTY * (value % *ONE)).to_i64())? as i32);
}
} else if hms == 1 {
let (min, sec) = self.parse_min_sec(value);
res.minute = Some(min);
res.second = sec;
} else if hms == 2 {
let (sec, micro) = self.parsems(value_repr).unwrap();
let (sec, micro) = self.parsems(value_repr)?;
res.second = Some(sec);
res.nanosecond = Some(micro);
}
@ -1293,7 +1300,7 @@ impl Parser {
}
fn to_decimal(&self, value: &str) -> ParseResult<Decimal> {
Decimal::from_str(value).or_else(|_| Err(ParseError::InvalidNumeric(value.to_owned())))
Decimal::from_str(value).map_err(|_| ParseError::InvalidNumeric(value.to_owned()))
}
fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) {

View File

@ -1,5 +1,5 @@
use base64::Engine;
use base64::engine::general_purpose::STANDARD;
use base64::Engine;
use chrono::NaiveDate;
use std::collections::HashMap;
use std::str;
@ -21,7 +21,10 @@ fn test_fuzz() {
Err(ParseError::UnrecognizedFormat)
);
let default = NaiveDate::from_ymd_opt(2016, 6, 29).unwrap().and_hms_opt(0, 0, 0).unwrap();
let default = NaiveDate::from_ymd_opt(2016, 6, 29)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap();
let p = Parser::default();
let res = p.parse(
"\x0D\x31",
@ -66,12 +69,20 @@ fn empty_string() {
#[test]
fn github_33() {
assert_eq!(parse("66:'"), Err(ParseError::InvalidNumeric("'".to_owned())))
assert_eq!(
parse("66:'"),
Err(ParseError::InvalidNumeric("'".to_owned()))
)
}
#[test]
fn github_32() {
assert_eq!(parse("99999999999999999999999"), Err(ParseError::InvalidNumeric("99999999999999999999999".to_owned())))
assert_eq!(
parse("99999999999999999999999"),
Err(ParseError::InvalidNumeric(
"99999999999999999999999".to_owned()
))
)
}
#[test]

View File

@ -2,13 +2,19 @@ mod fuzzing;
mod pycompat_parser;
mod pycompat_tokenizer;
use chrono::NaiveDate;
use crate::parse;
use chrono::NaiveDate;
#[test]
fn nanosecond_precision() {
assert_eq!(
parse("2008.12.29T08:09:10.123456789").unwrap(),
(NaiveDate::from_ymd_opt(2008, 12, 29).unwrap().and_hms_nano_opt(8, 9, 10, 123_456_789).unwrap(), None)
(
NaiveDate::from_ymd_opt(2008, 12, 29)
.unwrap()
.and_hms_nano_opt(8, 9, 10, 123_456_789)
.unwrap(),
None
)
)
}

File diff suppressed because it is too large Load Diff

View File

@ -55,7 +55,7 @@ impl DayOfWeek {
pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
// From https://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week#Schwerdtfeger's_method
let (c, g) = match month {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
3..=12 => {
let c = year / 100;
(c, year - 100 * c)
}