1
0
mirror of https://github.com/bspeice/dtparse synced 2024-12-22 04:18:09 -05:00
This commit is contained in:
Bradlee Speice 2019-11-13 23:18:37 -05:00
parent 142712900f
commit 030ca4fced
7 changed files with 2808 additions and 856 deletions

View File

@ -8,7 +8,6 @@ use dtparse::ParserInfo;
use std::collections::HashMap; use std::collections::HashMap;
fn main() { fn main() {
// In this example, we'll just swap the default "months" parameter // In this example, we'll just swap the default "months" parameter
// with a version in Russian. Lovingly taken from: // with a version in Russian. Lovingly taken from:
// https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244 // https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244
@ -26,14 +25,24 @@ fn main() {
vec!["сен", "Сентябрь"], vec!["сен", "Сентябрь"],
vec!["окт", "Октябрь"], vec!["окт", "Октябрь"],
vec!["ноя", "Ноябрь"], vec!["ноя", "Ноябрь"],
vec!["дек", "Декабрь"] vec!["дек", "Декабрь"],
]); ]);
let p = Parser::new(info); let p = Parser::new(info);
assert_eq!( assert_eq!(
p.parse("10 Сентябрь 2015 10:20", None, None, false, false, None, false, &HashMap::new()) p.parse(
.unwrap().0, "10 Сентябрь 2015 10:20",
None,
None,
false,
false,
None,
false,
&HashMap::new()
)
.unwrap()
.0,
NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0) NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0)
); );
} }

View File

@ -4,23 +4,23 @@
//! # dtparse //! # dtparse
//! The fully-featured "even I couldn't understand that" time parser. //! The fully-featured "even I couldn't understand that" time parser.
//! Designed to take in strings and give back sensible dates and times. //! Designed to take in strings and give back sensible dates and times.
//! //!
//! dtparse has its foundations in the [`dateutil`](dateutil) library for //! dtparse has its foundations in the [`dateutil`](dateutil) library for
//! Python, which excels at taking "interesting" strings and trying to make //! Python, which excels at taking "interesting" strings and trying to make
//! sense of the dates and times they contain. A couple of quick examples //! sense of the dates and times they contain. A couple of quick examples
//! from the test cases should give some context: //! from the test cases should give some context:
//! //!
//! ```rust,ignore (tests-dont-compile-on-old-rust) //! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono; //! # extern crate chrono;
//! # extern crate dtparse; //! # extern crate dtparse;
//! use chrono::prelude::*; //! use chrono::prelude::*;
//! use dtparse::parse; //! use dtparse::parse;
//! //!
//! assert_eq!( //! assert_eq!(
//! parse("2008.12.30"), //! parse("2008.12.30"),
//! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None)) //! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None))
//! ); //! );
//! //!
//! // It can even handle timezones! //! // It can even handle timezones!
//! assert_eq!( //! assert_eq!(
//! parse("January 4, 2024; 18:30:04 +02:00"), //! parse("January 4, 2024; 18:30:04 +02:00"),
@ -30,17 +30,17 @@
//! )) //! ))
//! ); //! );
//! ``` //! ```
//! //!
//! And we can even handle fuzzy strings where dates/times aren't the //! And we can even handle fuzzy strings where dates/times aren't the
//! only content if we dig into the implementation a bit! //! only content if we dig into the implementation a bit!
//! //!
//! ```rust,ignore (tests-dont-compile-on-old-rust) //! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono; //! # extern crate chrono;
//! # extern crate dtparse; //! # extern crate dtparse;
//! use chrono::prelude::*; //! use chrono::prelude::*;
//! use dtparse::Parser; //! use dtparse::Parser;
//! # use std::collections::HashMap; //! # use std::collections::HashMap;
//! //!
//! let mut p = Parser::default(); //! let mut p = Parser::default();
//! assert_eq!( //! assert_eq!(
//! p.parse( //! p.parse(
@ -58,7 +58,7 @@
//! )) //! ))
//! ); //! );
//! ``` //! ```
//! //!
//! Further examples can be found in the `examples` directory on international usage. //! Further examples can be found in the `examples` directory on international usage.
//! //!
//! # Usage //! # Usage
@ -66,7 +66,7 @@
//! `dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX, //! `dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not //! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
//! tested against them. //! tested against them.
//! //!
//! [dateutil]: https://github.com/dateutil/dateutil //! [dateutil]: https://github.com/dateutil/dateutil
#[macro_use] #[macro_use]
@ -85,14 +85,14 @@ use chrono::NaiveDate;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use chrono::NaiveTime; use chrono::NaiveTime;
use chrono::Offset; use chrono::Offset;
use chrono::Timelike;
use chrono::TimeZone; use chrono::TimeZone;
use chrono::Timelike;
use chrono_tz::Tz; use chrono_tz::Tz;
use num_traits::cast::ToPrimitive; use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError; use rust_decimal::Error as DecimalError;
use std::collections::HashMap;
use std::cmp::min; use std::cmp::min;
use std::collections::HashMap;
use std::num::ParseIntError; use std::num::ParseIntError;
use std::str::FromStr; use std::str::FromStr;
use std::vec::Vec; use std::vec::Vec;
@ -178,7 +178,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
} }
/// Container for specific tokens to be recognized during parsing. /// Container for specific tokens to be recognized during parsing.
/// ///
/// - `jump`: Values that indicate the end of a token for parsing and can be ignored /// - `jump`: Values that indicate the end of a token for parsing and can be ignored
/// - `weekday`: Names of the days of the week /// - `weekday`: Names of the days of the week
/// - `months`: Names of the months /// - `months`: Names of the months
@ -191,7 +191,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
/// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year /// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year
/// - `year`: The current year /// - `year`: The current year
/// - `century`: The first year in the current century /// - `century`: The first year in the current century
/// ///
/// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence /// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence
/// and will be parsed as "YDM" /// and will be parsed as "YDM"
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -232,12 +232,10 @@ impl Default for ParserInfo {
let century = year / 100 * 100; let century = year / 100 * 100;
ParserInfo { ParserInfo {
jump: parse_info(vec![ jump: parse_info(vec![vec![
vec![ " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "nd", "rd", "th",
"st", "nd", "rd", "th", ]]),
],
]),
weekday: parse_info(vec![ weekday: parse_info(vec![
vec!["Mon", "Monday"], vec!["Mon", "Monday"],
vec!["Tue", "Tues", "Tuesday"], vec!["Tue", "Tues", "Tuesday"],
@ -345,7 +343,8 @@ impl ParserInfo {
if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) {
res.tzname = Some("UTC".to_owned()); res.tzname = Some("UTC".to_owned());
res.tzoffset = Some(0); res.tzoffset = Some(0);
} else if res.tzoffset != Some(0) && res.tzname.is_some() } else if res.tzoffset != Some(0)
&& res.tzname.is_some()
&& self.utczone_index(res.tzname.as_ref().unwrap()) && self.utczone_index(res.tzname.as_ref().unwrap())
{ {
res.tzoffset = Some(0); res.tzoffset = Some(0);
@ -362,16 +361,16 @@ fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
}; };
match month { match month {
2 => if leap_year { 2 => {
Ok(29) if leap_year {
} else { Ok(29)
Ok(28) } else {
}, Ok(28)
}
}
1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
4 | 6 | 9 | 11 => Ok(30), 4 | 6 | 9 | 11 => Ok(30),
_ => { _ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
} }
} }
@ -425,9 +424,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -439,9 +436,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -502,19 +497,15 @@ impl YMD {
} }
if self._ymd.len() != strids.len() { if self._ymd.len() != strids.len() {
return Err(ParseError::YearMonthDayError("Tried to resolve year, month, and day without enough information")); return Err(ParseError::YearMonthDayError(
"Tried to resolve year, month, and day without enough information",
));
} }
Ok(( Ok((
strids strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
.get(&YMDLabel::Year) strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
.map(|i| self._ymd[*i]), strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Month)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Day)
.map(|i| self._ymd[*i]),
)) ))
} }
@ -527,28 +518,24 @@ impl YMD {
let len_ymd = self._ymd.len(); let len_ymd = self._ymd.len();
let mut strids: HashMap<YMDLabel, usize> = HashMap::new(); let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
self.ystridx self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
.map(|u| strids.insert(YMDLabel::Year, u)); self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
self.mstridx self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx
.map(|u| strids.insert(YMDLabel::Day, u));
// TODO: More Rustiomatic way of doing this? // TODO: More Rustiomatic way of doing this?
if len_ymd == strids.len() && !strids.is_empty() if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
|| (len_ymd == 3 && strids.len() == 2)
{
return self.resolve_from_stridxs(&mut strids); return self.resolve_from_stridxs(&mut strids);
}; };
// Received year, month, day, and ??? // Received year, month, day, and ???
if len_ymd > 3 { if len_ymd > 3 {
return Err(ParseError::YearMonthDayError("Received extra tokens in resolving year, month, and day")); return Err(ParseError::YearMonthDayError(
"Received extra tokens in resolving year, month, and day",
));
} }
match (len_ymd, self.mstridx) { match (len_ymd, self.mstridx) {
(1, Some(val)) | (1, Some(val)) | (2, Some(val)) => {
(2, Some(val)) => {
let other = if len_ymd == 1 { let other = if len_ymd == 1 {
self._ymd[0] self._ymd[0]
} else { } else {
@ -558,7 +545,7 @@ impl YMD {
return Ok((Some(other), Some(self._ymd[val]), None)); return Ok((Some(other), Some(self._ymd[val]), None));
} }
return Ok((None, Some(self._ymd[val]), Some(other))); return Ok((None, Some(self._ymd[val]), Some(other)));
}, }
(2, None) => { (2, None) => {
if self._ymd[0] > 31 { if self._ymd[0] > 31 {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None)); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
@ -570,28 +557,29 @@ impl YMD {
return Ok((None, Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((None, Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(0)) => { (3, Some(0)) => {
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2]))); return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(1)) => { (3, Some(1)) => {
if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) { if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2]))); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}, }
(3, Some(2)) => { (3, Some(2)) => {
// It was in the original docs, so: WTF!? // It was in the original docs, so: WTF!?
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1]))); return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
}, }
(3, None) => { (3, None) => {
if self._ymd[0] > 31 || self.ystridx == Some(0) if self._ymd[0] > 31
|| self.ystridx == Some(0)
|| (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31) || (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
{ {
if dayfirst && self._ymd[2] <= 12 { if dayfirst && self._ymd[2] <= 12 {
@ -602,8 +590,10 @@ impl YMD {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(_, _) => { return Ok((None, None, None)); }, (_, _) => {
return Ok((None, None, None));
}
} }
} }
} }
@ -635,7 +625,7 @@ pub struct Parser {
impl Parser { impl Parser {
/// Create a new `Parser` instance using the provided `ParserInfo`. /// Create a new `Parser` instance using the provided `ParserInfo`.
/// ///
/// This method allows you to set up a parser to handle different /// This method allows you to set up a parser to handle different
/// names for days of the week, months, etc., enabling customization /// names for days of the week, months, etc., enabling customization
/// for different languages or extra values. /// for different languages or extra values.
@ -646,27 +636,27 @@ impl Parser {
/// Main method to trigger parsing of a string using the previously-provided /// Main method to trigger parsing of a string using the previously-provided
/// parser information. Returns a naive timestamp along with timezone and /// parser information. Returns a naive timestamp along with timezone and
/// unused tokens if available. /// unused tokens if available.
/// ///
/// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous /// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous
/// dates. Consider the following scenarios where we parse the string '01.02.03' /// dates. Consider the following scenarios where we parse the string '01.02.03'
/// ///
/// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003` /// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003`
/// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001` /// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001`
/// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001` /// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001`
/// ///
/// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if /// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if
/// they are unrecognized. However, the unused tokens will not be returned /// they are unrecognized. However, the unused tokens will not be returned
/// unless `fuzzy_with_tokens` is set as `true`. /// unless `fuzzy_with_tokens` is set as `true`.
/// ///
/// `default` is the timestamp used to infer missing values, and is midnight /// `default` is the timestamp used to infer missing values, and is midnight
/// of the current day by default. For example, when parsing the text '2003', /// of the current day by default. For example, when parsing the text '2003',
/// we will use the current month and day as a default value, leading to a /// we will use the current month and day as a default value, leading to a
/// result of 'March 3, 2003' if the function was run using a default of /// result of 'March 3, 2003' if the function was run using a default of
/// March 3rd. /// March 3rd.
/// ///
/// `ignoretz` forces the parser to ignore timezone information even if it /// `ignoretz` forces the parser to ignore timezone information even if it
/// is recognized in the time string /// is recognized in the time string
/// ///
/// `tzinfos` is a map of timezone names to the offset seconds. For example, /// `tzinfos` is a map of timezone names to the offset seconds. For example,
/// the parser would ignore the 'EST' part of the string in '10 AM EST' /// the parser would ignore the 'EST' part of the string in '10 AM EST'
/// unless you added a `tzinfos` map of `{"EST": "-18000"}`. Please note that /// unless you added a `tzinfos` map of `{"EST": "-18000"}`. Please note that
@ -758,7 +748,9 @@ impl Parser {
} }
i += 2; i += 2;
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " } else if i + 4 < len_l
&& l[i + 1] == l[i + 3]
&& l[i + 3] == " "
&& self.info.pertain_index(&l[i + 2]) && self.info.pertain_index(&l[i + 2])
{ {
// Jan of 01 // Jan of 01
@ -796,7 +788,7 @@ impl Parser {
} else { } else {
"+".to_owned() "+".to_owned()
}; };
l[i+1] = item; l[i + 1] = item;
res.tzoffset = None; res.tzoffset = None;
@ -832,8 +824,11 @@ impl Parser {
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60)); Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
let tzname = res.tzname.clone(); let tzname = res.tzname.clone();
if i + 5 < len_l && self.info.jump_index(&l[i + 2]) && l[i + 3] == "(" if i + 5 < len_l
&& l[i + 5] == ")" && 3 <= l[i + 4].len() && self.info.jump_index(&l[i + 2])
&& l[i + 3] == "("
&& l[i + 5] == ")"
&& 3 <= l[i + 4].len()
&& self.could_be_tzname(res.hour, &tzname, None, &l[i + 4]) && self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
{ {
// (GMT) // (GMT)
@ -879,7 +874,10 @@ impl Parser {
.chars() .chars()
.all(|c| 65u8 as char <= c && c <= 90u8 as char); .all(|c| 65u8 as char <= c && c <= 90u8 as char);
hour.is_some() && tzname.is_none() && tzoffset.is_none() && token.len() <= 5 hour.is_some()
&& tzname.is_none()
&& tzoffset.is_none()
&& token.len() <= 5
&& all_ascii_upper && all_ascii_upper
} }
@ -903,7 +901,11 @@ impl Parser {
Ok(val_is_ampm) Ok(val_is_ampm)
} }
fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> { fn build_naive(
&self,
res: &ParsingResult,
default: &NaiveDateTime,
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year()); let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32; let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
@ -923,7 +925,10 @@ impl Parser {
let d = NaiveDate::from_ymd( let d = NaiveDate::from_ymd(
y, y,
m, m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?) min(
res.day.unwrap_or(default.day() as i32) as u32,
days_in_month(y, m as i32)?,
),
); );
let d = d + d_offset; let d = d + d_offset;
@ -931,21 +936,23 @@ impl Parser {
let hour = res.hour.unwrap_or(default.hour() as i32) as u32; let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32; let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32; let second = res.second.unwrap_or(default.second() as i32) as u32;
let microsecond = res.microsecond let microsecond = res
.microsecond
.unwrap_or(default.timestamp_subsec_micros() as i32) as u32; .unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
let t = NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| { let t =
if hour >= 24 { NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
ParseError::ImpossibleTimestamp("Invalid hour") if hour >= 24 {
} else if minute >= 60 { ParseError::ImpossibleTimestamp("Invalid hour")
ParseError::ImpossibleTimestamp("Invalid minute") } else if minute >= 60 {
} else if second >= 60 { ParseError::ImpossibleTimestamp("Invalid minute")
ParseError::ImpossibleTimestamp("Invalid second") } else if second >= 60 {
} else if microsecond >= 2_000_000 { ParseError::ImpossibleTimestamp("Invalid second")
ParseError::ImpossibleTimestamp("Invalid microsecond") } else if microsecond >= 2_000_000 {
} else { ParseError::ImpossibleTimestamp("Invalid microsecond")
unreachable!(); } else {
} unreachable!();
})?; }
})?;
Ok(NaiveDateTime::new(d, t)) Ok(NaiveDateTime::new(d, t))
} }
@ -959,8 +966,10 @@ impl Parser {
if let Some(offset) = res.tzoffset { if let Some(offset) = res.tzoffset {
Ok(Some(FixedOffset::east(offset))) Ok(Some(FixedOffset::east(offset)))
} else if res.tzoffset == None } else if res.tzoffset == None
&& (res.tzname == Some(" ".to_owned()) || res.tzname == Some(".".to_owned()) && (res.tzname == Some(" ".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == None) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
{ {
Ok(None) Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) { } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -1000,7 +1009,9 @@ impl Parser {
// TODO: I miss the `x in y` syntax // TODO: I miss the `x in y` syntax
// TODO: Decompose this logic a bit // TODO: Decompose this logic a bit
if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none() if ymd.len() == 3
&& (len_li == 2 || len_li == 4)
&& res.hour.is_none()
&& (idx + 1 >= len_l && (idx + 1 >= len_l
|| (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none())) || (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
{ {
@ -1031,7 +1042,11 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) { } else if vec![8, 12, 14].contains(&len_li) {
// YYMMDD // YYMMDD
let s = &tokens[idx]; let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?; ymd.append(
s[..4].parse::<i32>().unwrap(),
&s[..4],
Some(YMDLabel::Year),
)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?; ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
@ -1088,10 +1103,10 @@ impl Parser {
if let Some(value) = info.month_index(&tokens[idx + 4]) { if let Some(value) = info.month_index(&tokens[idx + 4]) {
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?; ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
} else if let Ok(val) = tokens[idx + 4].parse::<i32>() { } else if let Ok(val) = tokens[idx + 4].parse::<i32>() {
ymd.append(val, &tokens[idx + 4], None)?; ymd.append(val, &tokens[idx + 4], None)?;
} else { } else {
return Err(ParseError::UnrecognizedFormat); return Err(ParseError::UnrecognizedFormat);
} }
idx += 2; idx += 2;
} }
@ -1169,7 +1184,7 @@ impl Parser {
len_l - 2 len_l - 2
} else if idx > 1 { } else if idx > 1 {
idx - 2 idx - 2
} else if len_l == 0{ } else if len_l == 0 {
panic!("Attempting to find_hms_index() wih no tokens."); panic!("Attempting to find_hms_index() wih no tokens.");
} else { } else {
0 0
@ -1177,13 +1192,18 @@ impl Parser {
if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() { if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
hms_idx = Some(idx + 1) hms_idx = Some(idx + 1)
} else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " " } else if allow_jump
&& idx + 2 < len_l
&& tokens[idx + 1] == " "
&& info.hms_index(&tokens[idx + 2]).is_some() && info.hms_index(&tokens[idx + 2]).is_some()
{ {
hms_idx = Some(idx + 2) hms_idx = Some(idx + 2)
} else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() { } else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
hms_idx = Some(idx - 1) hms_idx = Some(idx - 1)
} else if len_l > 0 && idx > 0 && idx == len_l - 1 && tokens[idx - 1] == " " } else if len_l > 0
&& idx > 0
&& idx == len_l - 1
&& tokens[idx - 1] == " "
&& info.hms_index(&tokens[idx_minus_two]).is_some() && info.hms_index(&tokens[idx_minus_two]).is_some()
{ {
hms_idx = Some(idx - 2) hms_idx = Some(idx - 2)
@ -1288,7 +1308,7 @@ fn ljust(s: &str, chars: usize, replace: char) -> String {
/// Main entry point for using `dtparse`. The parse function is responsible for /// Main entry point for using `dtparse`. The parse function is responsible for
/// taking in a string representing some time value, and turning it into /// taking in a string representing some time value, and turning it into
/// a timestamp with optional timezone information if it can be identified. /// a timestamp with optional timezone information if it can be identified.
/// ///
/// The default implementation assumes English values for names of months, /// The default implementation assumes English values for names of months,
/// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()` /// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()`
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> { pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {

View File

@ -7,18 +7,36 @@ use Parser;
#[test] #[test]
fn test_fuzz() { fn test_fuzz() {
assert_eq!(
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::ImpossibleTimestamp("Invalid month"))); parse("\x2D\x38\x31\x39\x34\x38\x34"),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
);
// Garbage in the third delimited field // Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"), assert_eq!(
Err(ParseError::UnrecognizedFormat)); parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat)
);
// OverflowError: Python int too large to convert to C long // OverflowError: Python int too large to convert to C long
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour)); // assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0); let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let p = Parser::default(); let p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, &HashMap::new()).unwrap(); let res = p
.parse(
"\x0D\x31",
None,
None,
false,
false,
Some(&default),
false,
&HashMap::new(),
)
.unwrap();
assert_eq!(res.0, default); assert_eq!(res.0, default);
assert_eq!(parse("\x2D\x2D\x32\x31\x38\x6D"), Err(ParseError::ImpossibleTimestamp("Invalid minute"))); assert_eq!(
parse("\x2D\x2D\x32\x31\x38\x6D"),
Err(ParseError::ImpossibleTimestamp("Invalid minute"))
);
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,3 @@
//! This code has been generated by running the `build_pycompat_tokenizer.py` script //! This code has been generated by running the `build_pycompat_tokenizer.py` script
//! in the repository root. Please do not edit it, as your edits will be destroyed //! in the repository root. Please do not edit it, as your edits will be destroyed
//! upon re-running code generation. //! upon re-running code generation.
@ -12,7 +11,9 @@ fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
#[test] #[test]
fn test_tokenize0() { fn test_tokenize0() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28",
];
tokenize_assert("Thu Sep 25 10:36:28", comp); tokenize_assert("Thu Sep 25 10:36:28", comp);
} }
@ -294,7 +295,9 @@ fn test_tokenize46() {
#[test] #[test]
fn test_tokenize47() { fn test_tokenize47() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
} }
@ -306,7 +309,9 @@ fn test_tokenize48() {
#[test] #[test]
fn test_tokenize49() { fn test_tokenize49() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41",
];
tokenize_assert("2003-09-25T10:49:41", comp); tokenize_assert("2003-09-25T10:49:41", comp);
} }
@ -354,7 +359,9 @@ fn test_tokenize56() {
#[test] #[test]
fn test_tokenize57() { fn test_tokenize57() {
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"]; let comp = vec![
"2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502",
];
tokenize_assert("2003-09-25 10:49:41,502", comp); tokenize_assert("2003-09-25 10:49:41,502", comp);
} }
@ -510,7 +517,10 @@ fn test_tokenize82() {
#[test] #[test]
fn test_tokenize83() { fn test_tokenize83() {
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "]; let comp = vec![
" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":",
"01", ":", "02", " ", " ", " ", "am", " ", " ",
];
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp); tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
} }
@ -522,7 +532,9 @@ fn test_tokenize84() {
#[test] #[test]
fn test_tokenize85() { fn test_tokenize85() {
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"]; let comp = vec![
"1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM",
];
tokenize_assert("1996.July.10 AD 12:08 PM", comp); tokenize_assert("1996.July.10 AD 12:08 PM", comp);
} }
@ -558,25 +570,33 @@ fn test_tokenize90() {
#[test] #[test]
fn test_tokenize91() { fn test_tokenize91() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize92() { fn test_tokenize92() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize93() { fn test_tokenize93() {
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"]; let comp = vec![
"July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am",
];
tokenize_assert("July 4, 1976 12:01:02 am", comp); tokenize_assert("July 4, 1976 12:01:02 am", comp);
} }
#[test] #[test]
fn test_tokenize94() { fn test_tokenize94() {
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"]; let comp = vec![
"Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995",
];
tokenize_assert("Mon Jan 2 04:24:27 1995", comp); tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
} }
@ -588,7 +608,9 @@ fn test_tokenize95() {
#[test] #[test]
fn test_tokenize96() { fn test_tokenize96() {
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"]; let comp = vec![
"Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578",
];
tokenize_assert("Jan 1 1999 11:23:34.578", comp); tokenize_assert("Jan 1 1999 11:23:34.578", comp);
} }
@ -618,13 +640,17 @@ fn test_tokenize100() {
#[test] #[test]
fn test_tokenize101() { fn test_tokenize101() {
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0099-01-01T00:00:00", comp); tokenize_assert("0099-01-01T00:00:00", comp);
} }
#[test] #[test]
fn test_tokenize102() { fn test_tokenize102() {
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0031-01-01T00:00:00", comp); tokenize_assert("0031-01-01T00:00:00", comp);
} }
@ -666,31 +692,42 @@ fn test_tokenize108() {
#[test] #[test]
fn test_tokenize109() { fn test_tokenize109() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize110() { fn test_tokenize110() {
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"]; let comp = vec![
"2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu",
];
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp); tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
} }
#[test] #[test]
fn test_tokenize111() { fn test_tokenize111() {
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"]; let comp = vec![
"Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-",
"0300",
];
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp); tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
} }
#[test] #[test]
fn test_tokenize112() { fn test_tokenize112() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp); tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
} }
#[test] #[test]
fn test_tokenize113() { fn test_tokenize113() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41-03:00", comp); tokenize_assert("2003-09-25T10:49:41-03:00", comp);
} }
@ -708,19 +745,27 @@ fn test_tokenize115() {
#[test] #[test]
fn test_tokenize116() { fn test_tokenize116() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3",
];
tokenize_assert("2018-08-10 10:00:00 UTC+3", comp); tokenize_assert("2018-08-10 10:00:00 UTC+3", comp);
} }
#[test] #[test]
fn test_tokenize117() { fn test_tokenize117() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-", "4"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-",
"4",
];
tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp); tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp);
} }
#[test] #[test]
fn test_tokenize118() { fn test_tokenize118() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-", "02", ":", "00"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-",
"02", ":", "00",
];
tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp); tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp);
} }
@ -828,91 +873,213 @@ fn test_tokenize135() {
#[test] #[test]
fn test_tokenize136() { fn test_tokenize136() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize137() { fn test_tokenize137() {
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"]; let comp = vec![
"1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ",
"PDT",
];
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp); tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
} }
#[test] #[test]
fn test_tokenize138() { fn test_tokenize138() {
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"]; let comp = vec![
"Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30",
":", "42", "pm", " ", "PST",
];
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp); tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
} }
#[test] #[test]
fn test_tokenize139() { fn test_tokenize139() {
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"]; let comp = vec![
"November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am",
" ", "EST",
];
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp); tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
} }
#[test] #[test]
fn test_tokenize140() { fn test_tokenize140() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00",
];
tokenize_assert("1994-11-05T08:15:30-05:00", comp); tokenize_assert("1994-11-05T08:15:30-05:00", comp);
} }
#[test] #[test]
fn test_tokenize141() { fn test_tokenize141() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z",
];
tokenize_assert("1994-11-05T08:15:30Z", comp); tokenize_assert("1994-11-05T08:15:30Z", comp);
} }
#[test] #[test]
fn test_tokenize142() { fn test_tokenize142() {
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"]; let comp = vec![
"1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z",
];
tokenize_assert("1976-07-04T00:01:02Z", comp); tokenize_assert("1976-07-04T00:01:02Z", comp);
} }
#[test] #[test]
fn test_tokenize143() { fn test_tokenize143() {
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"]; let comp = vec![
"Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995",
];
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp); tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
} }
#[test] #[test]
fn test_tokenize144() { fn test_tokenize144() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; let comp = vec![
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize145() { fn test_tokenize145() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; let comp = vec![
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize146() { fn test_tokenize146() {
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"]; let comp = vec![
"I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ",
"1974",
];
tokenize_assert("I have a meeting on March 1, 1974", comp); tokenize_assert("I have a meeting on March 1, 1974", comp);
} }
#[test] #[test]
fn test_tokenize147() { fn test_tokenize147() {
let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"]; let comp = vec![
tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp); "On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ",
"going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ",
"Mars",
];
tokenize_assert(
"On June 8th, 2020, I am going to be the first man on Mars",
comp,
);
} }
#[test] #[test]
fn test_tokenize148() { fn test_tokenize148() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"]; let comp = vec![
tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp); "Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset",
" ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",",
" ", "2003",
];
tokenize_assert(
"Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
comp,
);
} }
#[test] #[test]
fn test_tokenize149() { fn test_tokenize149() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"]; let comp = vec![
tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp); "Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December",
" ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on",
" ", "Sunset",
];
tokenize_assert(
"Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset",
comp,
);
} }
#[test] #[test]
fn test_tokenize150() { fn test_tokenize150() {
let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"]; let comp = vec![
"Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ",
"going", " ", "to", " ", "see", " ", "you", " ", "there", "?",
];
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp); tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
} }

View File

@ -14,7 +14,6 @@ pub(crate) enum ParseState {
} }
impl Tokenizer { impl Tokenizer {
pub(crate) fn new(parse_string: &str) -> Self { pub(crate) fn new(parse_string: &str) -> Self {
Tokenizer { Tokenizer {
token_stack: vec![], token_stack: vec![],
@ -92,7 +91,7 @@ impl Iterator for Tokenizer {
} else { } else {
break; break;
} }
}, }
ParseState::Alpha => { ParseState::Alpha => {
seenletters = true; seenletters = true;
if self.isword(nextchar) { if self.isword(nextchar) {
@ -105,19 +104,21 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::Numeric => { ParseState::Numeric => {
if self.isnum(nextchar) { if self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) { } else if nextchar == '.'
|| (nextchar == ',' && token.as_ref().unwrap().len() >= 2)
{
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal; state = ParseState::NumericDecimal;
} else { } else {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::AlphaDecimal => { ParseState::AlphaDecimal => {
seenletters = true; seenletters = true;
if nextchar == '.' || self.isword(nextchar) { if nextchar == '.' || self.isword(nextchar) {
@ -130,7 +131,7 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::NumericDecimal => { ParseState::NumericDecimal => {
if nextchar == '.' || self.isnum(nextchar) { if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
@ -150,20 +151,25 @@ impl Iterator for Tokenizer {
// We do something slightly different to express the same logic // We do something slightly different to express the same logic
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal { if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
// UNWRAP: The state check guarantees that we have a value // UNWRAP: The state check guarantees that we have a value
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count(); let dot_count = token
.as_ref()
.unwrap()
.chars()
.filter(|c| *c == '.')
.count();
let last_char = token.as_ref().unwrap().chars().last(); let last_char = token.as_ref().unwrap().chars().last();
let last_splittable = last_char == Some('.') || last_char == Some(','); let last_splittable = last_char == Some('.') || last_char == Some(',');
if seenletters || dot_count > 1 || last_splittable { if seenletters || dot_count > 1 || last_splittable {
let mut l = self.decimal_split(token.as_ref().unwrap()); let mut l = self.decimal_split(token.as_ref().unwrap());
let remaining = l.split_off(1); let remaining = l.split_off(1);
token = Some(l[0].clone()); token = Some(l[0].clone());
for t in remaining { for t in remaining {
self.token_stack.push(t); self.token_stack.push(t);
} }
} }
if state == ParseState::NumericDecimal && dot_count == 0 { if state == ParseState::NumericDecimal && dot_count == 0 {
token = Some(token.unwrap().replace(',', ".")); token = Some(token.unwrap().replace(',', "."));
} }

View File

@ -1,5 +1,5 @@
use ParseResult;
use ParseError; use ParseError;
use ParseResult;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum DayOfWeek { pub enum DayOfWeek {
@ -9,11 +9,10 @@ pub enum DayOfWeek {
Wednesday, Wednesday,
Thursday, Thursday,
Friday, Friday,
Saturday Saturday,
} }
impl DayOfWeek { impl DayOfWeek {
pub fn to_numeral(&self) -> u32 { pub fn to_numeral(&self) -> u32 {
match *self { match *self {
DayOfWeek::Sunday => 0, DayOfWeek::Sunday => 0,
@ -35,7 +34,7 @@ impl DayOfWeek {
4 => DayOfWeek::Thursday, 4 => DayOfWeek::Thursday,
5 => DayOfWeek::Friday, 5 => DayOfWeek::Friday,
6 => DayOfWeek::Saturday, 6 => DayOfWeek::Saturday,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -59,12 +58,12 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => { 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
let c = year / 100; let c = year / 100;
(c, year - 100 * c) (c, year - 100 * c)
}, }
1 | 2 => { 1 | 2 => {
let c = (year - 1) / 100; let c = (year - 1) / 100;
(c, year - 1 - 100 * c) (c, year - 1 - 100 * c)
}, }
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month")) _ => return Err(ParseError::ImpossibleTimestamp("Invalid month")),
}; };
let e = match month { let e = match month {
@ -75,7 +74,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
8 => 1, 8 => 1,
9 | 12 => 4, 9 | 12 => 4,
10 => 6, 10 => 6,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
// This implementation is Gregorian-only. // This implementation is Gregorian-only.
@ -84,7 +83,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
1 => 5, 1 => 5,
2 => 3, 2 => 3,
3 => 1, 3 => 1,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
match (day + e + f + g + g / 4) % 7 { match (day + e + f + g + g / 4) % 7 {
@ -95,7 +94,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
4 => Ok(DayOfWeek::Thursday), 4 => Ok(DayOfWeek::Thursday),
5 => Ok(DayOfWeek::Friday), 5 => Ok(DayOfWeek::Friday),
6 => Ok(DayOfWeek::Saturday), 6 => Ok(DayOfWeek::Saturday),
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -114,7 +113,6 @@ mod test {
#[test] #[test]
fn weekday_difference() { fn weekday_difference() {
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2);
@ -129,4 +127,4 @@ mod test {
assert_eq!(DayOfWeek::Friday.difference(&DayOfWeek::Sunday), 2); assert_eq!(DayOfWeek::Friday.difference(&DayOfWeek::Sunday), 2);
assert_eq!(DayOfWeek::Saturday.difference(&DayOfWeek::Sunday), 1); assert_eq!(DayOfWeek::Saturday.difference(&DayOfWeek::Sunday), 1);
} }
} }