1
0
mirror of https://github.com/bspeice/dtparse synced 2024-12-22 04:18:09 -05:00

Merge pull request #19 from bspeice/tz_fix

Attempt to read timezones from chrono-tz
This commit is contained in:
bspeice 2019-11-29 15:45:44 -05:00 committed by GitHub
commit 9f1b8d4971
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 2846 additions and 880 deletions

View File

@ -78,24 +78,7 @@ matrix:
# Historical Rust versions # Historical Rust versions
- env: TARGET=x86_64-unknown-linux-gnu - env: TARGET=x86_64-unknown-linux-gnu
rust: 1.21.0 rust: 1.28.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.22.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.23.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.24.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.25.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.26.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.27.0
# WASM support
- env: TARGET=asmjs-unknown-emscripten USE_CARGO_WEB=true
rust: nightly
before_install: before_install:
- set -e - set -e

View File

@ -18,6 +18,7 @@ name = "dtparse"
[dependencies] [dependencies]
chrono = "0.4" chrono = "0.4"
chrono-tz = "0.5"
lazy_static = "1.1" lazy_static = "1.1"
num-traits = "0.2" num-traits = "0.2"
rust_decimal = "^0.10.1" rust_decimal = "^0.10.1"

View File

@ -67,7 +67,7 @@ Further examples can be found in the [examples](examples) directory on internati
# Usage # Usage
`dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX, `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDK's, but is not BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDK's, but is not
tested against them. tested against them.

View File

@ -8,7 +8,6 @@ use dtparse::ParserInfo;
use std::collections::HashMap; use std::collections::HashMap;
fn main() { fn main() {
// In this example, we'll just swap the default "months" parameter // In this example, we'll just swap the default "months" parameter
// with a version in Russian. Lovingly taken from: // with a version in Russian. Lovingly taken from:
// https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244 // https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244
@ -26,14 +25,24 @@ fn main() {
vec!["сен", "Сентябрь"], vec!["сен", "Сентябрь"],
vec!["окт", "Октябрь"], vec!["окт", "Октябрь"],
vec!["ноя", "Ноябрь"], vec!["ноя", "Ноябрь"],
vec!["дек", "Декабрь"] vec!["дек", "Декабрь"],
]); ]);
let p = Parser::new(info); let p = Parser::new(info);
assert_eq!( assert_eq!(
p.parse("10 Сентябрь 2015 10:20", None, None, false, false, None, false, &HashMap::new()) p.parse(
.unwrap().0, "10 Сентябрь 2015 10:20",
None,
None,
false,
false,
None,
false,
&HashMap::new()
)
.unwrap()
.0,
NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0) NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0)
); );
} }

View File

@ -4,23 +4,23 @@
//! # dtparse //! # dtparse
//! The fully-featured "even I couldn't understand that" time parser. //! The fully-featured "even I couldn't understand that" time parser.
//! Designed to take in strings and give back sensible dates and times. //! Designed to take in strings and give back sensible dates and times.
//! //!
//! dtparse has its foundations in the [`dateutil`](dateutil) library for //! dtparse has its foundations in the [`dateutil`](dateutil) library for
//! Python, which excels at taking "interesting" strings and trying to make //! Python, which excels at taking "interesting" strings and trying to make
//! sense of the dates and times they contain. A couple of quick examples //! sense of the dates and times they contain. A couple of quick examples
//! from the test cases should give some context: //! from the test cases should give some context:
//! //!
//! ```rust,ignore (tests-dont-compile-on-old-rust) //! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono; //! # extern crate chrono;
//! # extern crate dtparse; //! # extern crate dtparse;
//! use chrono::prelude::*; //! use chrono::prelude::*;
//! use dtparse::parse; //! use dtparse::parse;
//! //!
//! assert_eq!( //! assert_eq!(
//! parse("2008.12.30"), //! parse("2008.12.30"),
//! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None)) //! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None))
//! ); //! );
//! //!
//! // It can even handle timezones! //! // It can even handle timezones!
//! assert_eq!( //! assert_eq!(
//! parse("January 4, 2024; 18:30:04 +02:00"), //! parse("January 4, 2024; 18:30:04 +02:00"),
@ -30,17 +30,17 @@
//! )) //! ))
//! ); //! );
//! ``` //! ```
//! //!
//! And we can even handle fuzzy strings where dates/times aren't the //! And we can even handle fuzzy strings where dates/times aren't the
//! only content if we dig into the implementation a bit! //! only content if we dig into the implementation a bit!
//! //!
//! ```rust,ignore (tests-dont-compile-on-old-rust) //! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono; //! # extern crate chrono;
//! # extern crate dtparse; //! # extern crate dtparse;
//! use chrono::prelude::*; //! use chrono::prelude::*;
//! use dtparse::Parser; //! use dtparse::Parser;
//! # use std::collections::HashMap; //! # use std::collections::HashMap;
//! //!
//! let mut p = Parser::default(); //! let mut p = Parser::default();
//! assert_eq!( //! assert_eq!(
//! p.parse( //! p.parse(
@ -58,21 +58,22 @@
//! )) //! ))
//! ); //! );
//! ``` //! ```
//! //!
//! Further examples can be found in the `examples` directory on international usage. //! Further examples can be found in the `examples` directory on international usage.
//! //!
//! # Usage //! # Usage
//! //!
//! `dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX, //! `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDK's, but is not //! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDK's, but is not
//! tested against them. //! tested against them.
//! //!
//! [dateutil]: https://github.com/dateutil/dateutil //! [dateutil]: https://github.com/dateutil/dateutil
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
extern crate chrono; extern crate chrono;
extern crate chrono_tz;
extern crate num_traits; extern crate num_traits;
extern crate rust_decimal; extern crate rust_decimal;
@ -83,12 +84,15 @@ use chrono::Local;
use chrono::NaiveDate; use chrono::NaiveDate;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use chrono::NaiveTime; use chrono::NaiveTime;
use chrono::Offset;
use chrono::TimeZone;
use chrono::Timelike; use chrono::Timelike;
use chrono_tz::Tz;
use num_traits::cast::ToPrimitive; use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError; use rust_decimal::Error as DecimalError;
use std::collections::HashMap;
use std::cmp::min; use std::cmp::min;
use std::collections::HashMap;
use std::num::ParseIntError; use std::num::ParseIntError;
use std::str::FromStr; use std::str::FromStr;
use std::vec::Vec; use std::vec::Vec;
@ -174,7 +178,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
} }
/// Container for specific tokens to be recognized during parsing. /// Container for specific tokens to be recognized during parsing.
/// ///
/// - `jump`: Values that indicate the end of a token for parsing and can be ignored /// - `jump`: Values that indicate the end of a token for parsing and can be ignored
/// - `weekday`: Names of the days of the week /// - `weekday`: Names of the days of the week
/// - `months`: Names of the months /// - `months`: Names of the months
@ -187,7 +191,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
/// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year /// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year
/// - `year`: The current year /// - `year`: The current year
/// - `century`: The first year in the current century /// - `century`: The first year in the current century
/// ///
/// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence /// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence
/// and will be parsed as "YDM" /// and will be parsed as "YDM"
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -228,12 +232,10 @@ impl Default for ParserInfo {
let century = year / 100 * 100; let century = year / 100 * 100;
ParserInfo { ParserInfo {
jump: parse_info(vec![ jump: parse_info(vec![vec![
vec![ " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "nd", "rd", "th",
"st", "nd", "rd", "th", ]]),
],
]),
weekday: parse_info(vec![ weekday: parse_info(vec![
vec!["Mon", "Monday"], vec!["Mon", "Monday"],
vec!["Tue", "Tues", "Tuesday"], vec!["Tue", "Tues", "Tuesday"],
@ -341,7 +343,8 @@ impl ParserInfo {
if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) {
res.tzname = Some("UTC".to_owned()); res.tzname = Some("UTC".to_owned());
res.tzoffset = Some(0); res.tzoffset = Some(0);
} else if res.tzoffset != Some(0) && res.tzname.is_some() } else if res.tzoffset != Some(0)
&& res.tzname.is_some()
&& self.utczone_index(res.tzname.as_ref().unwrap()) && self.utczone_index(res.tzname.as_ref().unwrap())
{ {
res.tzoffset = Some(0); res.tzoffset = Some(0);
@ -358,16 +361,16 @@ fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
}; };
match month { match month {
2 => if leap_year { 2 => {
Ok(29) if leap_year {
} else { Ok(29)
Ok(28) } else {
}, Ok(28)
}
}
1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
4 | 6 | 9 | 11 => Ok(30), 4 | 6 | 9 | 11 => Ok(30),
_ => { _ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
} }
} }
@ -421,9 +424,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -435,9 +436,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -498,19 +497,15 @@ impl YMD {
} }
if self._ymd.len() != strids.len() { if self._ymd.len() != strids.len() {
return Err(ParseError::YearMonthDayError("Tried to resolve year, month, and day without enough information")); return Err(ParseError::YearMonthDayError(
"Tried to resolve year, month, and day without enough information",
));
} }
Ok(( Ok((
strids strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
.get(&YMDLabel::Year) strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
.map(|i| self._ymd[*i]), strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Month)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Day)
.map(|i| self._ymd[*i]),
)) ))
} }
@ -523,28 +518,24 @@ impl YMD {
let len_ymd = self._ymd.len(); let len_ymd = self._ymd.len();
let mut strids: HashMap<YMDLabel, usize> = HashMap::new(); let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
self.ystridx self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
.map(|u| strids.insert(YMDLabel::Year, u)); self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
self.mstridx self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx
.map(|u| strids.insert(YMDLabel::Day, u));
// TODO: More Rustiomatic way of doing this? // TODO: More Rustiomatic way of doing this?
if len_ymd == strids.len() && !strids.is_empty() if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
|| (len_ymd == 3 && strids.len() == 2)
{
return self.resolve_from_stridxs(&mut strids); return self.resolve_from_stridxs(&mut strids);
}; };
// Received year, month, day, and ??? // Received year, month, day, and ???
if len_ymd > 3 { if len_ymd > 3 {
return Err(ParseError::YearMonthDayError("Received extra tokens in resolving year, month, and day")); return Err(ParseError::YearMonthDayError(
"Received extra tokens in resolving year, month, and day",
));
} }
match (len_ymd, self.mstridx) { match (len_ymd, self.mstridx) {
(1, Some(val)) | (1, Some(val)) | (2, Some(val)) => {
(2, Some(val)) => {
let other = if len_ymd == 1 { let other = if len_ymd == 1 {
self._ymd[0] self._ymd[0]
} else { } else {
@ -554,7 +545,7 @@ impl YMD {
return Ok((Some(other), Some(self._ymd[val]), None)); return Ok((Some(other), Some(self._ymd[val]), None));
} }
return Ok((None, Some(self._ymd[val]), Some(other))); return Ok((None, Some(self._ymd[val]), Some(other)));
}, }
(2, None) => { (2, None) => {
if self._ymd[0] > 31 { if self._ymd[0] > 31 {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None)); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
@ -566,28 +557,29 @@ impl YMD {
return Ok((None, Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((None, Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(0)) => { (3, Some(0)) => {
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2]))); return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(1)) => { (3, Some(1)) => {
if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) { if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2]))); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}, }
(3, Some(2)) => { (3, Some(2)) => {
// It was in the original docs, so: WTF!? // It was in the original docs, so: WTF!?
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1]))); return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
}, }
(3, None) => { (3, None) => {
if self._ymd[0] > 31 || self.ystridx == Some(0) if self._ymd[0] > 31
|| self.ystridx == Some(0)
|| (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31) || (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
{ {
if dayfirst && self._ymd[2] <= 12 { if dayfirst && self._ymd[2] <= 12 {
@ -598,8 +590,10 @@ impl YMD {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(_, _) => { return Ok((None, None, None)); }, (_, _) => {
return Ok((None, None, None));
}
} }
} }
} }
@ -631,7 +625,7 @@ pub struct Parser {
impl Parser { impl Parser {
/// Create a new `Parser` instance using the provided `ParserInfo`. /// Create a new `Parser` instance using the provided `ParserInfo`.
/// ///
/// This method allows you to set up a parser to handle different /// This method allows you to set up a parser to handle different
/// names for days of the week, months, etc., enabling customization /// names for days of the week, months, etc., enabling customization
/// for different languages or extra values. /// for different languages or extra values.
@ -642,27 +636,27 @@ impl Parser {
/// Main method to trigger parsing of a string using the previously-provided /// Main method to trigger parsing of a string using the previously-provided
/// parser information. Returns a naive timestamp along with timezone and /// parser information. Returns a naive timestamp along with timezone and
/// unused tokens if available. /// unused tokens if available.
/// ///
/// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous /// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous
/// dates. Consider the following scenarios where we parse the string '01.02.03' /// dates. Consider the following scenarios where we parse the string '01.02.03'
/// ///
/// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003` /// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003`
/// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001` /// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001`
/// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001` /// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001`
/// ///
/// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if /// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if
/// they are unrecognized. However, the unused tokens will not be returned /// they are unrecognized. However, the unused tokens will not be returned
/// unless `fuzzy_with_tokens` is set as `true`. /// unless `fuzzy_with_tokens` is set as `true`.
/// ///
/// `default` is the timestamp used to infer missing values, and is midnight /// `default` is the timestamp used to infer missing values, and is midnight
/// of the current day by default. For example, when parsing the text '2003', /// of the current day by default. For example, when parsing the text '2003',
/// we will use the current month and day as a default value, leading to a /// we will use the current month and day as a default value, leading to a
/// result of 'March 3, 2003' if the function was run using a default of /// result of 'March 3, 2003' if the function was run using a default of
/// March 3rd. /// March 3rd.
/// ///
/// `ignoretz` forces the parser to ignore timezone information even if it /// `ignoretz` forces the parser to ignore timezone information even if it
/// is recognized in the time string /// is recognized in the time string
/// ///
/// `tzinfos` is a map of timezone names to the offset seconds. For example, /// `tzinfos` is a map of timezone names to the offset seconds. For example,
/// the parser would ignore the 'EST' part of the string in '10 AM EST' /// the parser would ignore the 'EST' part of the string in '10 AM EST'
/// unless you added a `tzinfos` map of `{"EST": "14400"}`. Please note that /// unless you added a `tzinfos` map of `{"EST": "14400"}`. Please note that
@ -754,7 +748,9 @@ impl Parser {
} }
i += 2; i += 2;
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " } else if i + 4 < len_l
&& l[i + 1] == l[i + 3]
&& l[i + 3] == " "
&& self.info.pertain_index(&l[i + 2]) && self.info.pertain_index(&l[i + 2])
{ {
// Jan of 01 // Jan of 01
@ -792,7 +788,7 @@ impl Parser {
} else { } else {
"+".to_owned() "+".to_owned()
}; };
l[i+1] = item; l[i + 1] = item;
res.tzoffset = None; res.tzoffset = None;
@ -828,8 +824,11 @@ impl Parser {
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60)); Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
let tzname = res.tzname.clone(); let tzname = res.tzname.clone();
if i + 5 < len_l && self.info.jump_index(&l[i + 2]) && l[i + 3] == "(" if i + 5 < len_l
&& l[i + 5] == ")" && 3 <= l[i + 4].len() && self.info.jump_index(&l[i + 2])
&& l[i + 3] == "("
&& l[i + 5] == ")"
&& 3 <= l[i + 4].len()
&& self.could_be_tzname(res.hour, &tzname, None, &l[i + 4]) && self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
{ {
// (GMT) // (GMT)
@ -875,7 +874,10 @@ impl Parser {
.chars() .chars()
.all(|c| 65u8 as char <= c && c <= 90u8 as char); .all(|c| 65u8 as char <= c && c <= 90u8 as char);
hour.is_some() && tzname.is_none() && tzoffset.is_none() && token.len() <= 5 hour.is_some()
&& tzname.is_none()
&& tzoffset.is_none()
&& token.len() <= 5
&& all_ascii_upper && all_ascii_upper
} }
@ -899,7 +901,11 @@ impl Parser {
Ok(val_is_ampm) Ok(val_is_ampm)
} }
fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> { fn build_naive(
&self,
res: &ParsingResult,
default: &NaiveDateTime,
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year()); let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32; let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
@ -919,7 +925,10 @@ impl Parser {
let d = NaiveDate::from_ymd( let d = NaiveDate::from_ymd(
y, y,
m, m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?) min(
res.day.unwrap_or(default.day() as i32) as u32,
days_in_month(y, m as i32)?,
),
); );
let d = d + d_offset; let d = d + d_offset;
@ -927,37 +936,40 @@ impl Parser {
let hour = res.hour.unwrap_or(default.hour() as i32) as u32; let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32; let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32; let second = res.second.unwrap_or(default.second() as i32) as u32;
let microsecond = res.microsecond let microsecond = res
.microsecond
.unwrap_or(default.timestamp_subsec_micros() as i32) as u32; .unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
let t = NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| { let t =
if hour >= 24 { NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
ParseError::ImpossibleTimestamp("Invalid hour") if hour >= 24 {
} else if minute >= 60 { ParseError::ImpossibleTimestamp("Invalid hour")
ParseError::ImpossibleTimestamp("Invalid minute") } else if minute >= 60 {
} else if second >= 60 { ParseError::ImpossibleTimestamp("Invalid minute")
ParseError::ImpossibleTimestamp("Invalid second") } else if second >= 60 {
} else if microsecond >= 2_000_000 { ParseError::ImpossibleTimestamp("Invalid second")
ParseError::ImpossibleTimestamp("Invalid microsecond") } else if microsecond >= 2_000_000 {
} else { ParseError::ImpossibleTimestamp("Invalid microsecond")
unreachable!(); } else {
} unreachable!();
})?; }
})?;
Ok(NaiveDateTime::new(d, t)) Ok(NaiveDateTime::new(d, t))
} }
fn build_tzaware( fn build_tzaware(
&self, &self,
_dt: &NaiveDateTime, dt: &NaiveDateTime,
res: &ParsingResult, res: &ParsingResult,
tzinfos: &HashMap<String, i32>, tzinfos: &HashMap<String, i32>,
) -> ParseResult<Option<FixedOffset>> { ) -> ParseResult<Option<FixedOffset>> {
// TODO: Actual timezone support
if let Some(offset) = res.tzoffset { if let Some(offset) = res.tzoffset {
Ok(Some(FixedOffset::east(offset))) Ok(Some(FixedOffset::east(offset)))
} else if res.tzoffset == None } else if res.tzoffset == None
&& (res.tzname == Some(" ".to_owned()) || res.tzname == Some(".".to_owned()) && (res.tzname == Some(" ".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == None) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
{ {
Ok(None) Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) { } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -965,9 +977,15 @@ impl Parser {
*tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(), *tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
))) )))
} else if res.tzname.is_some() { } else if res.tzname.is_some() {
// TODO: Dateutil issues a warning/deprecation notice here. Should we force the issue? let tzname = res.tzname.as_ref().unwrap();
println!("tzname {} identified but not understood. Ignoring for the time being, but behavior is subject to change.", res.tzname.as_ref().unwrap()); let tz: Result<Tz, String> = tzname.parse();
Ok(None) if tz.is_ok() {
let offset = tz.unwrap().offset_from_local_datetime(dt).unwrap().fix();
Ok(Some(offset))
} else {
println!("tzname {} identified but not understood ({}). Ignoring for the time being, but behavior is subject to change.", tzname, tz.unwrap_err());
Ok(None)
}
} else { } else {
Err(ParseError::TimezoneUnsupported) Err(ParseError::TimezoneUnsupported)
} }
@ -991,7 +1009,9 @@ impl Parser {
// TODO: I miss the `x in y` syntax // TODO: I miss the `x in y` syntax
// TODO: Decompose this logic a bit // TODO: Decompose this logic a bit
if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none() if ymd.len() == 3
&& (len_li == 2 || len_li == 4)
&& res.hour.is_none()
&& (idx + 1 >= len_l && (idx + 1 >= len_l
|| (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none())) || (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
{ {
@ -1022,7 +1042,11 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) { } else if vec![8, 12, 14].contains(&len_li) {
// YYMMDD // YYMMDD
let s = &tokens[idx]; let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?; ymd.append(
s[..4].parse::<i32>().unwrap(),
&s[..4],
Some(YMDLabel::Year),
)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?; ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
@ -1079,10 +1103,10 @@ impl Parser {
if let Some(value) = info.month_index(&tokens[idx + 4]) { if let Some(value) = info.month_index(&tokens[idx + 4]) {
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?; ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
} else if let Ok(val) = tokens[idx + 4].parse::<i32>() { } else if let Ok(val) = tokens[idx + 4].parse::<i32>() {
ymd.append(val, &tokens[idx + 4], None)?; ymd.append(val, &tokens[idx + 4], None)?;
} else { } else {
return Err(ParseError::UnrecognizedFormat); return Err(ParseError::UnrecognizedFormat);
} }
idx += 2; idx += 2;
} }
@ -1160,7 +1184,7 @@ impl Parser {
len_l - 2 len_l - 2
} else if idx > 1 { } else if idx > 1 {
idx - 2 idx - 2
} else if len_l == 0{ } else if len_l == 0 {
panic!("Attempting to find_hms_index() wih no tokens."); panic!("Attempting to find_hms_index() wih no tokens.");
} else { } else {
0 0
@ -1168,13 +1192,18 @@ impl Parser {
if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() { if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
hms_idx = Some(idx + 1) hms_idx = Some(idx + 1)
} else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " " } else if allow_jump
&& idx + 2 < len_l
&& tokens[idx + 1] == " "
&& info.hms_index(&tokens[idx + 2]).is_some() && info.hms_index(&tokens[idx + 2]).is_some()
{ {
hms_idx = Some(idx + 2) hms_idx = Some(idx + 2)
} else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() { } else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
hms_idx = Some(idx - 1) hms_idx = Some(idx - 1)
} else if len_l > 0 && idx > 0 && idx == len_l - 1 && tokens[idx - 1] == " " } else if len_l > 0
&& idx > 0
&& idx == len_l - 1
&& tokens[idx - 1] == " "
&& info.hms_index(&tokens[idx_minus_two]).is_some() && info.hms_index(&tokens[idx_minus_two]).is_some()
{ {
hms_idx = Some(idx - 2) hms_idx = Some(idx - 2)
@ -1279,7 +1308,7 @@ fn ljust(s: &str, chars: usize, replace: char) -> String {
/// Main entry point for using `dtparse`. The parse function is responsible for /// Main entry point for using `dtparse`. The parse function is responsible for
/// taking in a string representing some time value, and turning it into /// taking in a string representing some time value, and turning it into
/// a timestamp with optional timezone information if it can be identified. /// a timestamp with optional timezone information if it can be identified.
/// ///
/// The default implementation assumes English values for names of months, /// The default implementation assumes English values for names of months,
/// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()` /// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()`
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> { pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {

View File

@ -7,18 +7,36 @@ use Parser;
#[test] #[test]
fn test_fuzz() { fn test_fuzz() {
assert_eq!(
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::ImpossibleTimestamp("Invalid month"))); parse("\x2D\x38\x31\x39\x34\x38\x34"),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
);
// Garbage in the third delimited field // Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"), assert_eq!(
Err(ParseError::UnrecognizedFormat)); parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat)
);
// OverflowError: Python int too large to convert to C long // OverflowError: Python int too large to convert to C long
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour)); // assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0); let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let p = Parser::default(); let p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, &HashMap::new()).unwrap(); let res = p
.parse(
"\x0D\x31",
None,
None,
false,
false,
Some(&default),
false,
&HashMap::new(),
)
.unwrap();
assert_eq!(res.0, default); assert_eq!(res.0, default);
assert_eq!(parse("\x2D\x2D\x32\x31\x38\x6D"), Err(ParseError::ImpossibleTimestamp("Invalid minute"))); assert_eq!(
parse("\x2D\x2D\x32\x31\x38\x6D"),
Err(ParseError::ImpossibleTimestamp("Invalid minute"))
);
} }

View File

@ -1,3 +1,4 @@
mod fuzzing; mod fuzzing;
mod pycompat_parser; mod pycompat_parser;
mod pycompat_tokenizer; mod pycompat_tokenizer;
mod tz;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,3 @@
//! This code has been generated by running the `build_pycompat_tokenizer.py` script //! This code has been generated by running the `build_pycompat_tokenizer.py` script
//! in the repository root. Please do not edit it, as your edits will be destroyed //! in the repository root. Please do not edit it, as your edits will be destroyed
//! upon re-running code generation. //! upon re-running code generation.
@ -12,7 +11,9 @@ fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
#[test] #[test]
fn test_tokenize0() { fn test_tokenize0() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28",
];
tokenize_assert("Thu Sep 25 10:36:28", comp); tokenize_assert("Thu Sep 25 10:36:28", comp);
} }
@ -294,7 +295,9 @@ fn test_tokenize46() {
#[test] #[test]
fn test_tokenize47() { fn test_tokenize47() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
} }
@ -306,7 +309,9 @@ fn test_tokenize48() {
#[test] #[test]
fn test_tokenize49() { fn test_tokenize49() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41",
];
tokenize_assert("2003-09-25T10:49:41", comp); tokenize_assert("2003-09-25T10:49:41", comp);
} }
@ -354,7 +359,9 @@ fn test_tokenize56() {
#[test] #[test]
fn test_tokenize57() { fn test_tokenize57() {
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"]; let comp = vec![
"2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502",
];
tokenize_assert("2003-09-25 10:49:41,502", comp); tokenize_assert("2003-09-25 10:49:41,502", comp);
} }
@ -510,7 +517,10 @@ fn test_tokenize82() {
#[test] #[test]
fn test_tokenize83() { fn test_tokenize83() {
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "]; let comp = vec![
" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":",
"01", ":", "02", " ", " ", " ", "am", " ", " ",
];
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp); tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
} }
@ -522,7 +532,9 @@ fn test_tokenize84() {
#[test] #[test]
fn test_tokenize85() { fn test_tokenize85() {
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"]; let comp = vec![
"1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM",
];
tokenize_assert("1996.July.10 AD 12:08 PM", comp); tokenize_assert("1996.July.10 AD 12:08 PM", comp);
} }
@ -558,25 +570,33 @@ fn test_tokenize90() {
#[test] #[test]
fn test_tokenize91() { fn test_tokenize91() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize92() { fn test_tokenize92() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize93() { fn test_tokenize93() {
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"]; let comp = vec![
"July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am",
];
tokenize_assert("July 4, 1976 12:01:02 am", comp); tokenize_assert("July 4, 1976 12:01:02 am", comp);
} }
#[test] #[test]
fn test_tokenize94() { fn test_tokenize94() {
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"]; let comp = vec![
"Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995",
];
tokenize_assert("Mon Jan 2 04:24:27 1995", comp); tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
} }
@ -588,7 +608,9 @@ fn test_tokenize95() {
#[test] #[test]
fn test_tokenize96() { fn test_tokenize96() {
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"]; let comp = vec![
"Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578",
];
tokenize_assert("Jan 1 1999 11:23:34.578", comp); tokenize_assert("Jan 1 1999 11:23:34.578", comp);
} }
@ -618,13 +640,17 @@ fn test_tokenize100() {
#[test] #[test]
fn test_tokenize101() { fn test_tokenize101() {
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0099-01-01T00:00:00", comp); tokenize_assert("0099-01-01T00:00:00", comp);
} }
#[test] #[test]
fn test_tokenize102() { fn test_tokenize102() {
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0031-01-01T00:00:00", comp); tokenize_assert("0031-01-01T00:00:00", comp);
} }
@ -666,31 +692,42 @@ fn test_tokenize108() {
#[test] #[test]
fn test_tokenize109() { fn test_tokenize109() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize110() { fn test_tokenize110() {
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"]; let comp = vec![
"2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu",
];
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp); tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
} }
#[test] #[test]
fn test_tokenize111() { fn test_tokenize111() {
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"]; let comp = vec![
"Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-",
"0300",
];
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp); tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
} }
#[test] #[test]
fn test_tokenize112() { fn test_tokenize112() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp); tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
} }
#[test] #[test]
fn test_tokenize113() { fn test_tokenize113() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41-03:00", comp); tokenize_assert("2003-09-25T10:49:41-03:00", comp);
} }
@ -708,19 +745,27 @@ fn test_tokenize115() {
#[test] #[test]
fn test_tokenize116() { fn test_tokenize116() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3",
];
tokenize_assert("2018-08-10 10:00:00 UTC+3", comp); tokenize_assert("2018-08-10 10:00:00 UTC+3", comp);
} }
#[test] #[test]
fn test_tokenize117() { fn test_tokenize117() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-", "4"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-",
"4",
];
tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp); tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp);
} }
#[test] #[test]
fn test_tokenize118() { fn test_tokenize118() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-", "02", ":", "00"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-",
"02", ":", "00",
];
tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp); tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp);
} }
@ -828,91 +873,213 @@ fn test_tokenize135() {
#[test] #[test]
fn test_tokenize136() { fn test_tokenize136() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize137() { fn test_tokenize137() {
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"]; let comp = vec![
"1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ",
"PDT",
];
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp); tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
} }
#[test] #[test]
fn test_tokenize138() { fn test_tokenize138() {
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"]; let comp = vec![
"Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30",
":", "42", "pm", " ", "PST",
];
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp); tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
} }
#[test] #[test]
fn test_tokenize139() { fn test_tokenize139() {
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"]; let comp = vec![
"November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am",
" ", "EST",
];
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp); tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
} }
#[test] #[test]
fn test_tokenize140() { fn test_tokenize140() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00",
];
tokenize_assert("1994-11-05T08:15:30-05:00", comp); tokenize_assert("1994-11-05T08:15:30-05:00", comp);
} }
#[test] #[test]
fn test_tokenize141() { fn test_tokenize141() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z",
];
tokenize_assert("1994-11-05T08:15:30Z", comp); tokenize_assert("1994-11-05T08:15:30Z", comp);
} }
#[test] #[test]
fn test_tokenize142() { fn test_tokenize142() {
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"]; let comp = vec![
"1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z",
];
tokenize_assert("1976-07-04T00:01:02Z", comp); tokenize_assert("1976-07-04T00:01:02Z", comp);
} }
#[test] #[test]
fn test_tokenize143() { fn test_tokenize143() {
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"]; let comp = vec![
"Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995",
];
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp); tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
} }
#[test] #[test]
fn test_tokenize144() { fn test_tokenize144() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; let comp = vec![
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize145() { fn test_tokenize145() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; let comp = vec![
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize146() { fn test_tokenize146() {
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"]; let comp = vec![
"I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ",
"1974",
];
tokenize_assert("I have a meeting on March 1, 1974", comp); tokenize_assert("I have a meeting on March 1, 1974", comp);
} }
#[test] #[test]
fn test_tokenize147() { fn test_tokenize147() {
let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"]; let comp = vec![
tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp); "On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ",
"going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ",
"Mars",
];
tokenize_assert(
"On June 8th, 2020, I am going to be the first man on Mars",
comp,
);
} }
#[test] #[test]
fn test_tokenize148() { fn test_tokenize148() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"]; let comp = vec![
tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp); "Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset",
" ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",",
" ", "2003",
];
tokenize_assert(
"Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
comp,
);
} }
#[test] #[test]
fn test_tokenize149() { fn test_tokenize149() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"]; let comp = vec![
tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp); "Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December",
" ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on",
" ", "Sunset",
];
tokenize_assert(
"Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset",
comp,
);
} }
#[test] #[test]
fn test_tokenize150() { fn test_tokenize150() {
let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"]; let comp = vec![
"Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ",
"going", " ", "to", " ", "see", " ", "you", " ", "there", "?",
];
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp); tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
} }

20
src/tests/tz.rs Normal file
View File

@ -0,0 +1,20 @@
use parse;
#[test]
fn est() {
// Issue originally reported in https://github.com/bspeice/dtparse/issues/18
let dt = parse("Fri, 21 Aug 2015 18:37:44 EST");
assert!(dt.is_ok());
assert!(dt.unwrap().1.is_some());
}
#[test]
fn cest() {
// Issue originally reported in https://github.com/bspeice/dtparse/issues/18
let dt = parse("Fri, 21 Aug 2015 18:37:44 CEST");
assert!(dt.is_ok());
// TODO: Fix
// assert!(dt.unwrap().1.is_some());
}

View File

@ -14,7 +14,6 @@ pub(crate) enum ParseState {
} }
impl Tokenizer { impl Tokenizer {
pub(crate) fn new(parse_string: &str) -> Self { pub(crate) fn new(parse_string: &str) -> Self {
Tokenizer { Tokenizer {
token_stack: vec![], token_stack: vec![],
@ -92,7 +91,7 @@ impl Iterator for Tokenizer {
} else { } else {
break; break;
} }
}, }
ParseState::Alpha => { ParseState::Alpha => {
seenletters = true; seenletters = true;
if self.isword(nextchar) { if self.isword(nextchar) {
@ -105,19 +104,21 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::Numeric => { ParseState::Numeric => {
if self.isnum(nextchar) { if self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) { } else if nextchar == '.'
|| (nextchar == ',' && token.as_ref().unwrap().len() >= 2)
{
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal; state = ParseState::NumericDecimal;
} else { } else {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::AlphaDecimal => { ParseState::AlphaDecimal => {
seenletters = true; seenletters = true;
if nextchar == '.' || self.isword(nextchar) { if nextchar == '.' || self.isword(nextchar) {
@ -130,7 +131,7 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::NumericDecimal => { ParseState::NumericDecimal => {
if nextchar == '.' || self.isnum(nextchar) { if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
@ -150,20 +151,25 @@ impl Iterator for Tokenizer {
// We do something slightly different to express the same logic // We do something slightly different to express the same logic
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal { if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
// UNWRAP: The state check guarantees that we have a value // UNWRAP: The state check guarantees that we have a value
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count(); let dot_count = token
.as_ref()
.unwrap()
.chars()
.filter(|c| *c == '.')
.count();
let last_char = token.as_ref().unwrap().chars().last(); let last_char = token.as_ref().unwrap().chars().last();
let last_splittable = last_char == Some('.') || last_char == Some(','); let last_splittable = last_char == Some('.') || last_char == Some(',');
if seenletters || dot_count > 1 || last_splittable { if seenletters || dot_count > 1 || last_splittable {
let mut l = self.decimal_split(token.as_ref().unwrap()); let mut l = self.decimal_split(token.as_ref().unwrap());
let remaining = l.split_off(1); let remaining = l.split_off(1);
token = Some(l[0].clone()); token = Some(l[0].clone());
for t in remaining { for t in remaining {
self.token_stack.push(t); self.token_stack.push(t);
} }
} }
if state == ParseState::NumericDecimal && dot_count == 0 { if state == ParseState::NumericDecimal && dot_count == 0 {
token = Some(token.unwrap().replace(',', ".")); token = Some(token.unwrap().replace(',', "."));
} }

View File

@ -1,5 +1,5 @@
use ParseResult;
use ParseError; use ParseError;
use ParseResult;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum DayOfWeek { pub enum DayOfWeek {
@ -9,11 +9,10 @@ pub enum DayOfWeek {
Wednesday, Wednesday,
Thursday, Thursday,
Friday, Friday,
Saturday Saturday,
} }
impl DayOfWeek { impl DayOfWeek {
pub fn to_numeral(&self) -> u32 { pub fn to_numeral(&self) -> u32 {
match *self { match *self {
DayOfWeek::Sunday => 0, DayOfWeek::Sunday => 0,
@ -35,7 +34,7 @@ impl DayOfWeek {
4 => DayOfWeek::Thursday, 4 => DayOfWeek::Thursday,
5 => DayOfWeek::Friday, 5 => DayOfWeek::Friday,
6 => DayOfWeek::Saturday, 6 => DayOfWeek::Saturday,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -59,12 +58,12 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => { 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
let c = year / 100; let c = year / 100;
(c, year - 100 * c) (c, year - 100 * c)
}, }
1 | 2 => { 1 | 2 => {
let c = (year - 1) / 100; let c = (year - 1) / 100;
(c, year - 1 - 100 * c) (c, year - 1 - 100 * c)
}, }
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month")) _ => return Err(ParseError::ImpossibleTimestamp("Invalid month")),
}; };
let e = match month { let e = match month {
@ -75,7 +74,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
8 => 1, 8 => 1,
9 | 12 => 4, 9 | 12 => 4,
10 => 6, 10 => 6,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
// This implementation is Gregorian-only. // This implementation is Gregorian-only.
@ -84,7 +83,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
1 => 5, 1 => 5,
2 => 3, 2 => 3,
3 => 1, 3 => 1,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
match (day + e + f + g + g / 4) % 7 { match (day + e + f + g + g / 4) % 7 {
@ -95,7 +94,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
4 => Ok(DayOfWeek::Thursday), 4 => Ok(DayOfWeek::Thursday),
5 => Ok(DayOfWeek::Friday), 5 => Ok(DayOfWeek::Friday),
6 => Ok(DayOfWeek::Saturday), 6 => Ok(DayOfWeek::Saturday),
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -114,7 +113,6 @@ mod test {
#[test] #[test]
fn weekday_difference() { fn weekday_difference() {
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2);
@ -129,4 +127,4 @@ mod test {
assert_eq!(DayOfWeek::Friday.difference(&DayOfWeek::Sunday), 2); assert_eq!(DayOfWeek::Friday.difference(&DayOfWeek::Sunday), 2);
assert_eq!(DayOfWeek::Saturday.difference(&DayOfWeek::Sunday), 1); assert_eq!(DayOfWeek::Saturday.difference(&DayOfWeek::Sunday), 1);
} }
} }