Merge pull request #19 from bspeice/tz_fix

Attempt to read timezones from chrono-tz
pull/24/head
bspeice 2019-11-29 15:45:44 -05:00 committed by GitHub
commit 9f1b8d4971
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 2846 additions and 880 deletions

View File

@ -78,24 +78,7 @@ matrix:
# Historical Rust versions
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.21.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.22.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.23.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.24.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.25.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.26.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.27.0
# WASM support
- env: TARGET=asmjs-unknown-emscripten USE_CARGO_WEB=true
rust: nightly
rust: 1.28.0
before_install:
- set -e

View File

@ -18,6 +18,7 @@ name = "dtparse"
[dependencies]
chrono = "0.4"
chrono-tz = "0.5"
lazy_static = "1.1"
num-traits = "0.2"
rust_decimal = "^0.10.1"

View File

@ -67,7 +67,7 @@ Further examples can be found in the [examples](examples) directory on internati
# Usage
`dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX,
`dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
tested against them.

View File

@ -8,7 +8,6 @@ use dtparse::ParserInfo;
use std::collections::HashMap;
fn main() {
// In this example, we'll just swap the default "months" parameter
// with a version in Russian. Lovingly taken from:
// https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244
@ -26,14 +25,24 @@ fn main() {
vec!["сен", "Сентябрь"],
vec!["окт", "Октябрь"],
vec!["ноя", "Ноябрь"],
vec!["дек", "Декабрь"]
vec!["дек", "Декабрь"],
]);
let p = Parser::new(info);
assert_eq!(
p.parse("10 Сентябрь 2015 10:20", None, None, false, false, None, false, &HashMap::new())
.unwrap().0,
p.parse(
"10 Сентябрь 2015 10:20",
None,
None,
false,
false,
None,
false,
&HashMap::new()
)
.unwrap()
.0,
NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0)
);
}

View File

@ -4,23 +4,23 @@
//! # dtparse
//! The fully-featured "even I couldn't understand that" time parser.
//! Designed to take in strings and give back sensible dates and times.
//!
//!
//! dtparse has its foundations in the [`dateutil`](dateutil) library for
//! Python, which excels at taking "interesting" strings and trying to make
//! sense of the dates and times they contain. A couple of quick examples
//! from the test cases should give some context:
//!
//!
//! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono;
//! # extern crate dtparse;
//! use chrono::prelude::*;
//! use dtparse::parse;
//!
//!
//! assert_eq!(
//! parse("2008.12.30"),
//! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None))
//! );
//!
//!
//! // It can even handle timezones!
//! assert_eq!(
//! parse("January 4, 2024; 18:30:04 +02:00"),
@ -30,17 +30,17 @@
//! ))
//! );
//! ```
//!
//!
//! And we can even handle fuzzy strings where dates/times aren't the
//! only content if we dig into the implementation a bit!
//!
//!
//! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono;
//! # extern crate dtparse;
//! use chrono::prelude::*;
//! use dtparse::Parser;
//! # use std::collections::HashMap;
//!
//!
//! let mut p = Parser::default();
//! assert_eq!(
//! p.parse(
@ -58,21 +58,22 @@
//! ))
//! );
//! ```
//!
//!
//! Further examples covering international usage can be found in the `examples` directory.
//!
//! # Usage
//!
//! `dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX,
//! `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
//! tested against them.
//!
//!
//! [dateutil]: https://github.com/dateutil/dateutil
#[macro_use]
extern crate lazy_static;
extern crate chrono;
extern crate chrono_tz;
extern crate num_traits;
extern crate rust_decimal;
@ -83,12 +84,15 @@ use chrono::Local;
use chrono::NaiveDate;
use chrono::NaiveDateTime;
use chrono::NaiveTime;
use chrono::Offset;
use chrono::TimeZone;
use chrono::Timelike;
use chrono_tz::Tz;
use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError;
use std::collections::HashMap;
use std::cmp::min;
use std::collections::HashMap;
use std::num::ParseIntError;
use std::str::FromStr;
use std::vec::Vec;
@ -174,7 +178,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
}
/// Container for specific tokens to be recognized during parsing.
///
///
/// - `jump`: Values that indicate the end of a token for parsing and can be ignored
/// - `weekday`: Names of the days of the week
/// - `months`: Names of the months
@ -187,7 +191,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
/// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year
/// - `year`: The current year
/// - `century`: The first year in the current century
///
///
/// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence
/// and will be parsed as "YDM"
#[derive(Debug, PartialEq)]
@ -228,12 +232,10 @@ impl Default for ParserInfo {
let century = year / 100 * 100;
ParserInfo {
jump: parse_info(vec![
vec![
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of",
"st", "nd", "rd", "th",
],
]),
jump: parse_info(vec![vec![
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
"nd", "rd", "th",
]]),
weekday: parse_info(vec![
vec!["Mon", "Monday"],
vec!["Tue", "Tues", "Tuesday"],
@ -341,7 +343,8 @@ impl ParserInfo {
if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) {
res.tzname = Some("UTC".to_owned());
res.tzoffset = Some(0);
} else if res.tzoffset != Some(0) && res.tzname.is_some()
} else if res.tzoffset != Some(0)
&& res.tzname.is_some()
&& self.utczone_index(res.tzname.as_ref().unwrap())
{
res.tzoffset = Some(0);
@ -358,16 +361,16 @@ fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
};
match month {
2 => if leap_year {
Ok(29)
} else {
Ok(28)
},
2 => {
if leap_year {
Ok(29)
} else {
Ok(28)
}
}
1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
4 | 6 | 9 | 11 => Ok(30),
_ => {
Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
_ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
}
}
@ -421,9 +424,7 @@ impl YMD {
Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
Some(YMDLabel::Day) => {
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
}
}
@ -435,9 +436,7 @@ impl YMD {
Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
Some(YMDLabel::Day) => {
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
}
}
@ -498,19 +497,15 @@ impl YMD {
}
if self._ymd.len() != strids.len() {
return Err(ParseError::YearMonthDayError("Tried to resolve year, month, and day without enough information"));
return Err(ParseError::YearMonthDayError(
"Tried to resolve year, month, and day without enough information",
));
}
Ok((
strids
.get(&YMDLabel::Year)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Month)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Day)
.map(|i| self._ymd[*i]),
strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
))
}
@ -523,28 +518,24 @@ impl YMD {
let len_ymd = self._ymd.len();
let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
self.ystridx
.map(|u| strids.insert(YMDLabel::Year, u));
self.mstridx
.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx
.map(|u| strids.insert(YMDLabel::Day, u));
self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
// TODO: More Rustiomatic way of doing this?
if len_ymd == strids.len() && !strids.is_empty()
|| (len_ymd == 3 && strids.len() == 2)
{
if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
return self.resolve_from_stridxs(&mut strids);
};
// Received year, month, day, and ???
if len_ymd > 3 {
return Err(ParseError::YearMonthDayError("Received extra tokens in resolving year, month, and day"));
return Err(ParseError::YearMonthDayError(
"Received extra tokens in resolving year, month, and day",
));
}
match (len_ymd, self.mstridx) {
(1, Some(val)) |
(2, Some(val)) => {
(1, Some(val)) | (2, Some(val)) => {
let other = if len_ymd == 1 {
self._ymd[0]
} else {
@ -554,7 +545,7 @@ impl YMD {
return Ok((Some(other), Some(self._ymd[val]), None));
}
return Ok((None, Some(self._ymd[val]), Some(other)));
},
}
(2, None) => {
if self._ymd[0] > 31 {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
@ -566,28 +557,29 @@ impl YMD {
return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
}
return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
},
}
(3, Some(0)) => {
if self._ymd[1] > 31 {
return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
}
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
},
}
(3, Some(1)) => {
if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
}
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
},
}
(3, Some(2)) => {
// It was in the original docs, so: WTF!?
if self._ymd[1] > 31 {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}
return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
},
}
(3, None) => {
if self._ymd[0] > 31 || self.ystridx == Some(0)
if self._ymd[0] > 31
|| self.ystridx == Some(0)
|| (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
{
if dayfirst && self._ymd[2] <= 12 {
@ -598,8 +590,10 @@ impl YMD {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
},
(_, _) => { return Ok((None, None, None)); },
}
(_, _) => {
return Ok((None, None, None));
}
}
}
}
@ -631,7 +625,7 @@ pub struct Parser {
impl Parser {
/// Create a new `Parser` instance using the provided `ParserInfo`.
///
///
/// This method allows you to set up a parser to handle different
/// names for days of the week, months, etc., enabling customization
/// for different languages or extra values.
@ -642,27 +636,27 @@ impl Parser {
/// Main method to trigger parsing of a string using the previously-provided
/// parser information. Returns a naive timestamp along with timezone and
/// unused tokens if available.
///
///
/// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous
/// dates. Consider the following scenarios where we parse the string '01.02.03'
///
///
/// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003`
/// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001`
/// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001`
///
///
/// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if
/// they are unrecognized. However, the unused tokens will not be returned
/// unless `fuzzy_with_tokens` is set as `true`.
///
///
/// `default` is the timestamp used to infer missing values, and is midnight
/// of the current day by default. For example, when parsing the text '2003',
/// we will use the current month and day as a default value, leading to a
/// result of 'March 3, 2003' if the function was run using a default of
/// March 3rd.
///
///
/// `ignoretz` forces the parser to ignore timezone information even if it
/// is recognized in the time string
///
///
/// `tzinfos` is a map of timezone names to the offset seconds. For example,
/// the parser would ignore the 'EST' part of the string in '10 AM EST'
/// unless you added a `tzinfos` map of `{"EST": -18000}`. Please note that
@ -754,7 +748,9 @@ impl Parser {
}
i += 2;
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
} else if i + 4 < len_l
&& l[i + 1] == l[i + 3]
&& l[i + 3] == " "
&& self.info.pertain_index(&l[i + 2])
{
// Jan of 01
@ -792,7 +788,7 @@ impl Parser {
} else {
"+".to_owned()
};
l[i+1] = item;
l[i + 1] = item;
res.tzoffset = None;
@ -828,8 +824,11 @@ impl Parser {
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
let tzname = res.tzname.clone();
if i + 5 < len_l && self.info.jump_index(&l[i + 2]) && l[i + 3] == "("
&& l[i + 5] == ")" && 3 <= l[i + 4].len()
if i + 5 < len_l
&& self.info.jump_index(&l[i + 2])
&& l[i + 3] == "("
&& l[i + 5] == ")"
&& 3 <= l[i + 4].len()
&& self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
{
// (GMT)
@ -875,7 +874,10 @@ impl Parser {
.chars()
.all(|c| 65u8 as char <= c && c <= 90u8 as char);
hour.is_some() && tzname.is_none() && tzoffset.is_none() && token.len() <= 5
hour.is_some()
&& tzname.is_none()
&& tzoffset.is_none()
&& token.len() <= 5
&& all_ascii_upper
}
@ -899,7 +901,11 @@ impl Parser {
Ok(val_is_ampm)
}
fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> {
fn build_naive(
&self,
res: &ParsingResult,
default: &NaiveDateTime,
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
@ -919,7 +925,10 @@ impl Parser {
let d = NaiveDate::from_ymd(
y,
m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?)
min(
res.day.unwrap_or(default.day() as i32) as u32,
days_in_month(y, m as i32)?,
),
);
let d = d + d_offset;
@ -927,37 +936,40 @@ impl Parser {
let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32;
let microsecond = res.microsecond
let microsecond = res
.microsecond
.unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
let t = NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
if hour >= 24 {
ParseError::ImpossibleTimestamp("Invalid hour")
} else if minute >= 60 {
ParseError::ImpossibleTimestamp("Invalid minute")
} else if second >= 60 {
ParseError::ImpossibleTimestamp("Invalid second")
} else if microsecond >= 2_000_000 {
ParseError::ImpossibleTimestamp("Invalid microsecond")
} else {
unreachable!();
}
})?;
let t =
NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
if hour >= 24 {
ParseError::ImpossibleTimestamp("Invalid hour")
} else if minute >= 60 {
ParseError::ImpossibleTimestamp("Invalid minute")
} else if second >= 60 {
ParseError::ImpossibleTimestamp("Invalid second")
} else if microsecond >= 2_000_000 {
ParseError::ImpossibleTimestamp("Invalid microsecond")
} else {
unreachable!();
}
})?;
Ok(NaiveDateTime::new(d, t))
}
fn build_tzaware(
&self,
_dt: &NaiveDateTime,
dt: &NaiveDateTime,
res: &ParsingResult,
tzinfos: &HashMap<String, i32>,
) -> ParseResult<Option<FixedOffset>> {
// TODO: Actual timezone support
if let Some(offset) = res.tzoffset {
Ok(Some(FixedOffset::east(offset)))
} else if res.tzoffset == None
&& (res.tzname == Some(" ".to_owned()) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == None)
&& (res.tzname == Some(" ".to_owned())
|| res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
{
Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -965,9 +977,15 @@ impl Parser {
*tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
)))
} else if res.tzname.is_some() {
// TODO: Dateutil issues a warning/deprecation notice here. Should we force the issue?
println!("tzname {} identified but not understood. Ignoring for the time being, but behavior is subject to change.", res.tzname.as_ref().unwrap());
Ok(None)
let tzname = res.tzname.as_ref().unwrap();
let tz: Result<Tz, String> = tzname.parse();
if tz.is_ok() {
let offset = tz.unwrap().offset_from_local_datetime(dt).unwrap().fix();
Ok(Some(offset))
} else {
println!("tzname {} identified but not understood ({}). Ignoring for the time being, but behavior is subject to change.", tzname, tz.unwrap_err());
Ok(None)
}
} else {
Err(ParseError::TimezoneUnsupported)
}
@ -991,7 +1009,9 @@ impl Parser {
// TODO: I miss the `x in y` syntax
// TODO: Decompose this logic a bit
if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none()
if ymd.len() == 3
&& (len_li == 2 || len_li == 4)
&& res.hour.is_none()
&& (idx + 1 >= len_l
|| (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
{
@ -1022,7 +1042,11 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) {
// YYMMDD
let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?;
ymd.append(
s[..4].parse::<i32>().unwrap(),
&s[..4],
Some(YMDLabel::Year),
)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
@ -1079,10 +1103,10 @@ impl Parser {
if let Some(value) = info.month_index(&tokens[idx + 4]) {
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
} else if let Ok(val) = tokens[idx + 4].parse::<i32>() {
ymd.append(val, &tokens[idx + 4], None)?;
} else {
return Err(ParseError::UnrecognizedFormat);
}
ymd.append(val, &tokens[idx + 4], None)?;
} else {
return Err(ParseError::UnrecognizedFormat);
}
idx += 2;
}
@ -1160,7 +1184,7 @@ impl Parser {
len_l - 2
} else if idx > 1 {
idx - 2
} else if len_l == 0{
} else if len_l == 0 {
panic!("Attempting to find_hms_index() wih no tokens.");
} else {
0
@ -1168,13 +1192,18 @@ impl Parser {
if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
hms_idx = Some(idx + 1)
} else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " "
} else if allow_jump
&& idx + 2 < len_l
&& tokens[idx + 1] == " "
&& info.hms_index(&tokens[idx + 2]).is_some()
{
hms_idx = Some(idx + 2)
} else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
hms_idx = Some(idx - 1)
} else if len_l > 0 && idx > 0 && idx == len_l - 1 && tokens[idx - 1] == " "
} else if len_l > 0
&& idx > 0
&& idx == len_l - 1
&& tokens[idx - 1] == " "
&& info.hms_index(&tokens[idx_minus_two]).is_some()
{
hms_idx = Some(idx - 2)
@ -1279,7 +1308,7 @@ fn ljust(s: &str, chars: usize, replace: char) -> String {
/// Main entry point for using `dtparse`. The parse function is responsible for
/// taking in a string representing some time value, and turning it into
/// a timestamp with optional timezone information if it can be identified.
///
///
/// The default implementation assumes English values for names of months,
/// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()`
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {

View File

@ -7,18 +7,36 @@ use Parser;
#[test]
fn test_fuzz() {
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::ImpossibleTimestamp("Invalid month")));
assert_eq!(
parse("\x2D\x38\x31\x39\x34\x38\x34"),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
);
// Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat));
assert_eq!(
parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat)
);
// OverflowError: Python int too large to convert to C long
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, &HashMap::new()).unwrap();
let res = p
.parse(
"\x0D\x31",
None,
None,
false,
false,
Some(&default),
false,
&HashMap::new(),
)
.unwrap();
assert_eq!(res.0, default);
assert_eq!(parse("\x2D\x2D\x32\x31\x38\x6D"), Err(ParseError::ImpossibleTimestamp("Invalid minute")));
assert_eq!(
parse("\x2D\x2D\x32\x31\x38\x6D"),
Err(ParseError::ImpossibleTimestamp("Invalid minute"))
);
}

View File

@ -1,3 +1,4 @@
mod fuzzing;
mod pycompat_parser;
mod pycompat_tokenizer;
mod tz;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,3 @@
//! This code has been generated by running the `build_pycompat_tokenizer.py` script
//! in the repository root. Please do not edit it, as your edits will be destroyed
//! upon re-running code generation.
@ -12,7 +11,9 @@ fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
#[test]
fn test_tokenize0() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28",
];
tokenize_assert("Thu Sep 25 10:36:28", comp);
}
@ -294,7 +295,9 @@ fn test_tokenize46() {
#[test]
fn test_tokenize47() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
}
@ -306,7 +309,9 @@ fn test_tokenize48() {
#[test]
fn test_tokenize49() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"];
let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41",
];
tokenize_assert("2003-09-25T10:49:41", comp);
}
@ -354,7 +359,9 @@ fn test_tokenize56() {
#[test]
fn test_tokenize57() {
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"];
let comp = vec![
"2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502",
];
tokenize_assert("2003-09-25 10:49:41,502", comp);
}
@ -510,7 +517,10 @@ fn test_tokenize82() {
#[test]
fn test_tokenize83() {
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "];
let comp = vec![
" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":",
"01", ":", "02", " ", " ", " ", "am", " ", " ",
];
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
}
@ -522,7 +532,9 @@ fn test_tokenize84() {
#[test]
fn test_tokenize85() {
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"];
let comp = vec![
"1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM",
];
tokenize_assert("1996.July.10 AD 12:08 PM", comp);
}
@ -558,25 +570,33 @@ fn test_tokenize90() {
#[test]
fn test_tokenize91() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"];
let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp);
}
#[test]
fn test_tokenize92() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"];
let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp);
}
#[test]
fn test_tokenize93() {
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"];
let comp = vec![
"July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am",
];
tokenize_assert("July 4, 1976 12:01:02 am", comp);
}
#[test]
fn test_tokenize94() {
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"];
let comp = vec![
"Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995",
];
tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
}
@ -588,7 +608,9 @@ fn test_tokenize95() {
#[test]
fn test_tokenize96() {
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"];
let comp = vec![
"Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578",
];
tokenize_assert("Jan 1 1999 11:23:34.578", comp);
}
@ -618,13 +640,17 @@ fn test_tokenize100() {
#[test]
fn test_tokenize101() {
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"];
let comp = vec![
"0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0099-01-01T00:00:00", comp);
}
#[test]
fn test_tokenize102() {
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"];
let comp = vec![
"0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0031-01-01T00:00:00", comp);
}
@ -666,31 +692,42 @@ fn test_tokenize108() {
#[test]
fn test_tokenize109() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
}
#[test]
fn test_tokenize110() {
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"];
let comp = vec![
"2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu",
];
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
}
#[test]
fn test_tokenize111() {
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"];
let comp = vec![
"Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-",
"0300",
];
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
}
#[test]
fn test_tokenize112() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"];
let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
}
#[test]
fn test_tokenize113() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"];
let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41-03:00", comp);
}
@ -708,19 +745,27 @@ fn test_tokenize115() {
#[test]
fn test_tokenize116() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3"];
let comp = vec![
"2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3",
];
tokenize_assert("2018-08-10 10:00:00 UTC+3", comp);
}
#[test]
fn test_tokenize117() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-", "4"];
let comp = vec![
"2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-",
"4",
];
tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp);
}
#[test]
fn test_tokenize118() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-", "02", ":", "00"];
let comp = vec![
"2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-",
"02", ":", "00",
];
tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp);
}
@ -828,91 +873,213 @@ fn test_tokenize135() {
#[test]
fn test_tokenize136() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
}
#[test]
fn test_tokenize137() {
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"];
let comp = vec![
"1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ",
"PDT",
];
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
}
#[test]
fn test_tokenize138() {
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"];
let comp = vec![
"Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30",
":", "42", "pm", " ", "PST",
];
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
}
#[test]
fn test_tokenize139() {
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"];
let comp = vec![
"November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am",
" ", "EST",
];
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
}
#[test]
fn test_tokenize140() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"];
let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00",
];
tokenize_assert("1994-11-05T08:15:30-05:00", comp);
}
#[test]
fn test_tokenize141() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"];
let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z",
];
tokenize_assert("1994-11-05T08:15:30Z", comp);
}
#[test]
fn test_tokenize142() {
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"];
let comp = vec![
"1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z",
];
tokenize_assert("1976-07-04T00:01:02Z", comp);
}
#[test]
fn test_tokenize143() {
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"];
let comp = vec![
"Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995",
];
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
}
#[test]
fn test_tokenize144() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
let comp = vec![
"Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
}
#[test]
fn test_tokenize145() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
let comp = vec![
"Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
}
#[test]
fn test_tokenize146() {
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"];
let comp = vec![
"I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ",
"1974",
];
tokenize_assert("I have a meeting on March 1, 1974", comp);
}
#[test]
fn test_tokenize147() {
    // Expected token stream: the ordinal "8th" is listed as "8" followed by
    // "th", and each comma and space is its own entry.
    let comp = vec![
        "On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ",
        "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ",
        "Mars",
    ];
    // The vector and the assertion used to appear twice (once unformatted,
    // once formatted); a single check is sufficient.
    tokenize_assert(
        "On June 8th, 2020, I am going to be the first man on Mars",
        comp,
    );
}
#[test]
fn test_tokenize148() {
    // Expected token stream: "AM/PM" is listed as "AM", "/", "PM", the time
    // "3:00" as "3", ":", "00", and the ordinal "3rd" as "3", "rd".
    let comp = vec![
        "Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset",
        " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",",
        " ", "2003",
    ];
    // The vector and the assertion used to appear twice (once unformatted,
    // once formatted); a single check is sufficient.
    tokenize_assert(
        "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
        comp,
    );
}
#[test]
fn test_tokenize149() {
    // Same words as test_tokenize148 with the date/time clause moved ahead of
    // the "AM/PM on Sunset" clause; the expected tokens follow the new order.
    let comp = vec![
        "Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December",
        " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on",
        " ", "Sunset",
    ];
    // The vector and the assertion used to appear twice (once unformatted,
    // once formatted); a single check is sufficient.
    tokenize_assert(
        "Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset",
        comp,
    );
}
#[test]
fn test_tokenize150() {
    // Expected token stream: "14:45" is listed as "14", ":", "45" and the
    // trailing "?" is its own entry after "there".
    // A duplicate, immediately shadowed copy of this vector was removed.
    let comp = vec![
        "Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ",
        "going", " ", "to", " ", "see", " ", "you", " ", "there", "?",
    ];
    tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
}

20
src/tests/tz.rs Normal file
View File

@ -0,0 +1,20 @@
use parse;
#[test]
fn est() {
    // Regression test for https://github.com/bspeice/dtparse/issues/18:
    // a timestamp ending in the "EST" abbreviation must parse successfully
    // and the second field of the parsed value must be populated.
    let parsed = parse("Fri, 21 Aug 2015 18:37:44 EST");
    assert!(parsed.is_ok());

    let second_field = parsed.unwrap().1;
    assert!(second_field.is_some());
}
#[test]
fn cest() {
    // Regression test for https://github.com/bspeice/dtparse/issues/18:
    // a timestamp ending in the "CEST" abbreviation must at least parse
    // without returning an error.
    let parsed = parse("Fri, 21 Aug 2015 18:37:44 CEST");
    assert!(parsed.is_ok());

    // TODO: Fix
    // The check on the second field of the parsed value is disabled for now:
    // assert!(parsed.unwrap().1.is_some());
}

View File

@ -14,7 +14,6 @@ pub(crate) enum ParseState {
}
impl Tokenizer {
pub(crate) fn new(parse_string: &str) -> Self {
Tokenizer {
token_stack: vec![],
@ -92,7 +91,7 @@ impl Iterator for Tokenizer {
} else {
break;
}
},
}
ParseState::Alpha => {
seenletters = true;
if self.isword(nextchar) {
@ -105,19 +104,21 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar);
break;
}
},
}
ParseState::Numeric => {
if self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar);
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) {
} else if nextchar == '.'
|| (nextchar == ',' && token.as_ref().unwrap().len() >= 2)
{
token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal;
} else {
self.parse_string.push(nextchar);
break;
}
},
}
ParseState::AlphaDecimal => {
seenletters = true;
if nextchar == '.' || self.isword(nextchar) {
@ -130,7 +131,7 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar);
break;
}
},
}
ParseState::NumericDecimal => {
if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
@ -150,20 +151,25 @@ impl Iterator for Tokenizer {
// We do something slightly different to express the same logic
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
// UNWRAP: The state check guarantees that we have a value
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count();
let dot_count = token
.as_ref()
.unwrap()
.chars()
.filter(|c| *c == '.')
.count();
let last_char = token.as_ref().unwrap().chars().last();
let last_splittable = last_char == Some('.') || last_char == Some(',');
if seenletters || dot_count > 1 || last_splittable {
let mut l = self.decimal_split(token.as_ref().unwrap());
let remaining = l.split_off(1);
token = Some(l[0].clone());
for t in remaining {
self.token_stack.push(t);
}
}
if state == ParseState::NumericDecimal && dot_count == 0 {
token = Some(token.unwrap().replace(',', "."));
}

View File

@ -1,5 +1,5 @@
use ParseResult;
use ParseError;
use ParseResult;
#[derive(Debug, PartialEq)]
pub enum DayOfWeek {
@ -9,11 +9,10 @@ pub enum DayOfWeek {
Wednesday,
Thursday,
Friday,
Saturday
Saturday,
}
impl DayOfWeek {
pub fn to_numeral(&self) -> u32 {
match *self {
DayOfWeek::Sunday => 0,
@ -35,7 +34,7 @@ impl DayOfWeek {
4 => DayOfWeek::Thursday,
5 => DayOfWeek::Friday,
6 => DayOfWeek::Saturday,
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
}
}
@ -59,12 +58,12 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
let c = year / 100;
(c, year - 100 * c)
},
}
1 | 2 => {
let c = (year - 1) / 100;
(c, year - 1 - 100 * c)
},
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month")),
};
let e = match month {
@ -75,7 +74,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
8 => 1,
9 | 12 => 4,
10 => 6,
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
};
// This implementation is Gregorian-only.
@ -84,7 +83,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
1 => 5,
2 => 3,
3 => 1,
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
};
match (day + e + f + g + g / 4) % 7 {
@ -95,7 +94,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
4 => Ok(DayOfWeek::Thursday),
5 => Ok(DayOfWeek::Friday),
6 => Ok(DayOfWeek::Saturday),
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
}
}
@ -114,7 +113,6 @@ mod test {
#[test]
fn weekday_difference() {
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2);
@ -129,4 +127,4 @@ mod test {
assert_eq!(DayOfWeek::Friday.difference(&DayOfWeek::Sunday), 2);
assert_eq!(DayOfWeek::Saturday.difference(&DayOfWeek::Sunday), 1);
}
}
}