1
0
mirror of https://github.com/bspeice/dtparse synced 2024-12-22 04:18:09 -05:00

Merge pull request #19 from bspeice/tz_fix

Attempt to read timezones from chrono-tz
This commit is contained in:
bspeice 2019-11-29 15:45:44 -05:00 committed by GitHub
commit 9f1b8d4971
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 2846 additions and 880 deletions

View File

@ -78,24 +78,7 @@ matrix:
# Historical Rust versions
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.21.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.22.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.23.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.24.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.25.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.26.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.27.0
# WASM support
- env: TARGET=asmjs-unknown-emscripten USE_CARGO_WEB=true
rust: nightly
rust: 1.28.0
before_install:
- set -e

View File

@ -18,6 +18,7 @@ name = "dtparse"
[dependencies]
chrono = "0.4"
chrono-tz = "0.5"
lazy_static = "1.1"
num-traits = "0.2"
rust_decimal = "^0.10.1"

View File

@ -67,7 +67,7 @@ Further examples can be found in the [examples](examples) directory on internati
# Usage
`dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX,
`dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDK's, but is not
tested against them.

View File

@ -8,7 +8,6 @@ use dtparse::ParserInfo;
use std::collections::HashMap;
fn main() {
// In this example, we'll just swap the default "months" parameter
// with a version in Russian. Lovingly taken from:
// https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244
@ -26,14 +25,24 @@ fn main() {
vec!["сен", "Сентябрь"],
vec!["окт", "Октябрь"],
vec!["ноя", "Ноябрь"],
vec!["дек", "Декабрь"]
vec!["дек", "Декабрь"],
]);
let p = Parser::new(info);
assert_eq!(
p.parse("10 Сентябрь 2015 10:20", None, None, false, false, None, false, &HashMap::new())
.unwrap().0,
p.parse(
"10 Сентябрь 2015 10:20",
None,
None,
false,
false,
None,
false,
&HashMap::new()
)
.unwrap()
.0,
NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0)
);
}

View File

@ -63,7 +63,7 @@
//!
//! # Usage
//!
//! `dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX,
//! `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDK's, but is not
//! tested against them.
//!
@ -73,6 +73,7 @@
extern crate lazy_static;
extern crate chrono;
extern crate chrono_tz;
extern crate num_traits;
extern crate rust_decimal;
@ -83,12 +84,15 @@ use chrono::Local;
use chrono::NaiveDate;
use chrono::NaiveDateTime;
use chrono::NaiveTime;
use chrono::Offset;
use chrono::TimeZone;
use chrono::Timelike;
use chrono_tz::Tz;
use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError;
use std::collections::HashMap;
use std::cmp::min;
use std::collections::HashMap;
use std::num::ParseIntError;
use std::str::FromStr;
use std::vec::Vec;
@ -228,12 +232,10 @@ impl Default for ParserInfo {
let century = year / 100 * 100;
ParserInfo {
jump: parse_info(vec![
vec![
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of",
"st", "nd", "rd", "th",
],
]),
jump: parse_info(vec![vec![
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
"nd", "rd", "th",
]]),
weekday: parse_info(vec![
vec!["Mon", "Monday"],
vec!["Tue", "Tues", "Tuesday"],
@ -341,7 +343,8 @@ impl ParserInfo {
if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) {
res.tzname = Some("UTC".to_owned());
res.tzoffset = Some(0);
} else if res.tzoffset != Some(0) && res.tzname.is_some()
} else if res.tzoffset != Some(0)
&& res.tzname.is_some()
&& self.utczone_index(res.tzname.as_ref().unwrap())
{
res.tzoffset = Some(0);
@ -358,16 +361,16 @@ fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
};
match month {
2 => if leap_year {
2 => {
if leap_year {
Ok(29)
} else {
Ok(28)
},
}
}
1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
4 | 6 | 9 | 11 => Ok(30),
_ => {
Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
_ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
}
}
@ -421,9 +424,7 @@ impl YMD {
Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
Some(YMDLabel::Day) => {
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
}
}
@ -435,9 +436,7 @@ impl YMD {
Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
Some(YMDLabel::Day) => {
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
}
}
@ -498,19 +497,15 @@ impl YMD {
}
if self._ymd.len() != strids.len() {
return Err(ParseError::YearMonthDayError("Tried to resolve year, month, and day without enough information"));
return Err(ParseError::YearMonthDayError(
"Tried to resolve year, month, and day without enough information",
));
}
Ok((
strids
.get(&YMDLabel::Year)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Month)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Day)
.map(|i| self._ymd[*i]),
strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
))
}
@ -523,28 +518,24 @@ impl YMD {
let len_ymd = self._ymd.len();
let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
self.ystridx
.map(|u| strids.insert(YMDLabel::Year, u));
self.mstridx
.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx
.map(|u| strids.insert(YMDLabel::Day, u));
self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
// TODO: More Rustiomatic way of doing this?
if len_ymd == strids.len() && !strids.is_empty()
|| (len_ymd == 3 && strids.len() == 2)
{
if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
return self.resolve_from_stridxs(&mut strids);
};
// Received year, month, day, and ???
if len_ymd > 3 {
return Err(ParseError::YearMonthDayError("Received extra tokens in resolving year, month, and day"));
return Err(ParseError::YearMonthDayError(
"Received extra tokens in resolving year, month, and day",
));
}
match (len_ymd, self.mstridx) {
(1, Some(val)) |
(2, Some(val)) => {
(1, Some(val)) | (2, Some(val)) => {
let other = if len_ymd == 1 {
self._ymd[0]
} else {
@ -554,7 +545,7 @@ impl YMD {
return Ok((Some(other), Some(self._ymd[val]), None));
}
return Ok((None, Some(self._ymd[val]), Some(other)));
},
}
(2, None) => {
if self._ymd[0] > 31 {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
@ -566,28 +557,29 @@ impl YMD {
return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
}
return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
},
}
(3, Some(0)) => {
if self._ymd[1] > 31 {
return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
}
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
},
}
(3, Some(1)) => {
if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
}
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
},
}
(3, Some(2)) => {
// It was in the original docs, so: WTF!?
if self._ymd[1] > 31 {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}
return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
},
}
(3, None) => {
if self._ymd[0] > 31 || self.ystridx == Some(0)
if self._ymd[0] > 31
|| self.ystridx == Some(0)
|| (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
{
if dayfirst && self._ymd[2] <= 12 {
@ -598,8 +590,10 @@ impl YMD {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
},
(_, _) => { return Ok((None, None, None)); },
}
(_, _) => {
return Ok((None, None, None));
}
}
}
}
@ -754,7 +748,9 @@ impl Parser {
}
i += 2;
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
} else if i + 4 < len_l
&& l[i + 1] == l[i + 3]
&& l[i + 3] == " "
&& self.info.pertain_index(&l[i + 2])
{
// Jan of 01
@ -792,7 +788,7 @@ impl Parser {
} else {
"+".to_owned()
};
l[i+1] = item;
l[i + 1] = item;
res.tzoffset = None;
@ -828,8 +824,11 @@ impl Parser {
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
let tzname = res.tzname.clone();
if i + 5 < len_l && self.info.jump_index(&l[i + 2]) && l[i + 3] == "("
&& l[i + 5] == ")" && 3 <= l[i + 4].len()
if i + 5 < len_l
&& self.info.jump_index(&l[i + 2])
&& l[i + 3] == "("
&& l[i + 5] == ")"
&& 3 <= l[i + 4].len()
&& self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
{
// (GMT)
@ -875,7 +874,10 @@ impl Parser {
.chars()
.all(|c| 65u8 as char <= c && c <= 90u8 as char);
hour.is_some() && tzname.is_none() && tzoffset.is_none() && token.len() <= 5
hour.is_some()
&& tzname.is_none()
&& tzoffset.is_none()
&& token.len() <= 5
&& all_ascii_upper
}
@ -899,7 +901,11 @@ impl Parser {
Ok(val_is_ampm)
}
fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> {
fn build_naive(
&self,
res: &ParsingResult,
default: &NaiveDateTime,
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
@ -919,7 +925,10 @@ impl Parser {
let d = NaiveDate::from_ymd(
y,
m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?)
min(
res.day.unwrap_or(default.day() as i32) as u32,
days_in_month(y, m as i32)?,
),
);
let d = d + d_offset;
@ -927,9 +936,11 @@ impl Parser {
let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32;
let microsecond = res.microsecond
let microsecond = res
.microsecond
.unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
let t = NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
let t =
NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
if hour >= 24 {
ParseError::ImpossibleTimestamp("Invalid hour")
} else if minute >= 60 {
@ -948,16 +959,17 @@ impl Parser {
fn build_tzaware(
&self,
_dt: &NaiveDateTime,
dt: &NaiveDateTime,
res: &ParsingResult,
tzinfos: &HashMap<String, i32>,
) -> ParseResult<Option<FixedOffset>> {
// TODO: Actual timezone support
if let Some(offset) = res.tzoffset {
Ok(Some(FixedOffset::east(offset)))
} else if res.tzoffset == None
&& (res.tzname == Some(" ".to_owned()) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == None)
&& (res.tzname == Some(" ".to_owned())
|| res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
{
Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -965,9 +977,15 @@ impl Parser {
*tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
)))
} else if res.tzname.is_some() {
// TODO: Dateutil issues a warning/deprecation notice here. Should we force the issue?
println!("tzname {} identified but not understood. Ignoring for the time being, but behavior is subject to change.", res.tzname.as_ref().unwrap());
let tzname = res.tzname.as_ref().unwrap();
let tz: Result<Tz, String> = tzname.parse();
if tz.is_ok() {
let offset = tz.unwrap().offset_from_local_datetime(dt).unwrap().fix();
Ok(Some(offset))
} else {
println!("tzname {} identified but not understood ({}). Ignoring for the time being, but behavior is subject to change.", tzname, tz.unwrap_err());
Ok(None)
}
} else {
Err(ParseError::TimezoneUnsupported)
}
@ -991,7 +1009,9 @@ impl Parser {
// TODO: I miss the `x in y` syntax
// TODO: Decompose this logic a bit
if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none()
if ymd.len() == 3
&& (len_li == 2 || len_li == 4)
&& res.hour.is_none()
&& (idx + 1 >= len_l
|| (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
{
@ -1022,7 +1042,11 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) {
// YYMMDD
let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?;
ymd.append(
s[..4].parse::<i32>().unwrap(),
&s[..4],
Some(YMDLabel::Year),
)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
@ -1160,7 +1184,7 @@ impl Parser {
len_l - 2
} else if idx > 1 {
idx - 2
} else if len_l == 0{
} else if len_l == 0 {
panic!("Attempting to find_hms_index() wih no tokens.");
} else {
0
@ -1168,13 +1192,18 @@ impl Parser {
if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
hms_idx = Some(idx + 1)
} else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " "
} else if allow_jump
&& idx + 2 < len_l
&& tokens[idx + 1] == " "
&& info.hms_index(&tokens[idx + 2]).is_some()
{
hms_idx = Some(idx + 2)
} else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
hms_idx = Some(idx - 1)
} else if len_l > 0 && idx > 0 && idx == len_l - 1 && tokens[idx - 1] == " "
} else if len_l > 0
&& idx > 0
&& idx == len_l - 1
&& tokens[idx - 1] == " "
&& info.hms_index(&tokens[idx_minus_two]).is_some()
{
hms_idx = Some(idx - 2)

View File

@ -7,18 +7,36 @@ use Parser;
#[test]
fn test_fuzz() {
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::ImpossibleTimestamp("Invalid month")));
assert_eq!(
parse("\x2D\x38\x31\x39\x34\x38\x34"),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
);
// Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat));
assert_eq!(
parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat)
);
// OverflowError: Python int too large to convert to C long
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, &HashMap::new()).unwrap();
let res = p
.parse(
"\x0D\x31",
None,
None,
false,
false,
Some(&default),
false,
&HashMap::new(),
)
.unwrap();
assert_eq!(res.0, default);
assert_eq!(parse("\x2D\x2D\x32\x31\x38\x6D"), Err(ParseError::ImpossibleTimestamp("Invalid minute")));
assert_eq!(
parse("\x2D\x2D\x32\x31\x38\x6D"),
Err(ParseError::ImpossibleTimestamp("Invalid minute"))
);
}

View File

@ -1,3 +1,4 @@
mod fuzzing;
mod pycompat_parser;
mod pycompat_tokenizer;
mod tz;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,3 @@
//! This code has been generated by running the `build_pycompat_tokenizer.py` script
//! in the repository root. Please do not edit it, as your edits will be destroyed
//! upon re-running code generation.
@ -12,7 +11,9 @@ fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
#[test]
fn test_tokenize0() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28",
];
tokenize_assert("Thu Sep 25 10:36:28", comp);
}
@ -294,7 +295,9 @@ fn test_tokenize46() {
#[test]
fn test_tokenize47() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
}
@ -306,7 +309,9 @@ fn test_tokenize48() {
#[test]
fn test_tokenize49() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"];
let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41",
];
tokenize_assert("2003-09-25T10:49:41", comp);
}
@ -354,7 +359,9 @@ fn test_tokenize56() {
#[test]
fn test_tokenize57() {
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"];
let comp = vec![
"2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502",
];
tokenize_assert("2003-09-25 10:49:41,502", comp);
}
@ -510,7 +517,10 @@ fn test_tokenize82() {
#[test]
fn test_tokenize83() {
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "];
let comp = vec![
" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":",
"01", ":", "02", " ", " ", " ", "am", " ", " ",
];
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
}
@ -522,7 +532,9 @@ fn test_tokenize84() {
#[test]
fn test_tokenize85() {
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"];
let comp = vec![
"1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM",
];
tokenize_assert("1996.July.10 AD 12:08 PM", comp);
}
@ -558,25 +570,33 @@ fn test_tokenize90() {
#[test]
fn test_tokenize91() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"];
let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp);
}
#[test]
fn test_tokenize92() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"];
let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp);
}
#[test]
fn test_tokenize93() {
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"];
let comp = vec![
"July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am",
];
tokenize_assert("July 4, 1976 12:01:02 am", comp);
}
#[test]
fn test_tokenize94() {
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"];
let comp = vec![
"Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995",
];
tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
}
@ -588,7 +608,9 @@ fn test_tokenize95() {
#[test]
fn test_tokenize96() {
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"];
let comp = vec![
"Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578",
];
tokenize_assert("Jan 1 1999 11:23:34.578", comp);
}
@ -618,13 +640,17 @@ fn test_tokenize100() {
#[test]
fn test_tokenize101() {
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"];
let comp = vec![
"0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0099-01-01T00:00:00", comp);
}
#[test]
fn test_tokenize102() {
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"];
let comp = vec![
"0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0031-01-01T00:00:00", comp);
}
@ -666,31 +692,42 @@ fn test_tokenize108() {
#[test]
fn test_tokenize109() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
}
#[test]
fn test_tokenize110() {
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"];
let comp = vec![
"2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu",
];
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
}
#[test]
fn test_tokenize111() {
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"];
let comp = vec![
"Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-",
"0300",
];
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
}
#[test]
fn test_tokenize112() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"];
let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
}
#[test]
fn test_tokenize113() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"];
let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41-03:00", comp);
}
@ -708,19 +745,27 @@ fn test_tokenize115() {
#[test]
fn test_tokenize116() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3"];
let comp = vec![
"2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3",
];
tokenize_assert("2018-08-10 10:00:00 UTC+3", comp);
}
#[test]
fn test_tokenize117() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-", "4"];
let comp = vec![
"2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-",
"4",
];
tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp);
}
#[test]
fn test_tokenize118() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-", "02", ":", "00"];
let comp = vec![
"2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-",
"02", ":", "00",
];
tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp);
}
@ -828,91 +873,213 @@ fn test_tokenize135() {
#[test]
fn test_tokenize136() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"];
let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
}
#[test]
fn test_tokenize137() {
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"];
let comp = vec![
"1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ",
"PDT",
];
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
}
#[test]
fn test_tokenize138() {
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"];
let comp = vec![
"Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30",
":", "42", "pm", " ", "PST",
];
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
}
#[test]
fn test_tokenize139() {
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"];
let comp = vec![
"November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am",
" ", "EST",
];
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
}
#[test]
fn test_tokenize140() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"];
let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00",
];
tokenize_assert("1994-11-05T08:15:30-05:00", comp);
}
#[test]
fn test_tokenize141() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"];
let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z",
];
tokenize_assert("1994-11-05T08:15:30Z", comp);
}
#[test]
fn test_tokenize142() {
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"];
let comp = vec![
"1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z",
];
tokenize_assert("1976-07-04T00:01:02Z", comp);
}
#[test]
fn test_tokenize143() {
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"];
let comp = vec![
"Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995",
];
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
}
#[test]
fn test_tokenize144() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
let comp = vec![
"Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
}
#[test]
fn test_tokenize145() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
let comp = vec![
"Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
}
#[test]
fn test_tokenize146() {
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"];
let comp = vec![
"I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ",
"1974",
];
tokenize_assert("I have a meeting on March 1, 1974", comp);
}
#[test]
fn test_tokenize147() {
let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"];
tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp);
let comp = vec![
"On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ",
"going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ",
"Mars",
];
tokenize_assert(
"On June 8th, 2020, I am going to be the first man on Mars",
comp,
);
}
#[test]
fn test_tokenize148() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"];
tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp);
let comp = vec![
"Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset",
" ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",",
" ", "2003",
];
tokenize_assert(
"Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
comp,
);
}
#[test]
fn test_tokenize149() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"];
tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp);
let comp = vec![
"Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December",
" ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on",
" ", "Sunset",
];
tokenize_assert(
"Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset",
comp,
);
}
#[test]
fn test_tokenize150() {
let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"];
let comp = vec![
"Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ",
"going", " ", "to", " ", "see", " ", "you", " ", "there", "?",
];
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
}

20
src/tests/tz.rs Normal file
View File

@ -0,0 +1,20 @@
use parse;
#[test]
fn est() {
// Issue originally reported in https://github.com/bspeice/dtparse/issues/18
let dt = parse("Fri, 21 Aug 2015 18:37:44 EST");
assert!(dt.is_ok());
assert!(dt.unwrap().1.is_some());
}
#[test]
fn cest() {
// Issue originally reported in https://github.com/bspeice/dtparse/issues/18
let dt = parse("Fri, 21 Aug 2015 18:37:44 CEST");
assert!(dt.is_ok());
// TODO: Fix
// assert!(dt.unwrap().1.is_some());
}

View File

@ -14,7 +14,6 @@ pub(crate) enum ParseState {
}
impl Tokenizer {
pub(crate) fn new(parse_string: &str) -> Self {
Tokenizer {
token_stack: vec![],
@ -92,7 +91,7 @@ impl Iterator for Tokenizer {
} else {
break;
}
},
}
ParseState::Alpha => {
seenletters = true;
if self.isword(nextchar) {
@ -105,19 +104,21 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar);
break;
}
},
}
ParseState::Numeric => {
if self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar);
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) {
} else if nextchar == '.'
|| (nextchar == ',' && token.as_ref().unwrap().len() >= 2)
{
token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal;
} else {
self.parse_string.push(nextchar);
break;
}
},
}
ParseState::AlphaDecimal => {
seenletters = true;
if nextchar == '.' || self.isword(nextchar) {
@ -130,7 +131,7 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar);
break;
}
},
}
ParseState::NumericDecimal => {
if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
@ -150,7 +151,12 @@ impl Iterator for Tokenizer {
// We do something slightly different to express the same logic
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
// UNWRAP: The state check guarantees that we have a value
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count();
let dot_count = token
.as_ref()
.unwrap()
.chars()
.filter(|c| *c == '.')
.count();
let last_char = token.as_ref().unwrap().chars().last();
let last_splittable = last_char == Some('.') || last_char == Some(',');

View File

@ -1,5 +1,5 @@
use ParseResult;
use ParseError;
use ParseResult;
#[derive(Debug, PartialEq)]
pub enum DayOfWeek {
@ -9,11 +9,10 @@ pub enum DayOfWeek {
Wednesday,
Thursday,
Friday,
Saturday
Saturday,
}
impl DayOfWeek {
pub fn to_numeral(&self) -> u32 {
match *self {
DayOfWeek::Sunday => 0,
@ -35,7 +34,7 @@ impl DayOfWeek {
4 => DayOfWeek::Thursday,
5 => DayOfWeek::Friday,
6 => DayOfWeek::Saturday,
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
}
}
@ -59,12 +58,12 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
let c = year / 100;
(c, year - 100 * c)
},
}
1 | 2 => {
let c = (year - 1) / 100;
(c, year - 1 - 100 * c)
},
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month")),
};
let e = match month {
@ -75,7 +74,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
8 => 1,
9 | 12 => 4,
10 => 6,
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
};
// This implementation is Gregorian-only.
@ -84,7 +83,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
1 => 5,
2 => 3,
3 => 1,
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
};
match (day + e + f + g + g / 4) % 7 {
@ -95,7 +94,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
4 => Ok(DayOfWeek::Thursday),
5 => Ok(DayOfWeek::Friday),
6 => Ok(DayOfWeek::Saturday),
_ => panic!("Unreachable.")
_ => panic!("Unreachable."),
}
}
@ -114,7 +113,6 @@ mod test {
#[test]
fn weekday_difference() {
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2);