1
0
mirror of https://github.com/bspeice/dtparse synced 2024-12-22 04:18:09 -05:00

Merge pull request #19 from bspeice/tz_fix

Attempt to read timezones from chrono-tz
This commit is contained in:
bspeice 2019-11-29 15:45:44 -05:00 committed by GitHub
commit 9f1b8d4971
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 2846 additions and 880 deletions

View File

@ -78,24 +78,7 @@ matrix:
# Historical Rust versions # Historical Rust versions
- env: TARGET=x86_64-unknown-linux-gnu - env: TARGET=x86_64-unknown-linux-gnu
rust: 1.21.0 rust: 1.28.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.22.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.23.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.24.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.25.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.26.0
- env: TARGET=x86_64-unknown-linux-gnu
rust: 1.27.0
# WASM support
- env: TARGET=asmjs-unknown-emscripten USE_CARGO_WEB=true
rust: nightly
before_install: before_install:
- set -e - set -e

View File

@ -18,6 +18,7 @@ name = "dtparse"
[dependencies] [dependencies]
chrono = "0.4" chrono = "0.4"
chrono-tz = "0.5"
lazy_static = "1.1" lazy_static = "1.1"
num-traits = "0.2" num-traits = "0.2"
rust_decimal = "^0.10.1" rust_decimal = "^0.10.1"

View File

@ -67,7 +67,7 @@ Further examples can be found in the [examples](examples) directory on internati
# Usage # Usage
`dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX, `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
tested against them. tested against them.

View File

@ -8,7 +8,6 @@ use dtparse::ParserInfo;
use std::collections::HashMap; use std::collections::HashMap;
fn main() { fn main() {
// In this example, we'll just swap the default "months" parameter // In this example, we'll just swap the default "months" parameter
// with a version in Russian. Lovingly taken from: // with a version in Russian. Lovingly taken from:
// https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244 // https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244
@ -26,14 +25,24 @@ fn main() {
vec!["сен", "Сентябрь"], vec!["сен", "Сентябрь"],
vec!["окт", "Октябрь"], vec!["окт", "Октябрь"],
vec!["ноя", "Ноябрь"], vec!["ноя", "Ноябрь"],
vec!["дек", "Декабрь"] vec!["дек", "Декабрь"],
]); ]);
let p = Parser::new(info); let p = Parser::new(info);
assert_eq!( assert_eq!(
p.parse("10 Сентябрь 2015 10:20", None, None, false, false, None, false, &HashMap::new()) p.parse(
.unwrap().0, "10 Сентябрь 2015 10:20",
None,
None,
false,
false,
None,
false,
&HashMap::new()
)
.unwrap()
.0,
NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0) NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0)
); );
} }

View File

@ -63,7 +63,7 @@
//! //!
//! # Usage //! # Usage
//! //!
//! `dtparse` requires a minimum Rust version of 1.21 to build, but is tested on Windows, OSX, //! `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not //! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
//! tested against them. //! tested against them.
//! //!
@ -73,6 +73,7 @@
extern crate lazy_static; extern crate lazy_static;
extern crate chrono; extern crate chrono;
extern crate chrono_tz;
extern crate num_traits; extern crate num_traits;
extern crate rust_decimal; extern crate rust_decimal;
@ -83,12 +84,15 @@ use chrono::Local;
use chrono::NaiveDate; use chrono::NaiveDate;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use chrono::NaiveTime; use chrono::NaiveTime;
use chrono::Offset;
use chrono::TimeZone;
use chrono::Timelike; use chrono::Timelike;
use chrono_tz::Tz;
use num_traits::cast::ToPrimitive; use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError; use rust_decimal::Error as DecimalError;
use std::collections::HashMap;
use std::cmp::min; use std::cmp::min;
use std::collections::HashMap;
use std::num::ParseIntError; use std::num::ParseIntError;
use std::str::FromStr; use std::str::FromStr;
use std::vec::Vec; use std::vec::Vec;
@ -228,12 +232,10 @@ impl Default for ParserInfo {
let century = year / 100 * 100; let century = year / 100 * 100;
ParserInfo { ParserInfo {
jump: parse_info(vec![ jump: parse_info(vec![vec![
vec![ " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "nd", "rd", "th",
"st", "nd", "rd", "th", ]]),
],
]),
weekday: parse_info(vec![ weekday: parse_info(vec![
vec!["Mon", "Monday"], vec!["Mon", "Monday"],
vec!["Tue", "Tues", "Tuesday"], vec!["Tue", "Tues", "Tuesday"],
@ -341,7 +343,8 @@ impl ParserInfo {
if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) {
res.tzname = Some("UTC".to_owned()); res.tzname = Some("UTC".to_owned());
res.tzoffset = Some(0); res.tzoffset = Some(0);
} else if res.tzoffset != Some(0) && res.tzname.is_some() } else if res.tzoffset != Some(0)
&& res.tzname.is_some()
&& self.utczone_index(res.tzname.as_ref().unwrap()) && self.utczone_index(res.tzname.as_ref().unwrap())
{ {
res.tzoffset = Some(0); res.tzoffset = Some(0);
@ -358,16 +361,16 @@ fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
}; };
match month { match month {
2 => if leap_year { 2 => {
if leap_year {
Ok(29) Ok(29)
} else { } else {
Ok(28) Ok(28)
}, }
}
1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
4 | 6 | 9 | 11 => Ok(30), 4 | 6 | 9 | 11 => Ok(30),
_ => { _ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
} }
} }
@ -421,9 +424,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -435,9 +436,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -498,19 +497,15 @@ impl YMD {
} }
if self._ymd.len() != strids.len() { if self._ymd.len() != strids.len() {
return Err(ParseError::YearMonthDayError("Tried to resolve year, month, and day without enough information")); return Err(ParseError::YearMonthDayError(
"Tried to resolve year, month, and day without enough information",
));
} }
Ok(( Ok((
strids strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
.get(&YMDLabel::Year) strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
.map(|i| self._ymd[*i]), strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Month)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Day)
.map(|i| self._ymd[*i]),
)) ))
} }
@ -523,28 +518,24 @@ impl YMD {
let len_ymd = self._ymd.len(); let len_ymd = self._ymd.len();
let mut strids: HashMap<YMDLabel, usize> = HashMap::new(); let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
self.ystridx self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
.map(|u| strids.insert(YMDLabel::Year, u)); self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
self.mstridx self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
.map(|u| strids.insert(YMDLabel::Month, u));
self.dstridx
.map(|u| strids.insert(YMDLabel::Day, u));
// TODO: More Rustiomatic way of doing this? // TODO: More Rustiomatic way of doing this?
if len_ymd == strids.len() && !strids.is_empty() if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
|| (len_ymd == 3 && strids.len() == 2)
{
return self.resolve_from_stridxs(&mut strids); return self.resolve_from_stridxs(&mut strids);
}; };
// Received year, month, day, and ??? // Received year, month, day, and ???
if len_ymd > 3 { if len_ymd > 3 {
return Err(ParseError::YearMonthDayError("Received extra tokens in resolving year, month, and day")); return Err(ParseError::YearMonthDayError(
"Received extra tokens in resolving year, month, and day",
));
} }
match (len_ymd, self.mstridx) { match (len_ymd, self.mstridx) {
(1, Some(val)) | (1, Some(val)) | (2, Some(val)) => {
(2, Some(val)) => {
let other = if len_ymd == 1 { let other = if len_ymd == 1 {
self._ymd[0] self._ymd[0]
} else { } else {
@ -554,7 +545,7 @@ impl YMD {
return Ok((Some(other), Some(self._ymd[val]), None)); return Ok((Some(other), Some(self._ymd[val]), None));
} }
return Ok((None, Some(self._ymd[val]), Some(other))); return Ok((None, Some(self._ymd[val]), Some(other)));
}, }
(2, None) => { (2, None) => {
if self._ymd[0] > 31 { if self._ymd[0] > 31 {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None)); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
@ -566,28 +557,29 @@ impl YMD {
return Ok((None, Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((None, Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(0)) => { (3, Some(0)) => {
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2]))); return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(1)) => { (3, Some(1)) => {
if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) { if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2]))); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}, }
(3, Some(2)) => { (3, Some(2)) => {
// It was in the original docs, so: WTF!? // It was in the original docs, so: WTF!?
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1]))); return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
}, }
(3, None) => { (3, None) => {
if self._ymd[0] > 31 || self.ystridx == Some(0) if self._ymd[0] > 31
|| self.ystridx == Some(0)
|| (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31) || (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
{ {
if dayfirst && self._ymd[2] <= 12 { if dayfirst && self._ymd[2] <= 12 {
@ -598,8 +590,10 @@ impl YMD {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(_, _) => { return Ok((None, None, None)); }, (_, _) => {
return Ok((None, None, None));
}
} }
} }
} }
@ -754,7 +748,9 @@ impl Parser {
} }
i += 2; i += 2;
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " } else if i + 4 < len_l
&& l[i + 1] == l[i + 3]
&& l[i + 3] == " "
&& self.info.pertain_index(&l[i + 2]) && self.info.pertain_index(&l[i + 2])
{ {
// Jan of 01 // Jan of 01
@ -828,8 +824,11 @@ impl Parser {
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60)); Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
let tzname = res.tzname.clone(); let tzname = res.tzname.clone();
if i + 5 < len_l && self.info.jump_index(&l[i + 2]) && l[i + 3] == "(" if i + 5 < len_l
&& l[i + 5] == ")" && 3 <= l[i + 4].len() && self.info.jump_index(&l[i + 2])
&& l[i + 3] == "("
&& l[i + 5] == ")"
&& 3 <= l[i + 4].len()
&& self.could_be_tzname(res.hour, &tzname, None, &l[i + 4]) && self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
{ {
// (GMT) // (GMT)
@ -875,7 +874,10 @@ impl Parser {
.chars() .chars()
.all(|c| 65u8 as char <= c && c <= 90u8 as char); .all(|c| 65u8 as char <= c && c <= 90u8 as char);
hour.is_some() && tzname.is_none() && tzoffset.is_none() && token.len() <= 5 hour.is_some()
&& tzname.is_none()
&& tzoffset.is_none()
&& token.len() <= 5
&& all_ascii_upper && all_ascii_upper
} }
@ -899,7 +901,11 @@ impl Parser {
Ok(val_is_ampm) Ok(val_is_ampm)
} }
fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> { fn build_naive(
&self,
res: &ParsingResult,
default: &NaiveDateTime,
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year()); let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32; let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
@ -919,7 +925,10 @@ impl Parser {
let d = NaiveDate::from_ymd( let d = NaiveDate::from_ymd(
y, y,
m, m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?) min(
res.day.unwrap_or(default.day() as i32) as u32,
days_in_month(y, m as i32)?,
),
); );
let d = d + d_offset; let d = d + d_offset;
@ -927,9 +936,11 @@ impl Parser {
let hour = res.hour.unwrap_or(default.hour() as i32) as u32; let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32; let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32; let second = res.second.unwrap_or(default.second() as i32) as u32;
let microsecond = res.microsecond let microsecond = res
.microsecond
.unwrap_or(default.timestamp_subsec_micros() as i32) as u32; .unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
let t = NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| { let t =
NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
if hour >= 24 { if hour >= 24 {
ParseError::ImpossibleTimestamp("Invalid hour") ParseError::ImpossibleTimestamp("Invalid hour")
} else if minute >= 60 { } else if minute >= 60 {
@ -948,16 +959,17 @@ impl Parser {
fn build_tzaware( fn build_tzaware(
&self, &self,
_dt: &NaiveDateTime, dt: &NaiveDateTime,
res: &ParsingResult, res: &ParsingResult,
tzinfos: &HashMap<String, i32>, tzinfos: &HashMap<String, i32>,
) -> ParseResult<Option<FixedOffset>> { ) -> ParseResult<Option<FixedOffset>> {
// TODO: Actual timezone support
if let Some(offset) = res.tzoffset { if let Some(offset) = res.tzoffset {
Ok(Some(FixedOffset::east(offset))) Ok(Some(FixedOffset::east(offset)))
} else if res.tzoffset == None } else if res.tzoffset == None
&& (res.tzname == Some(" ".to_owned()) || res.tzname == Some(".".to_owned()) && (res.tzname == Some(" ".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == None) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
{ {
Ok(None) Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) { } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
@ -965,9 +977,15 @@ impl Parser {
*tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(), *tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
))) )))
} else if res.tzname.is_some() { } else if res.tzname.is_some() {
// TODO: Dateutil issues a warning/deprecation notice here. Should we force the issue? let tzname = res.tzname.as_ref().unwrap();
println!("tzname {} identified but not understood. Ignoring for the time being, but behavior is subject to change.", res.tzname.as_ref().unwrap()); let tz: Result<Tz, String> = tzname.parse();
if tz.is_ok() {
let offset = tz.unwrap().offset_from_local_datetime(dt).unwrap().fix();
Ok(Some(offset))
} else {
println!("tzname {} identified but not understood ({}). Ignoring for the time being, but behavior is subject to change.", tzname, tz.unwrap_err());
Ok(None) Ok(None)
}
} else { } else {
Err(ParseError::TimezoneUnsupported) Err(ParseError::TimezoneUnsupported)
} }
@ -991,7 +1009,9 @@ impl Parser {
// TODO: I miss the `x in y` syntax // TODO: I miss the `x in y` syntax
// TODO: Decompose this logic a bit // TODO: Decompose this logic a bit
if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none() if ymd.len() == 3
&& (len_li == 2 || len_li == 4)
&& res.hour.is_none()
&& (idx + 1 >= len_l && (idx + 1 >= len_l
|| (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none())) || (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
{ {
@ -1022,7 +1042,11 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) { } else if vec![8, 12, 14].contains(&len_li) {
// YYMMDD // YYMMDD
let s = &tokens[idx]; let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?; ymd.append(
s[..4].parse::<i32>().unwrap(),
&s[..4],
Some(YMDLabel::Year),
)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?; ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
@ -1168,13 +1192,18 @@ impl Parser {
if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() { if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
hms_idx = Some(idx + 1) hms_idx = Some(idx + 1)
} else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " " } else if allow_jump
&& idx + 2 < len_l
&& tokens[idx + 1] == " "
&& info.hms_index(&tokens[idx + 2]).is_some() && info.hms_index(&tokens[idx + 2]).is_some()
{ {
hms_idx = Some(idx + 2) hms_idx = Some(idx + 2)
} else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() { } else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
hms_idx = Some(idx - 1) hms_idx = Some(idx - 1)
} else if len_l > 0 && idx > 0 && idx == len_l - 1 && tokens[idx - 1] == " " } else if len_l > 0
&& idx > 0
&& idx == len_l - 1
&& tokens[idx - 1] == " "
&& info.hms_index(&tokens[idx_minus_two]).is_some() && info.hms_index(&tokens[idx_minus_two]).is_some()
{ {
hms_idx = Some(idx - 2) hms_idx = Some(idx - 2)

View File

@ -7,18 +7,36 @@ use Parser;
#[test] #[test]
fn test_fuzz() { fn test_fuzz() {
assert_eq!(
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::ImpossibleTimestamp("Invalid month"))); parse("\x2D\x38\x31\x39\x34\x38\x34"),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
);
// Garbage in the third delimited field // Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"), assert_eq!(
Err(ParseError::UnrecognizedFormat)); parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::UnrecognizedFormat)
);
// OverflowError: Python int too large to convert to C long // OverflowError: Python int too large to convert to C long
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour)); // assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0); let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let p = Parser::default(); let p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, &HashMap::new()).unwrap(); let res = p
.parse(
"\x0D\x31",
None,
None,
false,
false,
Some(&default),
false,
&HashMap::new(),
)
.unwrap();
assert_eq!(res.0, default); assert_eq!(res.0, default);
assert_eq!(parse("\x2D\x2D\x32\x31\x38\x6D"), Err(ParseError::ImpossibleTimestamp("Invalid minute"))); assert_eq!(
parse("\x2D\x2D\x32\x31\x38\x6D"),
Err(ParseError::ImpossibleTimestamp("Invalid minute"))
);
} }

View File

@ -1,3 +1,4 @@
mod fuzzing; mod fuzzing;
mod pycompat_parser; mod pycompat_parser;
mod pycompat_tokenizer; mod pycompat_tokenizer;
mod tz;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,3 @@
//! This code has been generated by running the `build_pycompat_tokenizer.py` script //! This code has been generated by running the `build_pycompat_tokenizer.py` script
//! in the repository root. Please do not edit it, as your edits will be destroyed //! in the repository root. Please do not edit it, as your edits will be destroyed
//! upon re-running code generation. //! upon re-running code generation.
@ -12,7 +11,9 @@ fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
#[test] #[test]
fn test_tokenize0() { fn test_tokenize0() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28",
];
tokenize_assert("Thu Sep 25 10:36:28", comp); tokenize_assert("Thu Sep 25 10:36:28", comp);
} }
@ -294,7 +295,9 @@ fn test_tokenize46() {
#[test] #[test]
fn test_tokenize47() { fn test_tokenize47() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
} }
@ -306,7 +309,9 @@ fn test_tokenize48() {
#[test] #[test]
fn test_tokenize49() { fn test_tokenize49() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41",
];
tokenize_assert("2003-09-25T10:49:41", comp); tokenize_assert("2003-09-25T10:49:41", comp);
} }
@ -354,7 +359,9 @@ fn test_tokenize56() {
#[test] #[test]
fn test_tokenize57() { fn test_tokenize57() {
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"]; let comp = vec![
"2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502",
];
tokenize_assert("2003-09-25 10:49:41,502", comp); tokenize_assert("2003-09-25 10:49:41,502", comp);
} }
@ -510,7 +517,10 @@ fn test_tokenize82() {
#[test] #[test]
fn test_tokenize83() { fn test_tokenize83() {
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "]; let comp = vec![
" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":",
"01", ":", "02", " ", " ", " ", "am", " ", " ",
];
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp); tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
} }
@ -522,7 +532,9 @@ fn test_tokenize84() {
#[test] #[test]
fn test_tokenize85() { fn test_tokenize85() {
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"]; let comp = vec![
"1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM",
];
tokenize_assert("1996.July.10 AD 12:08 PM", comp); tokenize_assert("1996.July.10 AD 12:08 PM", comp);
} }
@ -558,25 +570,33 @@ fn test_tokenize90() {
#[test] #[test]
fn test_tokenize91() { fn test_tokenize91() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize92() { fn test_tokenize92() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize93() { fn test_tokenize93() {
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"]; let comp = vec![
"July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am",
];
tokenize_assert("July 4, 1976 12:01:02 am", comp); tokenize_assert("July 4, 1976 12:01:02 am", comp);
} }
#[test] #[test]
fn test_tokenize94() { fn test_tokenize94() {
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"]; let comp = vec![
"Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995",
];
tokenize_assert("Mon Jan 2 04:24:27 1995", comp); tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
} }
@ -588,7 +608,9 @@ fn test_tokenize95() {
#[test] #[test]
fn test_tokenize96() { fn test_tokenize96() {
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"]; let comp = vec![
"Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578",
];
tokenize_assert("Jan 1 1999 11:23:34.578", comp); tokenize_assert("Jan 1 1999 11:23:34.578", comp);
} }
@ -618,13 +640,17 @@ fn test_tokenize100() {
#[test] #[test]
fn test_tokenize101() { fn test_tokenize101() {
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0099-01-01T00:00:00", comp); tokenize_assert("0099-01-01T00:00:00", comp);
} }
#[test] #[test]
fn test_tokenize102() { fn test_tokenize102() {
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0031-01-01T00:00:00", comp); tokenize_assert("0031-01-01T00:00:00", comp);
} }
@ -666,31 +692,42 @@ fn test_tokenize108() {
#[test] #[test]
fn test_tokenize109() { fn test_tokenize109() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize110() { fn test_tokenize110() {
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"]; let comp = vec![
"2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu",
];
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp); tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
} }
#[test] #[test]
fn test_tokenize111() { fn test_tokenize111() {
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"]; let comp = vec![
"Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-",
"0300",
];
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp); tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
} }
#[test] #[test]
fn test_tokenize112() { fn test_tokenize112() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp); tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
} }
#[test] #[test]
fn test_tokenize113() { fn test_tokenize113() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41-03:00", comp); tokenize_assert("2003-09-25T10:49:41-03:00", comp);
} }
@ -708,19 +745,27 @@ fn test_tokenize115() {
#[test] #[test]
fn test_tokenize116() { fn test_tokenize116() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3",
];
tokenize_assert("2018-08-10 10:00:00 UTC+3", comp); tokenize_assert("2018-08-10 10:00:00 UTC+3", comp);
} }
#[test] #[test]
fn test_tokenize117() { fn test_tokenize117() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-", "4"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-",
"4",
];
tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp); tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp);
} }
#[test] #[test]
fn test_tokenize118() { fn test_tokenize118() {
let comp = vec!["2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-", "02", ":", "00"]; let comp = vec![
"2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-",
"02", ":", "00",
];
tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp); tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp);
} }
@ -828,91 +873,213 @@ fn test_tokenize135() {
#[test] #[test]
fn test_tokenize136() { fn test_tokenize136() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize137() { fn test_tokenize137() {
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"]; let comp = vec![
"1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ",
"PDT",
];
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp); tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
} }
#[test] #[test]
fn test_tokenize138() { fn test_tokenize138() {
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"]; let comp = vec![
"Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30",
":", "42", "pm", " ", "PST",
];
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp); tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
} }
#[test] #[test]
fn test_tokenize139() { fn test_tokenize139() {
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"]; let comp = vec![
"November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am",
" ", "EST",
];
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp); tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
} }
#[test] #[test]
fn test_tokenize140() { fn test_tokenize140() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00",
];
tokenize_assert("1994-11-05T08:15:30-05:00", comp); tokenize_assert("1994-11-05T08:15:30-05:00", comp);
} }
#[test] #[test]
fn test_tokenize141() { fn test_tokenize141() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z",
];
tokenize_assert("1994-11-05T08:15:30Z", comp); tokenize_assert("1994-11-05T08:15:30Z", comp);
} }
#[test] #[test]
fn test_tokenize142() { fn test_tokenize142() {
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"]; let comp = vec![
"1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z",
];
tokenize_assert("1976-07-04T00:01:02Z", comp); tokenize_assert("1976-07-04T00:01:02Z", comp);
} }
#[test] #[test]
fn test_tokenize143() { fn test_tokenize143() {
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"]; let comp = vec![
"Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995",
];
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp); tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
} }
#[test] #[test]
fn test_tokenize144() { fn test_tokenize144() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; let comp = vec![
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize145() { fn test_tokenize145() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."]; let comp = vec![
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize146() { fn test_tokenize146() {
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"]; let comp = vec![
"I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ",
"1974",
];
tokenize_assert("I have a meeting on March 1, 1974", comp); tokenize_assert("I have a meeting on March 1, 1974", comp);
} }
#[test] #[test]
fn test_tokenize147() { fn test_tokenize147() {
let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"]; let comp = vec![
tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp); "On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ",
"going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ",
"Mars",
];
tokenize_assert(
"On June 8th, 2020, I am going to be the first man on Mars",
comp,
);
} }
#[test] #[test]
fn test_tokenize148() { fn test_tokenize148() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"]; let comp = vec![
tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp); "Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset",
" ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",",
" ", "2003",
];
tokenize_assert(
"Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
comp,
);
} }
#[test] #[test]
fn test_tokenize149() { fn test_tokenize149() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"]; let comp = vec![
tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp); "Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December",
" ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on",
" ", "Sunset",
];
tokenize_assert(
"Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset",
comp,
);
} }
#[test] #[test]
fn test_tokenize150() { fn test_tokenize150() {
let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"]; let comp = vec![
"Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ",
"going", " ", "to", " ", "see", " ", "you", " ", "there", "?",
];
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp); tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
} }

20
src/tests/tz.rs Normal file
View File

@ -0,0 +1,20 @@
use parse;
#[test]
fn est() {
    // Regression test for https://github.com/bspeice/dtparse/issues/18:
    // a timestamp ending in the "EST" abbreviation must parse successfully,
    // and the second element of the parsed tuple must be populated
    // (presumably the resolved timezone offset; confirm against `parse`).
    let outcome = parse("Fri, 21 Aug 2015 18:37:44 EST");
    assert!(outcome.is_ok());

    let second = outcome.unwrap().1;
    assert!(second.is_some());
}
#[test]
fn cest() {
    // Regression test for https://github.com/bspeice/dtparse/issues/18:
    // a timestamp ending in the "CEST" abbreviation must at least parse
    // without returning an error.
    let outcome = parse("Fri, 21 Aug 2015 18:37:44 CEST");
    assert!(outcome.is_ok());

    // The check on the second tuple element is disabled until the
    // outstanding issue below is resolved.
    // TODO: Fix
    // assert!(outcome.unwrap().1.is_some());
}

View File

@ -14,7 +14,6 @@ pub(crate) enum ParseState {
} }
impl Tokenizer { impl Tokenizer {
pub(crate) fn new(parse_string: &str) -> Self { pub(crate) fn new(parse_string: &str) -> Self {
Tokenizer { Tokenizer {
token_stack: vec![], token_stack: vec![],
@ -92,7 +91,7 @@ impl Iterator for Tokenizer {
} else { } else {
break; break;
} }
}, }
ParseState::Alpha => { ParseState::Alpha => {
seenletters = true; seenletters = true;
if self.isword(nextchar) { if self.isword(nextchar) {
@ -105,19 +104,21 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::Numeric => { ParseState::Numeric => {
if self.isnum(nextchar) { if self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) { } else if nextchar == '.'
|| (nextchar == ',' && token.as_ref().unwrap().len() >= 2)
{
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal; state = ParseState::NumericDecimal;
} else { } else {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::AlphaDecimal => { ParseState::AlphaDecimal => {
seenletters = true; seenletters = true;
if nextchar == '.' || self.isword(nextchar) { if nextchar == '.' || self.isword(nextchar) {
@ -130,7 +131,7 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::NumericDecimal => { ParseState::NumericDecimal => {
if nextchar == '.' || self.isnum(nextchar) { if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
@ -150,7 +151,12 @@ impl Iterator for Tokenizer {
// We do something slightly different to express the same logic // We do something slightly different to express the same logic
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal { if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
// UNWRAP: The state check guarantees that we have a value // UNWRAP: The state check guarantees that we have a value
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count(); let dot_count = token
.as_ref()
.unwrap()
.chars()
.filter(|c| *c == '.')
.count();
let last_char = token.as_ref().unwrap().chars().last(); let last_char = token.as_ref().unwrap().chars().last();
let last_splittable = last_char == Some('.') || last_char == Some(','); let last_splittable = last_char == Some('.') || last_char == Some(',');

View File

@ -1,5 +1,5 @@
use ParseResult;
use ParseError; use ParseError;
use ParseResult;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum DayOfWeek { pub enum DayOfWeek {
@ -9,11 +9,10 @@ pub enum DayOfWeek {
Wednesday, Wednesday,
Thursday, Thursday,
Friday, Friday,
Saturday Saturday,
} }
impl DayOfWeek { impl DayOfWeek {
pub fn to_numeral(&self) -> u32 { pub fn to_numeral(&self) -> u32 {
match *self { match *self {
DayOfWeek::Sunday => 0, DayOfWeek::Sunday => 0,
@ -35,7 +34,7 @@ impl DayOfWeek {
4 => DayOfWeek::Thursday, 4 => DayOfWeek::Thursday,
5 => DayOfWeek::Friday, 5 => DayOfWeek::Friday,
6 => DayOfWeek::Saturday, 6 => DayOfWeek::Saturday,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -59,12 +58,12 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => { 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
let c = year / 100; let c = year / 100;
(c, year - 100 * c) (c, year - 100 * c)
}, }
1 | 2 => { 1 | 2 => {
let c = (year - 1) / 100; let c = (year - 1) / 100;
(c, year - 1 - 100 * c) (c, year - 1 - 100 * c)
}, }
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month")) _ => return Err(ParseError::ImpossibleTimestamp("Invalid month")),
}; };
let e = match month { let e = match month {
@ -75,7 +74,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
8 => 1, 8 => 1,
9 | 12 => 4, 9 | 12 => 4,
10 => 6, 10 => 6,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
// This implementation is Gregorian-only. // This implementation is Gregorian-only.
@ -84,7 +83,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
1 => 5, 1 => 5,
2 => 3, 2 => 3,
3 => 1, 3 => 1,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
match (day + e + f + g + g / 4) % 7 { match (day + e + f + g + g / 4) % 7 {
@ -95,7 +94,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
4 => Ok(DayOfWeek::Thursday), 4 => Ok(DayOfWeek::Thursday),
5 => Ok(DayOfWeek::Friday), 5 => Ok(DayOfWeek::Friday),
6 => Ok(DayOfWeek::Saturday), 6 => Ok(DayOfWeek::Saturday),
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -114,7 +113,6 @@ mod test {
#[test] #[test]
fn weekday_difference() { fn weekday_difference() {
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1);
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2);