mirror of
https://github.com/bspeice/dtparse
synced 2024-12-22 12:28:08 -05:00
Lots of fixes, but it turns out tokenization is broken
This commit is contained in:
parent
2b90bf6ed7
commit
9135962839
@ -83,6 +83,9 @@ tests = {
|
|||||||
'test_fuzzy_tzinfo': [
|
'test_fuzzy_tzinfo': [
|
||||||
'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
|
'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
|
||||||
],
|
],
|
||||||
|
'test_fuzzy_tokens_tzinfo': [
|
||||||
|
'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
|
||||||
|
],
|
||||||
'test_parse_default_ignore': [
|
'test_parse_default_ignore': [
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
@ -158,6 +161,17 @@ def test_fuzzy_tzinfo(i, s):
|
|||||||
|
|
||||||
return TEST_FUZZY_TZINFO.format(i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds()))
|
return TEST_FUZZY_TZINFO.format(i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds()))
|
||||||
|
|
||||||
|
|
||||||
|
def test_fuzzy_tokens_tzinfo(i, s):
|
||||||
|
d, tokens = parse(s, fuzzy_with_tokens=True)
|
||||||
|
|
||||||
|
r_tokens = ", ".join(list(map(lambda s: f'"{s}".to_owned()', tokens)))
|
||||||
|
|
||||||
|
return TEST_FUZZY_TOKENS_TZINFO.format(
|
||||||
|
i=i, d=d, s=s, offset=int(d.tzinfo._offset.total_seconds()),
|
||||||
|
tokens=r_tokens
|
||||||
|
)
|
||||||
|
|
||||||
# Here lies all the ugly junk.
|
# Here lies all the ugly junk.
|
||||||
TEST_HEADER = '''
|
TEST_HEADER = '''
|
||||||
extern crate chrono;
|
extern crate chrono;
|
||||||
@ -214,8 +228,8 @@ fn parse_and_assert(
|
|||||||
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
||||||
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
|
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
|
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_and_assert_simple(
|
fn parse_and_assert_simple(
|
||||||
@ -223,14 +237,14 @@ fn parse_and_assert_simple(
|
|||||||
s: &str,
|
s: &str,
|
||||||
) {
|
) {
|
||||||
let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
|
let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
|
||||||
assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for {}", s);
|
assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for {}", s);
|
assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for {}", s);
|
assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for {}", s);
|
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for {}", s);
|
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for {}", s);
|
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
|
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
|
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_fuzzy_and_assert(
|
fn parse_fuzzy_and_assert(
|
||||||
@ -264,9 +278,9 @@ fn parse_fuzzy_and_assert(
|
|||||||
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
||||||
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
|
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
|
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
|
||||||
assert_eq!(ptokens, rs_parsed.2, "Fuzzy mismatch for {}", s);
|
assert_eq!(ptokens, rs_parsed.2, "Tokens mismatch for '{}'", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! rs_tzinfo_map {
|
macro_rules! rs_tzinfo_map {
|
||||||
@ -411,7 +425,7 @@ fn test_parse_default_ignore{i}() {{
|
|||||||
|
|
||||||
TEST_FUZZY_TZINFO = '''
|
TEST_FUZZY_TZINFO = '''
|
||||||
#[test]
|
#[test]
|
||||||
fn test_fuzzy{i}() {{
|
fn test_fuzzy_tzinfo{i}() {{
|
||||||
let info = ParserInfo::default();
|
let info = ParserInfo::default();
|
||||||
let pdt = PyDateTime {{
|
let pdt = PyDateTime {{
|
||||||
year: {d.year}, month: {d.month}, day: {d.day},
|
year: {d.year}, month: {d.month}, day: {d.day},
|
||||||
@ -422,6 +436,20 @@ fn test_fuzzy{i}() {{
|
|||||||
None, false, HashMap::new());
|
None, false, HashMap::new());
|
||||||
}}\n'''
|
}}\n'''
|
||||||
|
|
||||||
|
TEST_FUZZY_TOKENS_TZINFO = '''
|
||||||
|
#[test]
|
||||||
|
fn test_fuzzy_tokens_tzinfo{i}() {{
|
||||||
|
let info = ParserInfo::default();
|
||||||
|
let pdt = PyDateTime {{
|
||||||
|
year: {d.year}, month: {d.month}, day: {d.day},
|
||||||
|
hour: {d.hour}, minute: {d.minute}, second: {d.second},
|
||||||
|
micros: {d.microsecond}, tzo: Some({offset})
|
||||||
|
}};
|
||||||
|
let tokens = vec![{tokens}];
|
||||||
|
parse_fuzzy_and_assert(pdt, Some(tokens), info, "{s}", None, None, true, true,
|
||||||
|
None, false, HashMap::new());
|
||||||
|
}}\n'''
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
83
src/lib.rs
83
src/lib.rs
@ -1,6 +1,3 @@
|
|||||||
#![allow(dead_code)]
|
|
||||||
#![allow(unused)]
|
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate lazy_static;
|
extern crate lazy_static;
|
||||||
|
|
||||||
@ -8,7 +5,6 @@ extern crate chrono;
|
|||||||
extern crate num_traits;
|
extern crate num_traits;
|
||||||
extern crate rust_decimal;
|
extern crate rust_decimal;
|
||||||
|
|
||||||
use chrono::DateTime;
|
|
||||||
use chrono::Datelike;
|
use chrono::Datelike;
|
||||||
use chrono::Duration;
|
use chrono::Duration;
|
||||||
use chrono::FixedOffset;
|
use chrono::FixedOffset;
|
||||||
@ -17,7 +13,6 @@ use chrono::NaiveDate;
|
|||||||
use chrono::NaiveDateTime;
|
use chrono::NaiveDateTime;
|
||||||
use chrono::NaiveTime;
|
use chrono::NaiveTime;
|
||||||
use chrono::Timelike;
|
use chrono::Timelike;
|
||||||
use chrono::Utc;
|
|
||||||
use num_traits::cast::ToPrimitive;
|
use num_traits::cast::ToPrimitive;
|
||||||
use rust_decimal::Decimal;
|
use rust_decimal::Decimal;
|
||||||
use rust_decimal::Error as DecimalError;
|
use rust_decimal::Error as DecimalError;
|
||||||
@ -33,7 +28,6 @@ mod weekday;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
use tokenize::ParseState;
|
|
||||||
use tokenize::Tokenizer;
|
use tokenize::Tokenizer;
|
||||||
use weekday::day_of_week;
|
use weekday::day_of_week;
|
||||||
use weekday::DayOfWeek;
|
use weekday::DayOfWeek;
|
||||||
@ -59,13 +53,13 @@ pub enum ParseInternalError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl From<DecimalError> for ParseInternalError {
|
impl From<DecimalError> for ParseInternalError {
|
||||||
fn from(err: DecimalError) -> Self {
|
fn from(_err: DecimalError) -> Self {
|
||||||
ParseInternalError::InvalidDecimal
|
ParseInternalError::InvalidDecimal
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<ParseIntError> for ParseInternalError {
|
impl From<ParseIntError> for ParseInternalError {
|
||||||
fn from(err: ParseIntError) -> Self {
|
fn from(_err: ParseIntError) -> Self {
|
||||||
ParseInternalError::InvalidInteger
|
ParseInternalError::InvalidInteger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -294,11 +288,6 @@ struct YMD {
|
|||||||
ystridx: Option<usize>,
|
ystridx: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum YMDAppendEither {
|
|
||||||
Number(i32),
|
|
||||||
Stringy(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl YMD {
|
impl YMD {
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
self._ymd.len()
|
self._ymd.len()
|
||||||
@ -388,7 +377,7 @@ impl YMD {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => Err(ParseInternalError::ValueError("Missing label.".to_owned())),
|
None => Ok(()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -613,34 +602,34 @@ impl Parser {
|
|||||||
while i < len_l {
|
while i < len_l {
|
||||||
let value_repr = l[i].clone();
|
let value_repr = l[i].clone();
|
||||||
|
|
||||||
if let Ok(v) = Decimal::from_str(&value_repr) {
|
if let Ok(_v) = Decimal::from_str(&value_repr) {
|
||||||
i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?;
|
i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?;
|
||||||
} else if let Some(value) = self.info.get_weekday(&l[i]) {
|
} else if let Some(value) = self.info.get_weekday(&l[i]) {
|
||||||
res.weekday = Some(value);
|
res.weekday = Some(value);
|
||||||
} else if let Some(value) = self.info.get_month(&l[i]) {
|
} else if let Some(value) = self.info.get_month(&l[i]) {
|
||||||
ymd.append(value as i32, &l[i], Some(YMDLabel::Month));
|
ymd.append(value as i32, &l[i], Some(YMDLabel::Month))?;
|
||||||
|
|
||||||
if i + 1 < len_l {
|
if i + 1 < len_l {
|
||||||
if l[i + 1] == "-" || l[i + 1] == "/" {
|
if l[i + 1] == "-" || l[i + 1] == "/" {
|
||||||
// Jan-01[-99]
|
// Jan-01[-99]
|
||||||
let sep = &l[i + 1];
|
let sep = &l[i + 1];
|
||||||
// TODO: This seems like a very unsafe unwrap
|
// TODO: This seems like a very unsafe unwrap
|
||||||
ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None);
|
ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None)?;
|
||||||
|
|
||||||
if i + 3 < len_l && &l[i + 3] == sep {
|
if i + 3 < len_l && &l[i + 3] == sep {
|
||||||
// Jan-01-99
|
// Jan-01-99
|
||||||
ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None);
|
ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None)?;
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 2;
|
i += 2;
|
||||||
} else if (i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
|
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
|
||||||
&& self.info.get_pertain(&l[i + 2]))
|
&& self.info.get_pertain(&l[i + 2])
|
||||||
{
|
{
|
||||||
// Jan of 01
|
// Jan of 01
|
||||||
if let Some(value) = l[i + 4].parse::<i32>().ok() {
|
if let Some(value) = l[i + 4].parse::<i32>().ok() {
|
||||||
let year = self.info.convertyear(value, false);
|
let year = self.info.convertyear(value, false);
|
||||||
ymd.append(year, &l[i + 4], Some(YMDLabel::Year));
|
ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 4;
|
i += 4;
|
||||||
@ -737,7 +726,7 @@ impl Parser {
|
|||||||
if !self.info.validate(&mut res) {
|
if !self.info.validate(&mut res) {
|
||||||
Err(ParseError::InvalidParseResult(res))
|
Err(ParseError::InvalidParseResult(res))
|
||||||
} else if fuzzy_with_tokens {
|
} else if fuzzy_with_tokens {
|
||||||
let skipped_tokens = skipped_idxs.into_iter().map(|i| l[i].clone()).collect();
|
let skipped_tokens = self.recombine_skipped(skipped_idxs, l);
|
||||||
Ok((res, Some(skipped_tokens)))
|
Ok((res, Some(skipped_tokens)))
|
||||||
} else {
|
} else {
|
||||||
Ok((res, None))
|
Ok((res, None))
|
||||||
@ -797,7 +786,7 @@ impl Parser {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Change month/day to u32
|
// TODO: Change month/day to u32
|
||||||
let mut d = NaiveDate::from_ymd(
|
let d = NaiveDate::from_ymd(
|
||||||
y,
|
y,
|
||||||
m,
|
m,
|
||||||
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?)
|
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?)
|
||||||
@ -818,7 +807,7 @@ impl Parser {
|
|||||||
|
|
||||||
fn build_tzaware(
|
fn build_tzaware(
|
||||||
&self,
|
&self,
|
||||||
dt: &NaiveDateTime,
|
_dt: &NaiveDateTime,
|
||||||
res: &ParsingResult,
|
res: &ParsingResult,
|
||||||
tzinfos: HashMap<String, i32>,
|
tzinfos: HashMap<String, i32>,
|
||||||
) -> ParseResult<Option<FixedOffset>> {
|
) -> ParseResult<Option<FixedOffset>> {
|
||||||
@ -877,9 +866,9 @@ impl Parser {
|
|||||||
let s = &tokens[idx];
|
let s = &tokens[idx];
|
||||||
|
|
||||||
if ymd.len() == 0 && tokens[idx].find(".") == None {
|
if ymd.len() == 0 && tokens[idx].find(".") == None {
|
||||||
ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None);
|
ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None)?;
|
||||||
ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None);
|
ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None)?;
|
||||||
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None);
|
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
|
||||||
} else {
|
} else {
|
||||||
// 19990101T235959[.59]
|
// 19990101T235959[.59]
|
||||||
res.hour = s[0..2].parse::<i32>().ok();
|
res.hour = s[0..2].parse::<i32>().ok();
|
||||||
@ -892,9 +881,9 @@ impl Parser {
|
|||||||
} else if vec![8, 12, 14].contains(&len_li) {
|
} else if vec![8, 12, 14].contains(&len_li) {
|
||||||
// YYMMDD
|
// YYMMDD
|
||||||
let s = &tokens[idx];
|
let s = &tokens[idx];
|
||||||
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year));
|
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?;
|
||||||
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None);
|
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
|
||||||
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None);
|
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
|
||||||
|
|
||||||
if len_li > 8 {
|
if len_li > 8 {
|
||||||
res.hour = Some(s[8..10].parse::<i32>()?);
|
res.hour = Some(s[8..10].parse::<i32>()?);
|
||||||
@ -936,20 +925,20 @@ impl Parser {
|
|||||||
{
|
{
|
||||||
// TODO: There's got to be a better way of handling the condition above
|
// TODO: There's got to be a better way of handling the condition above
|
||||||
let sep = &tokens[idx + 1];
|
let sep = &tokens[idx + 1];
|
||||||
ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None);
|
ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None)?;
|
||||||
|
|
||||||
if idx + 2 < len_l && !info.get_jump(&tokens[idx + 2]) {
|
if idx + 2 < len_l && !info.get_jump(&tokens[idx + 2]) {
|
||||||
if let Ok(val) = tokens[idx + 2].parse::<i32>() {
|
if let Ok(val) = tokens[idx + 2].parse::<i32>() {
|
||||||
ymd.append(val, &tokens[idx + 2], None);
|
ymd.append(val, &tokens[idx + 2], None)?;
|
||||||
} else if let Some(val) = info.get_month(&tokens[idx + 2]) {
|
} else if let Some(val) = info.get_month(&tokens[idx + 2]) {
|
||||||
ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month));
|
ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if idx + 3 < len_l && &tokens[idx + 3] == sep {
|
if idx + 3 < len_l && &tokens[idx + 3] == sep {
|
||||||
if let Some(value) = info.get_month(&tokens[idx + 4]) {
|
if let Some(value) = info.get_month(&tokens[idx + 4]) {
|
||||||
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month));
|
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
|
||||||
} else {
|
} else {
|
||||||
ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None);
|
ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
idx += 2;
|
idx += 2;
|
||||||
@ -965,7 +954,7 @@ impl Parser {
|
|||||||
let ampm = info.get_ampm(&tokens[idx + 2]).unwrap();
|
let ampm = info.get_ampm(&tokens[idx + 2]).unwrap();
|
||||||
res.hour = Some(self.adjust_ampm(hour, ampm));
|
res.hour = Some(self.adjust_ampm(hour, ampm));
|
||||||
} else {
|
} else {
|
||||||
ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None);
|
ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None)?;
|
||||||
}
|
}
|
||||||
} else if info.get_ampm(&tokens[idx + 1]).is_some()
|
} else if info.get_ampm(&tokens[idx + 1]).is_some()
|
||||||
&& (*ZERO <= value && value < *TWENTY_FOUR)
|
&& (*ZERO <= value && value < *TWENTY_FOUR)
|
||||||
@ -975,7 +964,7 @@ impl Parser {
|
|||||||
res.hour = Some(self.adjust_ampm(hour, info.get_ampm(&tokens[idx + 1]).unwrap()));
|
res.hour = Some(self.adjust_ampm(hour, info.get_ampm(&tokens[idx + 1]).unwrap()));
|
||||||
idx += 1;
|
idx += 1;
|
||||||
} else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
|
} else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
|
||||||
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None);
|
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
|
||||||
} else if !fuzzy {
|
} else if !fuzzy {
|
||||||
return Err(ParseInternalError::ValueError("".to_owned()));
|
return Err(ParseInternalError::ValueError("".to_owned()));
|
||||||
}
|
}
|
||||||
@ -1106,6 +1095,26 @@ impl Parser {
|
|||||||
|
|
||||||
(minute, second)
|
(minute, second)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String> {
|
||||||
|
let mut skipped_tokens: Vec<String> = vec![];
|
||||||
|
|
||||||
|
let mut sorted_idxs = skipped_idxs.clone();
|
||||||
|
sorted_idxs.sort();
|
||||||
|
|
||||||
|
for (i, idx) in sorted_idxs.iter().enumerate() {
|
||||||
|
if i > 0 && idx - 1 == skipped_idxs[i - 1] {
|
||||||
|
// UNWRAP: Having an initial value and unconditional push at end guarantees value
|
||||||
|
let mut t = skipped_tokens.pop().unwrap();
|
||||||
|
t.push_str(tokens[idx.clone()].as_ref());
|
||||||
|
skipped_tokens.push(t);
|
||||||
|
} else {
|
||||||
|
skipped_tokens.push(tokens[idx.clone()].to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
skipped_tokens
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn close_to_integer(value: &Decimal) -> bool {
|
fn close_to_integer(value: &Decimal) -> bool {
|
||||||
|
@ -201,3 +201,15 @@ fn decimal_split(characters: &str, cast_period: bool) -> Vec<String> {
|
|||||||
|
|
||||||
token_stack
|
token_stack
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
|
||||||
|
use Tokenizer;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_basic() {
|
||||||
|
let tokens: Vec<String> = Tokenizer::new("September of 2003,".to_owned()).collect();
|
||||||
|
assert_eq!(tokens, vec!["September", " ", "of", " ", "2003", ","]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
use std::cmp::max;
|
|
||||||
|
|
||||||
use ParseResult;
|
use ParseResult;
|
||||||
use ParseError;
|
use ParseError;
|
||||||
|
|
||||||
|
@ -53,8 +53,8 @@ fn parse_and_assert(
|
|||||||
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
||||||
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
|
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
|
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_and_assert_simple(
|
fn parse_and_assert_simple(
|
||||||
@ -62,14 +62,14 @@ fn parse_and_assert_simple(
|
|||||||
s: &str,
|
s: &str,
|
||||||
) {
|
) {
|
||||||
let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
|
let rs_parsed = dtparse::parse(s).expect(&format!("Unable to parse date in Rust '{}'", s));
|
||||||
assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for {}", s);
|
assert_eq!(pdt.year, rs_parsed.0.year(), "Year mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for {}", s);
|
assert_eq!(pdt.month, rs_parsed.0.month(), "Month mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for {}", s);
|
assert_eq!(pdt.day, rs_parsed.0.day(), "Day mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for {}", s);
|
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for {}", s);
|
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for {}", s);
|
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
|
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
|
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_fuzzy_and_assert(
|
fn parse_fuzzy_and_assert(
|
||||||
@ -103,9 +103,9 @@ fn parse_fuzzy_and_assert(
|
|||||||
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
assert_eq!(pdt.hour, rs_parsed.0.hour(), "Hour mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
assert_eq!(pdt.minute, rs_parsed.0.minute(), "Minute mismatch f'or' {}", s);
|
||||||
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
assert_eq!(pdt.second, rs_parsed.0.second(), "Second mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for {}", s);
|
assert_eq!(pdt.micros, rs_parsed.0.timestamp_subsec_micros(), "Microsecond mismatch for '{}'", s);
|
||||||
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for {}", s);
|
assert_eq!(pdt.tzo, rs_parsed.1.map(|u| u.local_minus_utc()), "Timezone Offset mismatch for '{}'", s);
|
||||||
assert_eq!(ptokens, rs_parsed.2, "Fuzzy mismatch for {}", s);
|
assert_eq!(ptokens, rs_parsed.2, "Tokens mismatch for '{}'", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! rs_tzinfo_map {
|
macro_rules! rs_tzinfo_map {
|
||||||
@ -1735,7 +1735,7 @@ fn test_parse_ignoretz7() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_fuzzy0() {
|
fn test_fuzzy_tzinfo0() {
|
||||||
let info = ParserInfo::default();
|
let info = ParserInfo::default();
|
||||||
let pdt = PyDateTime {
|
let pdt = PyDateTime {
|
||||||
year: 2003, month: 9, day: 25,
|
year: 2003, month: 9, day: 25,
|
||||||
@ -1745,3 +1745,16 @@ fn test_fuzzy0() {
|
|||||||
parse_fuzzy_and_assert(pdt, None, info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, false,
|
parse_fuzzy_and_assert(pdt, None, info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, false,
|
||||||
None, false, HashMap::new());
|
None, false, HashMap::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_fuzzy_tokens_tzinfo0() {
|
||||||
|
let info = ParserInfo::default();
|
||||||
|
let pdt = PyDateTime {
|
||||||
|
year: 2003, month: 9, day: 25,
|
||||||
|
hour: 10, minute: 49, second: 41,
|
||||||
|
micros: 0, tzo: Some(-10800)
|
||||||
|
};
|
||||||
|
let tokens = vec!["Today is ".to_owned(), "of ".to_owned(), ", exactly at ".to_owned(), " with timezone ".to_owned(), ".".to_owned()];
|
||||||
|
parse_fuzzy_and_assert(pdt, Some(tokens), info, "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", None, None, true, true,
|
||||||
|
None, false, HashMap::new());
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user