1
0
mirror of https://github.com/bspeice/dtparse synced 2025-07-03 14:54:57 -04:00

Lots of fixes, but it turns out tokenization is broken

This commit is contained in:
Bradlee Speice
2018-07-03 01:02:27 -04:00
parent 2b90bf6ed7
commit 9135962839
5 changed files with 127 additions and 67 deletions

View File

@ -1,6 +1,3 @@
#![allow(dead_code)]
#![allow(unused)]
#[macro_use]
extern crate lazy_static;
@ -8,7 +5,6 @@ extern crate chrono;
extern crate num_traits;
extern crate rust_decimal;
use chrono::DateTime;
use chrono::Datelike;
use chrono::Duration;
use chrono::FixedOffset;
@ -17,7 +13,6 @@ use chrono::NaiveDate;
use chrono::NaiveDateTime;
use chrono::NaiveTime;
use chrono::Timelike;
use chrono::Utc;
use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError;
@ -33,7 +28,6 @@ mod weekday;
#[cfg(test)]
mod tests;
use tokenize::ParseState;
use tokenize::Tokenizer;
use weekday::day_of_week;
use weekday::DayOfWeek;
@ -59,13 +53,13 @@ pub enum ParseInternalError {
}
impl From<DecimalError> for ParseInternalError {
fn from(err: DecimalError) -> Self {
fn from(_err: DecimalError) -> Self {
ParseInternalError::InvalidDecimal
}
}
impl From<ParseIntError> for ParseInternalError {
fn from(err: ParseIntError) -> Self {
fn from(_err: ParseIntError) -> Self {
ParseInternalError::InvalidInteger
}
}
@ -294,11 +288,6 @@ struct YMD {
ystridx: Option<usize>,
}
enum YMDAppendEither {
Number(i32),
Stringy(String),
}
impl YMD {
fn len(&self) -> usize {
self._ymd.len()
@ -388,7 +377,7 @@ impl YMD {
Ok(())
}
}
None => Err(ParseInternalError::ValueError("Missing label.".to_owned())),
None => Ok(()),
}
}
@ -613,34 +602,34 @@ impl Parser {
while i < len_l {
let value_repr = l[i].clone();
if let Ok(v) = Decimal::from_str(&value_repr) {
if let Ok(_v) = Decimal::from_str(&value_repr) {
i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?;
} else if let Some(value) = self.info.get_weekday(&l[i]) {
res.weekday = Some(value);
} else if let Some(value) = self.info.get_month(&l[i]) {
ymd.append(value as i32, &l[i], Some(YMDLabel::Month));
ymd.append(value as i32, &l[i], Some(YMDLabel::Month))?;
if i + 1 < len_l {
if l[i + 1] == "-" || l[i + 1] == "/" {
// Jan-01[-99]
let sep = &l[i + 1];
// TODO: This seems like a very unsafe unwrap
ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None);
ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None)?;
if i + 3 < len_l && &l[i + 3] == sep {
// Jan-01-99
ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None);
ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None)?;
i += 2;
}
i += 2;
} else if (i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
&& self.info.get_pertain(&l[i + 2]))
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " "
&& self.info.get_pertain(&l[i + 2])
{
// Jan of 01
if let Some(value) = l[i + 4].parse::<i32>().ok() {
let year = self.info.convertyear(value, false);
ymd.append(year, &l[i + 4], Some(YMDLabel::Year));
ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?;
}
i += 4;
@ -737,7 +726,7 @@ impl Parser {
if !self.info.validate(&mut res) {
Err(ParseError::InvalidParseResult(res))
} else if fuzzy_with_tokens {
let skipped_tokens = skipped_idxs.into_iter().map(|i| l[i].clone()).collect();
let skipped_tokens = self.recombine_skipped(skipped_idxs, l);
Ok((res, Some(skipped_tokens)))
} else {
Ok((res, None))
@ -797,7 +786,7 @@ impl Parser {
};
// TODO: Change month/day to u32
let mut d = NaiveDate::from_ymd(
let d = NaiveDate::from_ymd(
y,
m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?)
@ -818,7 +807,7 @@ impl Parser {
fn build_tzaware(
&self,
dt: &NaiveDateTime,
_dt: &NaiveDateTime,
res: &ParsingResult,
tzinfos: HashMap<String, i32>,
) -> ParseResult<Option<FixedOffset>> {
@ -877,9 +866,9 @@ impl Parser {
let s = &tokens[idx];
if ymd.len() == 0 && tokens[idx].find(".") == None {
ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None);
ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None);
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None);
ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None)?;
ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
} else {
// 19990101T235959[.59]
res.hour = s[0..2].parse::<i32>().ok();
@ -892,9 +881,9 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) {
// YYYYMMDD[hhmm[ss]]
let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year));
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None);
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None);
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?;
if len_li > 8 {
res.hour = Some(s[8..10].parse::<i32>()?);
@ -936,20 +925,20 @@ impl Parser {
{
// TODO: There's got to be a better way of handling the condition above
let sep = &tokens[idx + 1];
ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None);
ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None)?;
if idx + 2 < len_l && !info.get_jump(&tokens[idx + 2]) {
if let Ok(val) = tokens[idx + 2].parse::<i32>() {
ymd.append(val, &tokens[idx + 2], None);
ymd.append(val, &tokens[idx + 2], None)?;
} else if let Some(val) = info.get_month(&tokens[idx + 2]) {
ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month));
ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month))?;
}
if idx + 3 < len_l && &tokens[idx + 3] == sep {
if let Some(value) = info.get_month(&tokens[idx + 4]) {
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month));
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
} else {
ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None);
ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?;
}
idx += 2;
@ -965,7 +954,7 @@ impl Parser {
let ampm = info.get_ampm(&tokens[idx + 2]).unwrap();
res.hour = Some(self.adjust_ampm(hour, ampm));
} else {
ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None);
ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None)?;
}
} else if info.get_ampm(&tokens[idx + 1]).is_some()
&& (*ZERO <= value && value < *TWENTY_FOUR)
@ -975,7 +964,7 @@ impl Parser {
res.hour = Some(self.adjust_ampm(hour, info.get_ampm(&tokens[idx + 1]).unwrap()));
idx += 1;
} else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None);
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
} else if !fuzzy {
return Err(ParseInternalError::ValueError("".to_owned()));
}
@ -1106,6 +1095,26 @@ impl Parser {
(minute, second)
}
/// Rebuilds the skipped pieces of the input from the indices of skipped tokens.
///
/// The indices are visited in ascending order. Tokens whose indices are
/// consecutive (`n`, `n + 1`, ...) are concatenated into a single string;
/// any gap between indices starts a new output entry.
///
/// * `skipped_idxs` - positions in `tokens` that were skipped; any order is accepted.
/// * `tokens` - the full token list the indices refer to.
///
/// Returns the recombined skipped strings, ordered by token position.
///
/// # Panics
///
/// Panics if an index in `skipped_idxs` is out of bounds for `tokens`.
fn recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String> {
    // The vector is owned, so it can be sorted in place without cloning.
    let mut sorted_idxs = skipped_idxs;
    sorted_idxs.sort_unstable();

    let mut skipped_tokens: Vec<String> = Vec::new();
    let mut prev_idx: Option<usize> = None;

    for idx in sorted_idxs {
        // Adjacency must be judged against the previous index in *sorted*
        // order. Written as `prev + 1 == idx` (rather than `idx - 1 == prev`)
        // so an index of 0 can never underflow.
        let extends_run = prev_idx.map_or(false, |prev| prev + 1 == idx);

        if extends_run {
            // `prev_idx` is only set after a push, so a last entry exists here.
            if let Some(run) = skipped_tokens.last_mut() {
                run.push_str(&tokens[idx]);
            }
        } else {
            skipped_tokens.push(tokens[idx].clone());
        }

        prev_idx = Some(idx);
    }

    skipped_tokens
}
}
fn close_to_integer(value: &Decimal) -> bool {