1
0
mirror of https://github.com/bspeice/dtparse synced 2024-12-22 04:18:09 -05:00

Merge pull request #14 from mjmeehan/master

Fix parser crash on bogus data in third field after separator
This commit is contained in:
bspeice 2018-07-19 18:06:53 -04:00 committed by GitHub
commit 87ada05bac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 50 additions and 4 deletions

5
fuzz/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
target
libfuzzer
corpus
artifacts

22
fuzz/Cargo.toml Normal file
View File

@ -0,0 +1,22 @@
# Cargo manifest for the fuzzing harness crate that exercises `dtparse`.
[package]
name = "dtparse-fuzz"
version = "0.0.1"
authors = ["Automatically generated"]
# Keep this helper crate from being published to a registry.
publish = false
[package.metadata]
# NOTE(review): presumably the marker cargo-fuzz uses to recognise this crate
# as a fuzz project — confirm against the cargo-fuzz documentation.
cargo-fuzz = true
# The library under test, taken from the parent directory.
[dependencies.dtparse]
path = ".."
# libFuzzer bindings, pulled from the upstream git repository (no revision is pinned here).
[dependencies.libfuzzer-sys]
git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
# Binary target for the fuzzer; its source lives at fuzzers/fuzzer_script_1.rs.
[[bin]]
name = "fuzzer_script_1"
path = "fuzzers/fuzzer_script_1.rs"

View File

@ -0,0 +1,10 @@
#![no_main]
extern crate libfuzzer_sys;
extern crate dtparse;
use dtparse::parse;
#[export_name="rust_fuzzer_test_input"]
pub extern fn go(data: &[u8]) {
if let Ok(s) = std::str::from_utf8(data) {
parse(s);
}
}

View File

@ -952,7 +952,11 @@ impl Parser {
if let Some(value) = info.get_month(&tokens[idx + 4]) {
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
} else {
ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?;
if let Ok(val) = tokens[idx + 4].parse::<i32>() {
ymd.append(val, &tokens[idx + 4], None)?;
} else {
return Err(ParseInternalError::ValueError("Unknown string format".to_owned()));
}
}
idx += 2;

View File

@ -3,13 +3,18 @@ use std::collections::HashMap;
use parse;
use ParseError;
use ParseInternalError;
use Parser;
#[test]
fn test_fuzz() {
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::InvalidMonth));
// Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
Err(ParseError::InternalError(ParseInternalError::ValueError("Unknown string format".to_owned()))));
// OverflowError: Python int too large to convert to C long
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let mut p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, HashMap::new()).unwrap();

View File

@ -123,7 +123,7 @@ impl Iterator for Tokenizer {
if nextchar == '.' || self.isword(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar);
} else if self.isnum(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') {
} else if self.isnum(nextchar) && token.as_ref().unwrap().ends_with('.') {
token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal;
} else {
@ -135,7 +135,7 @@ impl Iterator for Tokenizer {
if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar);
} else if self.isword(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') {
} else if self.isword(nextchar) && token.as_ref().unwrap().ends_with('.') {
token.as_mut().unwrap().push(nextchar);
state = ParseState::AlphaDecimal;
} else {