From fecba852d12761541e15626bcaa494515d3a46f5 Mon Sep 17 00:00:00 2001 From: Mike Meehan Date: Wed, 18 Jul 2018 20:34:14 -0400 Subject: [PATCH 1/4] Use ends_with in some places --- src/tokenize.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenize.rs b/src/tokenize.rs index bb982a0..81cc512 100644 --- a/src/tokenize.rs +++ b/src/tokenize.rs @@ -123,7 +123,7 @@ impl Iterator for Tokenizer { if nextchar == '.' || self.isword(nextchar) { // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token token.as_mut().unwrap().push(nextchar); - } else if self.isnum(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') { + } else if self.isnum(nextchar) && token.as_ref().unwrap().ends_with('.') { token.as_mut().unwrap().push(nextchar); state = ParseState::NumericDecimal; } else { @@ -135,7 +135,7 @@ impl Iterator for Tokenizer { if nextchar == '.' || self.isnum(nextchar) { // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token token.as_mut().unwrap().push(nextchar); - } else if self.isword(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') { + } else if self.isword(nextchar) && token.as_ref().unwrap().ends_with('.') { token.as_mut().unwrap().push(nextchar); state = ParseState::AlphaDecimal; } else { From ac95e9e8c3da79e09f48a10f05d5fd594d9ccae4 Mon Sep 17 00:00:00 2001 From: Mike Meehan Date: Wed, 18 Jul 2018 22:27:29 -0400 Subject: [PATCH 2/4] Add fuzzing, find and fix a parser bug. 
--- fuzz/.gitignore | 5 +++++ fuzz/Cargo.toml | 22 ++++++++++++++++++++++ fuzz/fuzzers/fuzzer_script_1.rs | 10 ++++++++++ src/lib.rs | 6 +++++- src/tests/fuzzing.rs | 5 ++++- 5 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzzers/fuzzer_script_1.rs diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..dfeb7db --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,5 @@ + +target +libfuzzer +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..0350800 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,22 @@ + +[package] +name = "dtparse-fuzz" +version = "0.0.1" +authors = ["Automatically generated"] +publish = false + +[package.metadata] +cargo-fuzz = true + +[dependencies.dtparse] +path = ".." +[dependencies.libfuzzer-sys] +git = "https://github.com/rust-fuzz/libfuzzer-sys.git" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "fuzzer_script_1" +path = "fuzzers/fuzzer_script_1.rs" diff --git a/fuzz/fuzzers/fuzzer_script_1.rs b/fuzz/fuzzers/fuzzer_script_1.rs new file mode 100644 index 0000000..40d7dbe --- /dev/null +++ b/fuzz/fuzzers/fuzzer_script_1.rs @@ -0,0 +1,10 @@ +#![no_main] +extern crate libfuzzer_sys; +extern crate dtparse; +use dtparse::parse; +#[export_name="rust_fuzzer_test_input"] +pub extern fn go(data: &[u8]) { + if let Ok(s) = sd::str::from_utf8(data) { + parse(s); + } +} diff --git a/src/lib.rs b/src/lib.rs index 1b3f71c..680ea7c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -952,7 +952,11 @@ impl Parser { if let Some(value) = info.get_month(&tokens[idx + 4]) { ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?; } else { - ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?; + if let Ok(val) = tokens[idx + 4].parse::<i32>() { + ymd.append(val, &tokens[idx + 4], None)?; + } else { + return 
Err(ParseInternalError::ValueError("".to_owned())); + } } idx += 2; diff --git a/src/tests/fuzzing.rs b/src/tests/fuzzing.rs index 5f0d842..2508f02 100644 --- a/src/tests/fuzzing.rs +++ b/src/tests/fuzzing.rs @@ -3,13 +3,16 @@ use std::collections::HashMap; use parse; use ParseError; +use ParseInternalError; use Parser; #[test] fn test_fuzz() { assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::InvalidMonth)); - + // Garbage in the third delimited field + assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"), + Err(ParseError::InternalError(ParseInternalError::ValueError("".to_owned())))); let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0); let mut p = Parser::default(); let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, HashMap::new()).unwrap(); From 4acd07297436433da4c9a84339b0ef628db71378 Mon Sep 17 00:00:00 2001 From: Mike Meehan Date: Wed, 18 Jul 2018 22:38:02 -0400 Subject: [PATCH 3/4] Fix typo in the fuzzer script --- fuzz/fuzzers/fuzzer_script_1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/fuzzers/fuzzer_script_1.rs b/fuzz/fuzzers/fuzzer_script_1.rs index 40d7dbe..92cd04c 100644 --- a/fuzz/fuzzers/fuzzer_script_1.rs +++ b/fuzz/fuzzers/fuzzer_script_1.rs @@ -4,7 +4,7 @@ extern crate dtparse; use dtparse::parse; #[export_name="rust_fuzzer_test_input"] pub extern fn go(data: &[u8]) { - if let Ok(s) = sd::str::from_utf8(data) { + if let Ok(s) = std::str::from_utf8(data) { parse(s); } } From e1c7c8424aee758088680f4f07a4b6ecc8319fda Mon Sep 17 00:00:00 2001 From: Mike Meehan Date: Wed, 18 Jul 2018 23:03:53 -0400 Subject: [PATCH 4/4] Improve error message to match dateutil --- src/lib.rs | 2 +- src/tests/fuzzing.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 680ea7c..6589e89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -955,7 +955,7 @@ impl Parser { if let Ok(val) = tokens[idx + 4].parse::<i32>() { ymd.append(val, 
&tokens[idx + 4], None)?; } else { - return Err(ParseInternalError::ValueError("".to_owned())); + return Err(ParseInternalError::ValueError("Unknown string format".to_owned())); } } diff --git a/src/tests/fuzzing.rs b/src/tests/fuzzing.rs index 2508f02..e204d17 100644 --- a/src/tests/fuzzing.rs +++ b/src/tests/fuzzing.rs @@ -12,7 +12,9 @@ fn test_fuzz() { assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::InvalidMonth)); // Garbage in the third delimited field assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"), - Err(ParseError::InternalError(ParseInternalError::ValueError("".to_owned())))); + Err(ParseError::InternalError(ParseInternalError::ValueError("Unknown string format".to_owned())))); + // OverflowError: Python int too large to convert to C long + // assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour)); let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0); let mut p = Parser::default(); let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, HashMap::new()).unwrap();