Merge pull request #14 from mjmeehan/master

Fix parser crash on bogus data in third field after separator
2026-07-24 04:13:01 -04:00 · 2018-07-19 18:06:53 -04:00
parent fe0a0ea3a7 e1c7c8424a
commit 87ada05bac
6 changed files with 50 additions and 4 deletions
@@ -0,0 +1,5 @@
 target
 libfuzzer
 corpus
 artifacts
@@ -0,0 +1,22 @@
 [package]
 name = "dtparse-fuzz"
 version = "0.0.1"
 authors = ["Automatically generated"]
 publish = false
 [package.metadata]
 cargo-fuzz = true
 [dependencies.dtparse]
 path = ".."
 [dependencies.libfuzzer-sys]
 git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
 # Prevent this from interfering with workspaces
 [workspace]
 members = ["."]
 [[bin]]
 name = "fuzzer_script_1"
 path = "fuzzers/fuzzer_script_1.rs"
@@ -0,0 +1,10 @@
 #![no_main]
 extern crate libfuzzer_sys;
 extern crate dtparse;
 use dtparse::parse;
 // Fuzz target entry point: hands the raw fuzzer bytes to `dtparse::parse`.
 // Exported as `rust_fuzzer_test_input` (presumably the symbol the
 // libfuzzer-sys driver calls for each input; confirm against that crate).
 #[export_name="rust_fuzzer_test_input"]
 pub extern fn go(data: &[u8]) {
    // `parse` takes a `&str`, so inputs that are not valid UTF-8 are skipped.
    let text = match std::str::from_utf8(data) {
        Ok(text) => text,
        Err(_) => return,
    };
    // The outcome is deliberately discarded: only a panic inside the parser
    // is of interest here, not whether the input was accepted.
    let _ = parse(text);
 }
@@ -952,7 +952,11 @@ impl Parser {
                    if let Some(value) = info.get_month(&tokens[idx + 4]) {
                        ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
                    } else {
-                        ymd.append(tokens[idx + 4].parse::<i32>().unwrap(), &tokens[idx + 4], None)?;
+                        if let Ok(val) = tokens[idx + 4].parse::<i32>() {
                            ymd.append(val, &tokens[idx + 4], None)?;
                        } else {
                            return Err(ParseInternalError::ValueError("Unknown string format".to_owned()));
                        }
                    }
                    idx += 2;
@@ -3,13 +3,18 @@ use std::collections::HashMap;
 use parse;
 use ParseError;
 use ParseInternalError;
 use Parser;
 #[test]
 fn test_fuzz() {
    assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::InvalidMonth));
-
+    // Garbage in the third delimited field
    assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
               Err(ParseError::InternalError(ParseInternalError::ValueError("Unknown string format".to_owned()))));
    // OverflowError: Python int too large to convert to C long
    // assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour));
    let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
    let mut p = Parser::default();
    let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, HashMap::new()).unwrap();
@@ -123,7 +123,7 @@ impl Iterator for Tokenizer {
                    if nextchar == '.' || self.isword(nextchar) {
                        // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
                        token.as_mut().unwrap().push(nextchar);
-                    } else if self.isnum(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') {
+                    } else if self.isnum(nextchar) && token.as_ref().unwrap().ends_with('.') {
                        token.as_mut().unwrap().push(nextchar);
                        state = ParseState::NumericDecimal;
                    } else {
@@ -135,7 +135,7 @@ impl Iterator for Tokenizer {
                    if nextchar == '.' || self.isnum(nextchar) {
                        // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
                        token.as_mut().unwrap().push(nextchar);
-                    } else if self.isword(nextchar) && token.as_ref().unwrap().chars().last() == Some('.') {
+                    } else if self.isword(nextchar) && token.as_ref().unwrap().ends_with('.') {
                        token.as_mut().unwrap().push(nextchar);
                        state = ParseState::AlphaDecimal;
                    } else {