1
0
mirror of https://github.com/bspeice/dtparse synced 2025-07-03 14:54:57 -04:00

37 Commits

Author SHA1 Message Date
6a5ec31d8e Release version 1.1.0 2020-06-11 15:42:03 -04:00
23f50fb62b Merge #29
29: Properly handle no date content being found r=bspeice a=bspeice

Fixes #22 

Co-authored-by: Bradlee Speice <bradlee@speice.io>
2020-06-11 19:33:20 +00:00
f1ca602e9f Properly handle no date content being found 2020-06-11 15:00:37 -04:00
899cd88280 Merge #27
27: Add fixes for dateutil/dateutil#822 r=bspeice a=bspeice

Fixes #16 

Co-authored-by: Bradlee Speice <bradlee@speice.io>
2020-06-11 18:18:24 +00:00
a08bb2d9d7 Add fixes for dateutil/dateutil#822 2020-06-11 13:59:07 -04:00
af6c3238c4 Merge #28
28: Disable clippy component for 1.28 r=bspeice a=bspeice

And fix some other issues from a `.travis.yml` file I definitely didn't just copy-paste from a separate project...

Co-authored-by: Bradlee Speice <bradlee@speice.io>
2020-06-11 17:40:24 +00:00
b098f54f8b Convert clippy lints 2020-06-11 13:33:09 -04:00
61022c323e Cargo fmt 2020-06-11 13:11:52 -04:00
4079b3ce2f Fix ENV naming 2020-06-11 13:06:26 -04:00
3e03b188b4 Disable clippy component for 1.28 2020-06-11 13:05:21 -04:00
7147677926 Merge pull request #25 from bspeice/simplify_testing
Simplify testing procedure
2020-06-11 12:26:12 -04:00
22b6a321e6 Remove Appveyor badge from README 2020-06-11 12:25:55 -04:00
9edc2a3102 Simplify testing procedure 2020-06-11 12:23:36 -04:00
245f746c8c Merge pull request #24 from bspeice/panic_fuzzing
Fix #21
2020-06-11 12:23:10 -04:00
5782a573bc Merge pull request #23 from gma2th/master
Implement Error trait for ParseError
2020-06-11 12:13:12 -04:00
e895fbd9f3 Implement Error trait for ParseError 2020-06-11 23:29:37 +08:00
2a2f1e7fbd Fix #21 2020-05-29 14:23:54 -04:00
e9c994a755 Merge pull request #20 from bspeice/timezone
Remove timezone handling
2019-11-30 08:41:11 -05:00
d6fc72459e Mark unused 2019-11-29 18:19:13 -05:00
d7ff381d7f Bugfix 2019-11-29 17:58:40 -05:00
d5e0a5d46a Remove timezone handling
There are too many issues in chrono-tz to make it worth supporting.
2019-11-29 16:49:29 -05:00
9f1b8d4971 Merge pull request #19 from bspeice/tz_fix
Attempt to read timezones from chrono-tz
2019-11-29 15:45:44 -05:00
0f7ac8538c Remove WASM from Travis 2019-11-25 23:11:19 -05:00
b81a8d9541 Use 1.28 as minimum Rust version 2019-11-25 20:57:56 -05:00
030ca4fced Rustfmt 2019-11-13 23:18:37 -05:00
142712900f Attempt to read timezones from chrono-tz 2019-11-13 23:12:47 -05:00
c310cbaa0d Fix an example warning 2018-09-18 23:06:38 -04:00
ef3ea38834 Release version 1.0.3 2018-09-18 23:04:07 -04:00
741afa3451 Remove a last println and use a static default parser
Heap profilers are fun
2018-09-17 23:14:50 -04:00
4d7c5dd995 Now with 100% more WASM!
Version bump and README update
2018-08-14 22:49:24 -04:00
afb7747cdf Fix target name 2018-08-14 22:38:05 -04:00
22e0300275 Test adding WASM support 2018-08-14 21:53:35 -04:00
0ef35527d9 Release 1.0.1 2018-08-11 13:13:12 -04:00
b5fa1d89ef Fix new tests not being date-neutral 2018-08-11 10:38:46 -04:00
246b389ac9 Clippy caught a bug
And now I have test cases for it!
2018-08-10 23:24:02 -04:00
4d48885f4b Add old rust versions to test agains 2018-08-10 22:15:27 -04:00
48705339e6 Add one final CONTRIBUTOR 2018-08-03 23:43:24 -04:00
18 changed files with 3171 additions and 1196 deletions

4
.gitignore vendored
View File

@ -3,4 +3,6 @@
**/*.rs.bk **/*.rs.bk
Cargo.lock Cargo.lock
.vscode .vscode
*.pyc *.pyc
.idea/
*.swp

View File

@ -1,103 +1,40 @@
# Based on the "trust" template v0.1.2
# https://github.com/japaric/trust/tree/v0.1.2
dist: trusty
language: rust language: rust
services: docker
sudo: required
env: jobs:
global:
- CRATE_NAME=dtparse
matrix:
include: include:
# Android - rust: stable
- env: TARGET=aarch64-linux-android DISABLE_TESTS=1 os: linux
- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1 - rust: 1.28.0
- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1 os: linux
- env: TARGET=i686-linux-android DISABLE_TESTS=1 env: DISABLE_TOOLS=true
- env: TARGET=x86_64-linux-android DISABLE_TESTS=1 - rust: stable
os: osx
- rust: stable-msvc
os: windows
- rust: stable
os: windows
# iOS cache:
- env: TARGET=aarch64-apple-ios DISABLE_TESTS=1 - cargo
os: osx
- env: TARGET=armv7-apple-ios DISABLE_TESTS=1
os: osx
- env: TARGET=armv7s-apple-ios DISABLE_TESTS=1
os: osx
- env: TARGET=i386-apple-ios DISABLE_TESTS=1
os: osx
- env: TARGET=x86_64-apple-ios DISABLE_TESTS=1
os: osx
# Linux before_script:
- env: TARGET=aarch64-unknown-linux-gnu - rustup show
- env: TARGET=arm-unknown-linux-gnueabi # CMake doesn't like the `sh.exe` provided by Git being in PATH
- env: TARGET=armv7-unknown-linux-gnueabihf - if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then rm "C:/Program Files/Git/usr/bin/sh.exe"; fi
- env: TARGET=i686-unknown-linux-gnu - if [[ "$DISABLE_TOOLS" == "" ]]; then rustup component add clippy; rustup component add rustfmt; fi
- env: TARGET=i686-unknown-linux-musl
- env: TARGET=mips-unknown-linux-gnu
- env: TARGET=mips64-unknown-linux-gnuabi64
- env: TARGET=mips64el-unknown-linux-gnuabi64
- env: TARGET=mipsel-unknown-linux-gnu
- env: TARGET=powerpc-unknown-linux-gnu
- env: TARGET=powerpc64-unknown-linux-gnu
- env: TARGET=powerpc64le-unknown-linux-gnu
- env: TARGET=s390x-unknown-linux-gnu DISABLE_TESTS=1
- env: TARGET=x86_64-unknown-linux-gnu
- env: TARGET=x86_64-unknown-linux-musl
# OSX
- env: TARGET=i686-apple-darwin
os: osx
- env: TARGET=x86_64-apple-darwin
os: osx
# *BSD
- env: TARGET=i686-unknown-freebsd DISABLE_TESTS=1
- env: TARGET=x86_64-unknown-freebsd DISABLE_TESTS=1
- env: TARGET=x86_64-unknown-netbsd DISABLE_TESTS=1
# Windows
- env: TARGET=x86_64-pc-windows-gnu
# Nightly and Beta
- env: TARGET=x86_64-unknown-linux-gnu
rust: nightly
- env: TARGET=x86_64-apple-darwin
os: osx
rust: nightly
- env: TARGET=x86_64-unknown-linux-gnu
rust: beta
- env: TARGET=x86_64-apple-darwin
os: osx
rust: beta
before_install:
- set -e
- rustup self update
install:
- sh ci/install.sh
- source ~/.cargo/env || true
script: script:
- bash ci/script.sh - if [[ "$DISABLE_TOOLS" == "" ]]; then cargo clippy --all && cargo fmt --all -- --check; fi
after_script: set +e # For default build, split up compilation and tests so we can track build times
- cargo test --no-run
cache: cargo - cargo test
before_cache: - cargo test --release --no-run
# Travis can't cache files that are not readable by "others" - cargo test --release
- chmod -R a+r $HOME/.cargo
branches: branches:
only: only:
# release tags
- /^v\d+\.\d+\.\d+.*$/
- master - master
- staging
notifications: - trying
email:
on_success: never

34
CHANGELOG.md Normal file
View File

@ -0,0 +1,34 @@
Version 1.0.3 (2018-09-18)
==========================
Misc
----
- Changed the default `parse` function to use a static parser
Version 1.0.2 (2018-08-14)
==========================
Misc
----
- Add tests for WASM
Version 1.0.1 (2018-08-11)
==========================
Bugfixes
--------
- Fixed an issue with "GMT+3" not being handled correctly
Misc
----
- Upgrade `lazy_static` and `rust_decimal` dependencies
Version 1.0.0 (2018-08-03)
==========================
Initial release. Passes all relevant unit tests from Python's
`dateutil` project.

View File

@ -3,4 +3,5 @@ This project benefits from the Rust and open source communities, but most specif
# Contributors: # Contributors:
- [@messense](https://github.com/messense) - [@messense](https://github.com/messense)
- [@mjmeehan](https://github.com/mjmeehan) - [@mjmeehan](https://github.com/mjmeehan)
- [@neosilky](https://github.com/neosilky)

View File

@ -1,6 +1,6 @@
[package] [package]
name = "dtparse" name = "dtparse"
version = "1.0.0" version = "1.1.0"
authors = ["Bradlee Speice <bradlee@speice.io>"] authors = ["Bradlee Speice <bradlee@speice.io>"]
description = "A dateutil-compatible timestamp parser for Rust" description = "A dateutil-compatible timestamp parser for Rust"
repository = "https://github.com/bspeice/dtparse.git" repository = "https://github.com/bspeice/dtparse.git"
@ -10,7 +10,6 @@ license = "Apache-2.0"
[badges] [badges]
travis-ci = { repository = "bspeice/dtparse" } travis-ci = { repository = "bspeice/dtparse" }
appveyor = { repository = "bspeice/dtparse" }
maintenance = { status = "passively-maintained" } maintenance = { status = "passively-maintained" }
[lib] [lib]
@ -18,6 +17,7 @@ name = "dtparse"
[dependencies] [dependencies]
chrono = "0.4" chrono = "0.4"
lazy_static = "1.0" chrono-tz = "0.5"
lazy_static = "1.1"
num-traits = "0.2" num-traits = "0.2"
rust_decimal = "0.9" rust_decimal = "^0.10.1"

View File

@ -1,7 +1,6 @@
# dtparse # dtparse
[![travisci](https://travis-ci.org/bspeice/dtparse.svg?branch=master)](https://travis-ci.org/bspeice/dtparse) [![travisci](https://travis-ci.org/bspeice/dtparse.svg?branch=master)](https://travis-ci.org/bspeice/dtparse)
[![appveyor](https://ci.appveyor.com/api/projects/status/r4de76tg9utfjva1/branch/master?svg=true)](https://ci.appveyor.com/project/bspeice/dtparse/branch/master)
[![crates.io](https://img.shields.io/crates/v/dtparse.svg)](https://crates.io/crates/dtparse) [![crates.io](https://img.shields.io/crates/v/dtparse.svg)](https://crates.io/crates/dtparse)
[![docs.rs](https://docs.rs/dtparse/badge.svg)](https://docs.rs/dtparse/) [![docs.rs](https://docs.rs/dtparse/badge.svg)](https://docs.rs/dtparse/)
@ -65,5 +64,11 @@ assert_eq!(
Further examples can be found in the [examples](examples) directory on international usage. Further examples can be found in the [examples](examples) directory on international usage.
# Usage
`dtparse` requires a minimum Rust version of 1.28 to build, and is tested on Windows, OSX,
BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
tested against them.
[dateutil]: https://github.com/dateutil/dateutil [dateutil]: https://github.com/dateutil/dateutil
[examples]: https://github.com/bspeice/dtparse/tree/master/examples [examples]: https://github.com/bspeice/dtparse/tree/master/examples

View File

@ -1,121 +0,0 @@
# Appveyor configuration template for Rust using rustup for Rust installation
# https://github.com/starkat99/appveyor-rust
## Operating System (VM environment) ##
# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
os: Visual Studio 2017
## Build Matrix ##
# This configuration will setup a build for each channel & target combination (12 windows
# combinations in all).
#
# There are 3 channels: stable, beta, and nightly.
#
# Alternatively, the full version may be specified for the channel to build using that specific
# version (e.g. channel: 1.5.0)
#
# The values for target are the set of windows Rust build targets. Each value is of the form
#
# ARCH-pc-windows-TOOLCHAIN
#
# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker
# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for
# a description of the toolchain differences.
# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of
# toolchains and host triples.
#
# Comment out channel/target combos you do not wish to build in CI.
#
# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands
# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly
# channels to enable unstable features when building for nightly. Or you could add additional
# matrix entries to test different combinations of features.
environment:
matrix:
### MSVC Toolchains ###
# Stable 64-bit MSVC
- channel: stable
target: x86_64-pc-windows-msvc
# Stable 32-bit MSVC
- channel: stable
target: i686-pc-windows-msvc
# Beta 64-bit MSVC
- channel: beta
target: x86_64-pc-windows-msvc
# Beta 32-bit MSVC
- channel: beta
target: i686-pc-windows-msvc
# Nightly 64-bit MSVC
- channel: nightly
target: x86_64-pc-windows-msvc
#cargoflags: --features "unstable"
# Nightly 32-bit MSVC
- channel: nightly
target: i686-pc-windows-msvc
#cargoflags: --features "unstable"
### GNU Toolchains ###
# Stable 64-bit GNU
- channel: stable
target: x86_64-pc-windows-gnu
# Stable 32-bit GNU
- channel: stable
target: i686-pc-windows-gnu
# Beta 64-bit GNU
- channel: beta
target: x86_64-pc-windows-gnu
# Beta 32-bit GNU
- channel: beta
target: i686-pc-windows-gnu
# Nightly 64-bit GNU
- channel: nightly
target: x86_64-pc-windows-gnu
#cargoflags: --features "unstable"
# Nightly 32-bit GNU
- channel: nightly
target: i686-pc-windows-gnu
#cargoflags: --features "unstable"
### Allowed failures ###
# See Appveyor documentation for specific details. In short, place any channel or targets you wish
# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build
# or test failure in the matching channels/targets from failing the entire build.
matrix:
allow_failures:
- channel: nightly
# If you only care about stable channel build failures, uncomment the following line:
#- channel: beta
## Install Script ##
# This is the most important part of the Appveyor configuration. This installs the version of Rust
# specified by the 'channel' and 'target' environment variables from the build matrix. This uses
# rustup to install Rust.
#
# For simple configurations, instead of using the build matrix, you can simply set the
# default-toolchain and default-host manually here.
install:
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- rustup-init -yv --default-toolchain %channel% --default-host %target%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
- rustc -vV
- cargo -vV
## Build Script ##
# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents
# the "directory does not contain a project or solution file" error.
build: false
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
#directly or perform other testing commands. Rust will automatically be placed in the PATH
# environment variable.
test_script:
- cargo test --verbose %cargoflags%

4
bors.toml Normal file
View File

@ -0,0 +1,4 @@
status = [
"continuous-integration/travis-ci/push",
]
delete_merged_branches = true

12
build_pycompat.py Normal file → Executable file
View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
from dateutil.parser import parse from dateutil.parser import parse
from dateutil.tz import tzutc
from datetime import datetime from datetime import datetime
tests = { tests = {
@ -48,7 +50,9 @@ tests = {
'test_parse_offset': [ 'test_parse_offset': [
'Thu, 25 Sep 2003 10:49:41 -0300', '2003-09-25T10:49:41.5-03:00', 'Thu, 25 Sep 2003 10:49:41 -0300', '2003-09-25T10:49:41.5-03:00',
'2003-09-25T10:49:41-03:00', '20030925T104941.5-0300', '2003-09-25T10:49:41-03:00', '20030925T104941.5-0300',
'20030925T104941-0300' '20030925T104941-0300',
# dtparse-specific
"2018-08-10 10:00:00 UTC+3", "2018-08-10 03:36:47 PM GMT-4", "2018-08-10 04:15:00 AM Z-02:00"
], ],
'test_parse_dayfirst': [ 'test_parse_dayfirst': [
'10-09-2003', '10.09.2003', '10/09/2003', '10 09 2003', '10-09-2003', '10.09.2003', '10/09/2003', '10 09 2003',
@ -77,7 +81,7 @@ tests = {
'Thu Sep 25 10:36:28 BRST 2003', '1996.07.10 AD at 15:08:56 PDT', 'Thu Sep 25 10:36:28 BRST 2003', '1996.07.10 AD at 15:08:56 PDT',
'Tuesday, April 12, 1952 AD 3:30:42pm PST', 'Tuesday, April 12, 1952 AD 3:30:42pm PST',
'November 5, 1994, 8:15:30 am EST', '1994-11-05T08:15:30-05:00', 'November 5, 1994, 8:15:30 am EST', '1994-11-05T08:15:30-05:00',
'1994-11-05T08:15:30Z', '1976-07-04T00:01:02Z', '1994-11-05T08:15:30Z', '1976-07-04T00:01:02Z', '1986-07-05T08:15:30z',
'Tue Apr 4 00:22:12 PDT 1995' 'Tue Apr 4 00:22:12 PDT 1995'
], ],
'test_fuzzy_tzinfo': [ 'test_fuzzy_tzinfo': [
@ -229,7 +233,7 @@ fn parse_and_assert(
tzinfos: &HashMap<String, i32>, tzinfos: &HashMap<String, i32>,
) { ) {
let mut parser = Parser::new(info); let parser = Parser::new(info);
let rs_parsed = parser.parse( let rs_parsed = parser.parse(
s, s,
dayfirst, dayfirst,
@ -279,7 +283,7 @@ fn parse_fuzzy_and_assert(
tzinfos: &HashMap<String, i32>, tzinfos: &HashMap<String, i32>,
) { ) {
let mut parser = Parser::new(info); let parser = Parser::new(info);
let rs_parsed = parser.parse( let rs_parsed = parser.parse(
s, s,
dayfirst, dayfirst,

1
build_pycompat_tokenizer.py Normal file → Executable file
View File

@ -1,3 +1,4 @@
#!/usr/bin/python3
from dateutil.parser import _timelex from dateutil.parser import _timelex
from build_pycompat import tests from build_pycompat import tests

View File

@ -14,7 +14,27 @@ main() {
cross test --target $TARGET --release cross test --target $TARGET --release
} }
main_web() {
CARGO_WEB_RELEASE="$(curl -L -s -H 'Accept: application/json' https://github.com/koute/cargo-web/releases/latest)"
CARGO_WEB_VERSION="$(echo $CARGO_WEB_RELEASE | sed -e 's/.*"tag_name":"\([^"]*\)".*/\1/')"
CARGO_WEB_URL="https://github.com/koute/cargo-web/releases/download/$CARGO_WEB_VERSION/cargo-web-x86_64-unknown-linux-gnu.gz"
echo "Downloading cargo-web from: $CARGO_WEB_URL"
curl -L "$CARGO_WEB_URL" | gzip -d > cargo-web
chmod +x cargo-web
mkdir -p ~/.cargo/bin
mv cargo-web ~/.cargo/bin
cargo web build --target $TARGET
cargo web test --target $TARGET --release
}
# we don't run the "test phase" when doing deploys # we don't run the "test phase" when doing deploys
if [ -z $TRAVIS_TAG ]; then if [ -z $TRAVIS_TAG ]; then
main if [ -z "$USE_CARGO_WEB" ]; then
fi main
else
main_web
fi
fi

View File

@ -8,7 +8,6 @@ use dtparse::ParserInfo;
use std::collections::HashMap; use std::collections::HashMap;
fn main() { fn main() {
// In this example, we'll just swap the default "months" parameter // In this example, we'll just swap the default "months" parameter
// with a version in Russian. Lovingly taken from: // with a version in Russian. Lovingly taken from:
// https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244 // https://github.com/dateutil/dateutil/blob/99f5770e7c63aa049b28abe465d7f1cc25b63fd2/dateutil/test/test_parser.py#L244
@ -26,14 +25,24 @@ fn main() {
vec!["сен", "Сентябрь"], vec!["сен", "Сентябрь"],
vec!["окт", "Октябрь"], vec!["окт", "Октябрь"],
vec!["ноя", "Ноябрь"], vec!["ноя", "Ноябрь"],
vec!["дек", "Декабрь"] vec!["дек", "Декабрь"],
]); ]);
let mut p = Parser::new(info); let p = Parser::new(info);
assert_eq!( assert_eq!(
p.parse("10 Сентябрь 2015 10:20", None, None, false, false, None, false, &HashMap::new()) p.parse(
.unwrap().0, "10 Сентябрь 2015 10:20",
None,
None,
false,
false,
None,
false,
&HashMap::new()
)
.unwrap()
.0,
NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0) NaiveDate::from_ymd(2015, 9, 10).and_hms(10, 20, 0)
); );
} }

View File

@ -1,26 +1,27 @@
#![deny(missing_docs)] #![deny(missing_docs)]
#![cfg_attr(test, allow(unknown_lints))]
#![cfg_attr(test, deny(warnings))] #![cfg_attr(test, deny(warnings))]
//! # dtparse //! # dtparse
//! The fully-featured "even I couldn't understand that" time parser. //! The fully-featured "even I couldn't understand that" time parser.
//! Designed to take in strings and give back sensible dates and times. //! Designed to take in strings and give back sensible dates and times.
//! //!
//! dtparse has its foundations in the [`dateutil`](dateutil) library for //! dtparse has its foundations in the [`dateutil`](dateutil) library for
//! Python, which excels at taking "interesting" strings and trying to make //! Python, which excels at taking "interesting" strings and trying to make
//! sense of the dates and times they contain. A couple of quick examples //! sense of the dates and times they contain. A couple of quick examples
//! from the test cases should give some context: //! from the test cases should give some context:
//! //!
//! ```rust //! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono; //! # extern crate chrono;
//! # extern crate dtparse; //! # extern crate dtparse;
//! use chrono::prelude::*; //! use chrono::prelude::*;
//! use dtparse::parse; //! use dtparse::parse;
//! //!
//! assert_eq!( //! assert_eq!(
//! parse("2008.12.30"), //! parse("2008.12.30"),
//! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None)) //! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None))
//! ); //! );
//! //!
//! // It can even handle timezones! //! // It can even handle timezones!
//! assert_eq!( //! assert_eq!(
//! parse("January 4, 2024; 18:30:04 +02:00"), //! parse("January 4, 2024; 18:30:04 +02:00"),
@ -30,17 +31,17 @@
//! )) //! ))
//! ); //! );
//! ``` //! ```
//! //!
//! And we can even handle fuzzy strings where dates/times aren't the //! And we can even handle fuzzy strings where dates/times aren't the
//! only content if we dig into the implementation a bit! //! only content if we dig into the implementation a bit!
//! //!
//! ```rust //! ```rust,ignore (tests-dont-compile-on-old-rust)
//! # extern crate chrono; //! # extern crate chrono;
//! # extern crate dtparse; //! # extern crate dtparse;
//! use chrono::prelude::*; //! use chrono::prelude::*;
//! use dtparse::Parser; //! use dtparse::Parser;
//! # use std::collections::HashMap; //! # use std::collections::HashMap;
//! //!
//! let mut p = Parser::default(); //! let mut p = Parser::default();
//! assert_eq!( //! assert_eq!(
//! p.parse( //! p.parse(
@ -58,15 +59,22 @@
//! )) //! ))
//! ); //! );
//! ``` //! ```
//! //!
//! Further examples can be found in the `examples` directory on international usage. //! Further examples can be found in the `examples` directory on international usage.
//! //!
//! # Usage
//!
//! `dtparse` requires a minimum Rust version of 1.28 to build, and is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
//! tested against them.
//!
//! [dateutil]: https://github.com/dateutil/dateutil //! [dateutil]: https://github.com/dateutil/dateutil
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
extern crate chrono; extern crate chrono;
extern crate chrono_tz;
extern crate num_traits; extern crate num_traits;
extern crate rust_decimal; extern crate rust_decimal;
@ -81,8 +89,10 @@ use chrono::Timelike;
use num_traits::cast::ToPrimitive; use num_traits::cast::ToPrimitive;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use rust_decimal::Error as DecimalError; use rust_decimal::Error as DecimalError;
use std::collections::HashMap;
use std::cmp::min; use std::cmp::min;
use std::collections::HashMap;
use std::error::Error;
use std::fmt;
use std::num::ParseIntError; use std::num::ParseIntError;
use std::str::FromStr; use std::str::FromStr;
use std::vec::Vec; use std::vec::Vec;
@ -102,6 +112,7 @@ lazy_static! {
static ref ONE: Decimal = Decimal::new(1, 0); static ref ONE: Decimal = Decimal::new(1, 0);
static ref TWENTY_FOUR: Decimal = Decimal::new(24, 0); static ref TWENTY_FOUR: Decimal = Decimal::new(24, 0);
static ref SIXTY: Decimal = Decimal::new(60, 0); static ref SIXTY: Decimal = Decimal::new(60, 0);
static ref DEFAULT_PARSER: Parser = Parser::default();
} }
impl From<DecimalError> for ParseError { impl From<DecimalError> for ParseError {
@ -136,8 +147,18 @@ pub enum ParseError {
/// Parser unable to make sense of year/month/day parameters in the time string; /// Parser unable to make sense of year/month/day parameters in the time string;
/// please report to maintainer as the timestring likely exposes a bug in implementation /// please report to maintainer as the timestring likely exposes a bug in implementation
YearMonthDayError(&'static str), YearMonthDayError(&'static str),
/// Parser unable to find any date/time-related content in the supplied string
NoDate,
} }
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl Error for ParseError {}
type ParseResult<I> = Result<I, ParseError>; type ParseResult<I> = Result<I, ParseError>;
pub(crate) fn tokenize(parse_string: &str) -> Vec<String> { pub(crate) fn tokenize(parse_string: &str) -> Vec<String> {
@ -151,12 +172,12 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
let mut m = HashMap::new(); let mut m = HashMap::new();
if vec.len() == 1 { if vec.len() == 1 {
for (i, val) in vec.get(0).unwrap().into_iter().enumerate() { for (i, val) in vec.get(0).unwrap().iter().enumerate() {
m.insert(val.to_lowercase(), i); m.insert(val.to_lowercase(), i);
} }
} else { } else {
for (i, val_vec) in vec.into_iter().enumerate() { for (i, val_vec) in vec.iter().enumerate() {
for val in val_vec.into_iter() { for val in val_vec {
m.insert(val.to_lowercase(), i); m.insert(val.to_lowercase(), i);
} }
} }
@ -166,7 +187,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
} }
/// Container for specific tokens to be recognized during parsing. /// Container for specific tokens to be recognized during parsing.
/// ///
/// - `jump`: Values that indicate the end of a token for parsing and can be ignored /// - `jump`: Values that indicate the end of a token for parsing and can be ignored
/// - `weekday`: Names of the days of the week /// - `weekday`: Names of the days of the week
/// - `months`: Names of the months /// - `months`: Names of the months
@ -179,7 +200,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
/// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year /// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year
/// - `year`: The current year /// - `year`: The current year
/// - `century`: The first year in the current century /// - `century`: The first year in the current century
/// ///
/// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence /// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence
/// and will be parsed as "YDM" /// and will be parsed as "YDM"
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -220,12 +241,10 @@ impl Default for ParserInfo {
let century = year / 100 * 100; let century = year / 100 * 100;
ParserInfo { ParserInfo {
jump: parse_info(vec![ jump: parse_info(vec![vec![
vec![ " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
" ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "nd", "rd", "th",
"st", "nd", "rd", "th", ]]),
],
]),
weekday: parse_info(vec![ weekday: parse_info(vec![
vec!["Mon", "Monday"], vec!["Mon", "Monday"],
vec!["Tue", "Tues", "Tuesday"], vec!["Tue", "Tues", "Tuesday"],
@ -260,8 +279,8 @@ impl Default for ParserInfo {
tzoffset: parse_info(vec![vec![]]), tzoffset: parse_info(vec![vec![]]),
dayfirst: false, dayfirst: false,
yearfirst: false, yearfirst: false,
year: year, year,
century: century, century,
} }
} }
} }
@ -272,7 +291,7 @@ impl ParserInfo {
} }
fn weekday_index(&self, name: &str) -> Option<usize> { fn weekday_index(&self, name: &str) -> Option<usize> {
self.weekday.get(&name.to_lowercase()).map(|i| *i) self.weekday.get(&name.to_lowercase()).cloned()
} }
fn month_index(&self, name: &str) -> Option<usize> { fn month_index(&self, name: &str) -> Option<usize> {
@ -280,7 +299,7 @@ impl ParserInfo {
} }
fn hms_index(&self, name: &str) -> Option<usize> { fn hms_index(&self, name: &str) -> Option<usize> {
self.hms.get(&name.to_lowercase()).map(|i| *i) self.hms.get(&name.to_lowercase()).cloned()
} }
fn ampm_index(&self, name: &str) -> Option<bool> { fn ampm_index(&self, name: &str) -> Option<bool> {
@ -330,10 +349,13 @@ impl ParserInfo {
res.year = Some(self.convertyear(y, res.century_specified)) res.year = Some(self.convertyear(y, res.century_specified))
}; };
if res.tzoffset == Some(0) && res.tzname.is_none() || res.tzname == Some("Z".to_owned()) { if (res.tzoffset == Some(0) && res.tzname.is_none())
|| (res.tzname == Some("Z".to_owned()) || res.tzname == Some("z".to_owned()))
{
res.tzname = Some("UTC".to_owned()); res.tzname = Some("UTC".to_owned());
res.tzoffset = Some(0); res.tzoffset = Some(0);
} else if res.tzoffset != Some(0) && res.tzname.is_some() } else if res.tzoffset != Some(0)
&& res.tzname.is_some()
&& self.utczone_index(res.tzname.as_ref().unwrap()) && self.utczone_index(res.tzname.as_ref().unwrap())
{ {
res.tzoffset = Some(0); res.tzoffset = Some(0);
@ -350,16 +372,16 @@ fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
}; };
match month { match month {
2 => if leap_year { 2 => {
Ok(29) if leap_year {
} else { Ok(29)
Ok(28) } else {
}, Ok(28)
}
}
1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
4 | 6 | 9 | 11 => Ok(30), 4 | 6 | 9 | 11 => Ok(30),
_ => { _ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
}
} }
} }
@ -413,9 +435,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -427,9 +447,7 @@ impl YMD {
Some(YMDLabel::Month) => { Some(YMDLabel::Month) => {
return Err(ParseError::ImpossibleTimestamp("Invalid month")) return Err(ParseError::ImpossibleTimestamp("Invalid month"))
} }
Some(YMDLabel::Day) => { Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
return Err(ParseError::ImpossibleTimestamp("Invalid day"))
}
} }
} }
@ -477,7 +495,7 @@ impl YMD {
YMDLabel::Day YMDLabel::Day
}; };
let strids_vals: Vec<usize> = strids.values().map(|u| u.clone()).collect(); let strids_vals: Vec<usize> = strids.values().cloned().collect();
let missing_val = if !strids_vals.contains(&0) { let missing_val = if !strids_vals.contains(&0) {
0 0
} else if !strids_vals.contains(&1) { } else if !strids_vals.contains(&1) {
@ -490,22 +508,19 @@ impl YMD {
} }
if self._ymd.len() != strids.len() { if self._ymd.len() != strids.len() {
return Err(ParseError::YearMonthDayError("Tried to resolve year, month, and day without enough information")); return Err(ParseError::YearMonthDayError(
"Tried to resolve year, month, and day without enough information",
));
} }
Ok(( Ok((
strids strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
.get(&YMDLabel::Year) strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
.map(|i| self._ymd[*i]), strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Month)
.map(|i| self._ymd[*i]),
strids
.get(&YMDLabel::Day)
.map(|i| self._ymd[*i]),
)) ))
} }
#[allow(clippy::needless_return)]
fn resolve_ymd( fn resolve_ymd(
&mut self, &mut self,
yearfirst: bool, yearfirst: bool,
@ -514,28 +529,24 @@ impl YMD {
let len_ymd = self._ymd.len(); let len_ymd = self._ymd.len();
let mut strids: HashMap<YMDLabel, usize> = HashMap::new(); let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
self.ystridx self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
.map(|u| strids.insert(YMDLabel::Year, u.clone())); self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
self.mstridx self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
.map(|u| strids.insert(YMDLabel::Month, u.clone()));
self.dstridx
.map(|u| strids.insert(YMDLabel::Day, u.clone()));
// TODO: More Rustiomatic way of doing this? // TODO: More Rustiomatic way of doing this?
if len_ymd == strids.len() && strids.len() > 0 if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
|| (len_ymd == 3 && strids.len() == 2)
{
return self.resolve_from_stridxs(&mut strids); return self.resolve_from_stridxs(&mut strids);
}; };
// Received year, month, day, and ??? // Received year, month, day, and ???
if len_ymd > 3 { if len_ymd > 3 {
return Err(ParseError::YearMonthDayError("Received extra tokens in resolving year, month, and day")); return Err(ParseError::YearMonthDayError(
"Received extra tokens in resolving year, month, and day",
));
} }
match (len_ymd, self.mstridx) { match (len_ymd, self.mstridx) {
(1, Some(val)) | (1, Some(val)) | (2, Some(val)) => {
(2, Some(val)) => {
let other = if len_ymd == 1 { let other = if len_ymd == 1 {
self._ymd[0] self._ymd[0]
} else { } else {
@ -545,7 +556,7 @@ impl YMD {
return Ok((Some(other), Some(self._ymd[val]), None)); return Ok((Some(other), Some(self._ymd[val]), None));
} }
return Ok((None, Some(self._ymd[val]), Some(other))); return Ok((None, Some(self._ymd[val]), Some(other)));
}, }
(2, None) => { (2, None) => {
if self._ymd[0] > 31 { if self._ymd[0] > 31 {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None)); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
@ -557,28 +568,29 @@ impl YMD {
return Ok((None, Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((None, Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(0)) => { (3, Some(0)) => {
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2]))); return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(3, Some(1)) => { (3, Some(1)) => {
if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) { if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2]))); return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
}, }
(3, Some(2)) => { (3, Some(2)) => {
// It was in the original docs, so: WTF!? // It was in the original docs, so: WTF!?
if self._ymd[1] > 31 { if self._ymd[1] > 31 {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1]))); return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
}, }
(3, None) => { (3, None) => {
if self._ymd[0] > 31 || self.ystridx == Some(0) if self._ymd[0] > 31
|| self.ystridx == Some(0)
|| (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31) || (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
{ {
if dayfirst && self._ymd[2] <= 12 { if dayfirst && self._ymd[2] <= 12 {
@ -589,8 +601,10 @@ impl YMD {
return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0]))); return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
} }
return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1]))); return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
}, }
(_, _) => { return Ok((None, None, None)); }, (_, _) => {
return Ok((None, None, None));
}
} }
} }
} }
@ -612,6 +626,31 @@ struct ParsingResult {
any_unused_tokens: Vec<String>, any_unused_tokens: Vec<String>,
} }
/// Expands to `1` when the given `Option` expression is `Some` and to `0`
/// when it is `None`.
///
/// The expression is only inspected through `is_none()`, so it is borrowed
/// rather than consumed.
macro_rules! option_len {
    ($opt:expr) => {{
        if $opt.is_none() {
            0
        } else {
            1
        }
    }};
}
impl ParsingResult {
    /// Returns how many of the counted result fields hold a value.
    ///
    /// The fields counted are `year`, `month`, `day`, `weekday`, `hour`,
    /// `minute`, `second`, `microsecond`, `tzname` and `ampm`; each one that
    /// is `Some` contributes one to the total.
    fn len(&self) -> usize {
        let populated: [usize; 10] = [
            option_len!(self.year),
            option_len!(self.month),
            option_len!(self.day),
            option_len!(self.weekday),
            option_len!(self.hour),
            option_len!(self.minute),
            option_len!(self.second),
            option_len!(self.microsecond),
            option_len!(self.tzname),
            option_len!(self.ampm),
        ];
        populated.iter().sum()
    }
}
/// Parser is responsible for doing the actual work of understanding a time string. /// Parser is responsible for doing the actual work of understanding a time string.
/// The root level `parse` function is responsible for constructing a default `Parser` /// The root level `parse` function is responsible for constructing a default `Parser`
/// and triggering its behavior. /// and triggering its behavior.
@ -622,7 +661,7 @@ pub struct Parser {
impl Parser { impl Parser {
/// Create a new `Parser` instance using the provided `ParserInfo`. /// Create a new `Parser` instance using the provided `ParserInfo`.
/// ///
/// This method allows you to set up a parser to handle different /// This method allows you to set up a parser to handle different
/// names for days of the week, months, etc., enabling customization /// names for days of the week, months, etc., enabling customization
/// for different languages or extra values. /// for different languages or extra values.
@ -633,35 +672,36 @@ impl Parser {
/// Main method to trigger parsing of a string using the previously-provided /// Main method to trigger parsing of a string using the previously-provided
/// parser information. Returns a naive timestamp along with timezone and /// parser information. Returns a naive timestamp along with timezone and
/// unused tokens if available. /// unused tokens if available.
/// ///
/// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous /// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous
/// dates. Consider the following scenarios where we parse the string '01.02.03' /// dates. Consider the following scenarios where we parse the string '01.02.03'
/// ///
/// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003` /// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003`
/// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001` /// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001`
/// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001` /// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001`
/// ///
/// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if /// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if
/// they are unrecognized. However, the unused tokens will not be returned /// they are unrecognized. However, the unused tokens will not be returned
/// unless `fuzzy_with_tokens` is set as `true`. /// unless `fuzzy_with_tokens` is set as `true`.
/// ///
/// `default` is the timestamp used to infer missing values, and is midnight /// `default` is the timestamp used to infer missing values, and is midnight
/// of the current day by default. For example, when parsing the text '2003', /// of the current day by default. For example, when parsing the text '2003',
/// we will use the current month and day as a default value, leading to a /// we will use the current month and day as a default value, leading to a
/// result of 'March 3, 2003' if the function was run using a default of /// result of 'March 3, 2003' if the function was run using a default of
/// March 3rd. /// March 3rd.
/// ///
/// `ignoretz` forces the parser to ignore timezone information even if it /// `ignoretz` forces the parser to ignore timezone information even if it
/// is recognized in the time string /// is recognized in the time string
/// ///
/// `tzinfos` is a map of timezone names to the offset seconds. For example, /// `tzinfos` is a map of timezone names to the offset seconds. For example,
/// the parser would ignore the 'EST' part of the string in '10 AM EST' /// the parser would ignore the 'EST' part of the string in '10 AM EST'
/// unless you added a `tzinfos` map of `{"EST": "14400"}`. Please note that /// unless you added a `tzinfos` map of `{"EST": "14400"}`. Please note that
/// timezone name support (i.e. "EST", "BRST") is not available by default /// timezone name support (i.e. "EST", "BRST") is not available by default
/// at the moment, they must be added through `tzinfos` at the moment in /// at the moment, they must be added through `tzinfos` at the moment in
/// order to be resolved. /// order to be resolved.
#[allow(clippy::too_many_arguments)]
pub fn parse( pub fn parse(
&mut self, &self,
timestr: &str, timestr: &str,
dayfirst: Option<bool>, dayfirst: Option<bool>,
yearfirst: Option<bool>, yearfirst: Option<bool>,
@ -678,6 +718,10 @@ impl Parser {
let (res, tokens) = let (res, tokens) =
self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?; self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;
if res.len() == 0 {
return Err(ParseError::NoDate);
}
let naive = self.build_naive(&res, &default_ts)?; let naive = self.build_naive(&res, &default_ts)?;
if !ignoretz { if !ignoretz {
@ -688,8 +732,9 @@ impl Parser {
} }
} }
#[allow(clippy::cognitive_complexity)] // Imitating Python API is priority
fn parse_with_tokens( fn parse_with_tokens(
&mut self, &self,
timestr: &str, timestr: &str,
dayfirst: Option<bool>, dayfirst: Option<bool>,
yearfirst: Option<bool>, yearfirst: Option<bool>,
@ -734,20 +779,22 @@ impl Parser {
// Jan-01[-99] // Jan-01[-99]
let sep = &l[i + 1]; let sep = &l[i + 1];
// TODO: This seems like a very unsafe unwrap // TODO: This seems like a very unsafe unwrap
ymd.append(l[i + 2].parse::<i32>().unwrap(), &l[i + 2], None)?; ymd.append(l[i + 2].parse::<i32>()?, &l[i + 2], None)?;
if i + 3 < len_l && &l[i + 3] == sep { if i + 3 < len_l && &l[i + 3] == sep {
// Jan-01-99 // Jan-01-99
ymd.append(l[i + 4].parse::<i32>().unwrap(), &l[i + 4], None)?; ymd.append(l[i + 4].parse::<i32>()?, &l[i + 4], None)?;
i += 2; i += 2;
} }
i += 2; i += 2;
} else if i + 4 < len_l && l[i + 1] == l[i + 3] && l[i + 3] == " " } else if i + 4 < len_l
&& l[i + 1] == l[i + 3]
&& l[i + 3] == " "
&& self.info.pertain_index(&l[i + 2]) && self.info.pertain_index(&l[i + 2])
{ {
// Jan of 01 // Jan of 01
if let Some(value) = l[i + 4].parse::<i32>().ok() { if let Ok(value) = l[i + 4].parse::<i32>() {
let year = self.info.convertyear(value, false); let year = self.info.convertyear(value, false);
ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?; ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?;
} }
@ -764,7 +811,7 @@ impl Parser {
} else if fuzzy { } else if fuzzy {
skipped_idxs.push(i); skipped_idxs.push(i);
} }
} else if self.could_be_tzname(res.hour, res.tzname.clone(), res.tzoffset, &l[i]) { } else if self.could_be_tzname(res.hour, &res.tzname, res.tzoffset, &l[i]) {
res.tzname = Some(l[i].clone()); res.tzname = Some(l[i].clone());
let tzname = res.tzname.clone().unwrap(); let tzname = res.tzname.clone().unwrap();
@ -779,10 +826,9 @@ impl Parser {
let item = if l[i + 1] == "+" { let item = if l[i + 1] == "+" {
"-".to_owned() "-".to_owned()
} else { } else {
"-".to_owned() "+".to_owned()
}; };
l.remove(i + 1); l[i + 1] = item;
l.insert(i + 1, item);
res.tzoffset = None; res.tzoffset = None;
@ -800,16 +846,17 @@ impl Parser {
// TODO: check that l[i + 1] is integer? // TODO: check that l[i + 1] is integer?
if len_li == 4 { if len_li == 4 {
// -0300 // -0300
hour_offset = Some(l[i + 1][..2].parse::<i32>().unwrap()); hour_offset = Some(l[i + 1][..2].parse::<i32>()?);
min_offset = Some(l[i + 1][2..4].parse::<i32>().unwrap()); min_offset = Some(l[i + 1][2..4].parse::<i32>()?);
} else if i + 2 < len_l && l[i + 2] == ":" { } else if i + 2 < len_l && l[i + 2] == ":" {
// -03:00 // -03:00
hour_offset = Some(l[i + 1].parse::<i32>().unwrap()); hour_offset = Some(l[i + 1].parse::<i32>()?);
min_offset = Some(l[i + 3].parse::<i32>().unwrap()); min_offset = Some(l[i + 3].parse::<i32>()?);
i += 2; i += 2;
} else if len_li <= 2 { } else if len_li <= 2 {
// -[0]3 // -[0]3
hour_offset = Some(l[i + 1][..2].parse::<i32>().unwrap()); let range_len = min(l[i + 1].len(), 2);
hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
min_offset = Some(0); min_offset = Some(0);
} }
@ -817,9 +864,12 @@ impl Parser {
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60)); Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
let tzname = res.tzname.clone(); let tzname = res.tzname.clone();
if i + 5 < len_l && self.info.jump_index(&l[i + 2]) && l[i + 3] == "(" if i + 5 < len_l
&& l[i + 5] == ")" && 3 <= l[i + 4].len() && self.info.jump_index(&l[i + 2])
&& self.could_be_tzname(res.hour, tzname, None, &l[i + 4]) && l[i + 3] == "("
&& l[i + 5] == ")"
&& 3 <= l[i + 4].len()
&& self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
{ {
// (GMT) // (GMT)
res.tzname = Some(l[i + 4].clone()); res.tzname = Some(l[i + 4].clone());
@ -856,23 +906,24 @@ impl Parser {
fn could_be_tzname( fn could_be_tzname(
&self, &self,
hour: Option<i32>, hour: Option<i32>,
tzname: Option<String>, tzname: &Option<String>,
tzoffset: Option<i32>, tzoffset: Option<i32>,
token: &str, token: &str,
) -> bool { ) -> bool {
let all_ascii_upper = token let all_ascii_upper = token
.chars() .chars()
.all(|c| 65u8 as char <= c && c <= 90u8 as char); .all(|c| 65u8 as char <= c && c <= 90u8 as char);
return hour.is_some() && tzname.is_none() && tzoffset.is_none() && token.len() <= 5
&& all_ascii_upper; hour.is_some()
&& tzname.is_none()
&& tzoffset.is_none()
&& token.len() <= 5
&& (all_ascii_upper || self.info.utczone.contains_key(token))
} }
#[allow(clippy::unnecessary_unwrap)]
fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> { fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> {
let mut val_is_ampm = true; let mut val_is_ampm = !(fuzzy && ampm.is_some());
if fuzzy && ampm.is_some() {
val_is_ampm = false;
}
if hour.is_none() { if hour.is_none() {
if fuzzy { if fuzzy {
@ -891,9 +942,13 @@ impl Parser {
Ok(val_is_ampm) Ok(val_is_ampm)
} }
fn build_naive(&self, res: &ParsingResult, default: &NaiveDateTime) -> ParseResult<NaiveDateTime> { fn build_naive(
let y = res.year.unwrap_or(default.year()); &self,
let m = res.month.unwrap_or(default.month() as i32) as u32; res: &ParsingResult,
default: &NaiveDateTime,
) -> ParseResult<NaiveDateTime> {
let y = res.year.unwrap_or_else(|| default.year());
let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
let d_offset = if res.weekday.is_some() && res.day.is_none() { let d_offset = if res.weekday.is_some() && res.day.is_none() {
// TODO: Unwrap not justified // TODO: Unwrap not justified
@ -902,7 +957,7 @@ impl Parser {
// UNWRAP: We've already check res.weekday() is some // UNWRAP: We've already check res.weekday() is some
let actual_weekday = (res.weekday.unwrap() + 1) % 7; let actual_weekday = (res.weekday.unwrap() + 1) % 7;
let other = DayOfWeek::from_numeral(actual_weekday as u32); let other = DayOfWeek::from_numeral(actual_weekday as u32);
Duration::days(dow.difference(other) as i64) Duration::days(i64::from(dow.difference(&other)))
} else { } else {
Duration::days(0) Duration::days(0)
}; };
@ -911,7 +966,10 @@ impl Parser {
let d = NaiveDate::from_ymd( let d = NaiveDate::from_ymd(
y, y,
m, m,
min(res.day.unwrap_or(default.day() as i32) as u32, days_in_month(y, m as i32)?) min(
res.day.unwrap_or(default.day() as i32) as u32,
days_in_month(y, m as i32)?,
),
); );
let d = d + d_offset; let d = d + d_offset;
@ -919,21 +977,23 @@ impl Parser {
let hour = res.hour.unwrap_or(default.hour() as i32) as u32; let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32; let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32; let second = res.second.unwrap_or(default.second() as i32) as u32;
let microsecond = res.microsecond let microsecond = res
.microsecond
.unwrap_or(default.timestamp_subsec_micros() as i32) as u32; .unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
let t = NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| { let t =
if hour >= 24 { NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
ParseError::ImpossibleTimestamp("Invalid hour") if hour >= 24 {
} else if minute >= 60 { ParseError::ImpossibleTimestamp("Invalid hour")
ParseError::ImpossibleTimestamp("Invalid minute") } else if minute >= 60 {
} else if second >= 60 { ParseError::ImpossibleTimestamp("Invalid minute")
ParseError::ImpossibleTimestamp("Invalid second") } else if second >= 60 {
} else if microsecond >= 2_000_000 { ParseError::ImpossibleTimestamp("Invalid second")
ParseError::ImpossibleTimestamp("Invalid microsecond") } else if microsecond >= 2_000_000 {
} else { ParseError::ImpossibleTimestamp("Invalid microsecond")
unreachable!(); } else {
} unreachable!();
})?; }
})?;
Ok(NaiveDateTime::new(d, t)) Ok(NaiveDateTime::new(d, t))
} }
@ -944,30 +1004,31 @@ impl Parser {
res: &ParsingResult, res: &ParsingResult,
tzinfos: &HashMap<String, i32>, tzinfos: &HashMap<String, i32>,
) -> ParseResult<Option<FixedOffset>> { ) -> ParseResult<Option<FixedOffset>> {
// TODO: Actual timezone support
if let Some(offset) = res.tzoffset { if let Some(offset) = res.tzoffset {
Ok(Some(FixedOffset::east(offset))) Ok(Some(FixedOffset::east(offset)))
} else if res.tzoffset == None } else if res.tzoffset == None
&& (res.tzname == Some(" ".to_owned()) || res.tzname == Some(".".to_owned()) && (res.tzname == Some(" ".to_owned())
|| res.tzname == Some("-".to_owned()) || res.tzname == None) || res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
{ {
Ok(None) Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) { } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
Ok(Some(FixedOffset::east( Ok(Some(FixedOffset::east(
tzinfos.get(res.tzname.as_ref().unwrap()).unwrap().clone(), *tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
))) )))
} else if res.tzname.is_some() { } else if let Some(tzname) = res.tzname.as_ref() {
// TODO: Dateutil issues a warning/deprecation notice here. Should we force the issue? println!("tzname {} identified but not understood.", tzname);
println!("tzname {} identified but not understood. Ignoring for the time being, but behavior is subject to change.", res.tzname.as_ref().unwrap());
Ok(None) Ok(None)
} else { } else {
Err(ParseError::TimezoneUnsupported) Err(ParseError::TimezoneUnsupported)
} }
} }
#[allow(clippy::unnecessary_unwrap)]
fn parse_numeric_token( fn parse_numeric_token(
&self, &self,
tokens: &Vec<String>, tokens: &[String],
idx: usize, idx: usize,
info: &ParserInfo, info: &ParserInfo,
ymd: &mut YMD, ymd: &mut YMD,
@ -983,7 +1044,9 @@ impl Parser {
// TODO: I miss the `x in y` syntax // TODO: I miss the `x in y` syntax
// TODO: Decompose this logic a bit // TODO: Decompose this logic a bit
if ymd.len() == 3 && (len_li == 2 || len_li == 4) && res.hour.is_none() if ymd.len() == 3
&& (len_li == 2 || len_li == 4)
&& res.hour.is_none()
&& (idx + 1 >= len_l && (idx + 1 >= len_l
|| (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none())) || (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
{ {
@ -994,14 +1057,14 @@ impl Parser {
if len_li == 4 { if len_li == 4 {
res.minute = Some(s[2..4].parse::<i32>()?) res.minute = Some(s[2..4].parse::<i32>()?)
} }
} else if len_li == 6 || (len_li > 6 && tokens[idx].find(".") == Some(6)) { } else if len_li == 6 || (len_li > 6 && tokens[idx].find('.') == Some(6)) {
// YYMMDD or HHMMSS[.ss] // YYMMDD or HHMMSS[.ss]
let s = &tokens[idx]; let s = &tokens[idx];
if ymd.len() == 0 && tokens[idx].find(".") == None { if ymd.len() == 0 && tokens[idx].find('.') == None {
ymd.append(s[0..2].parse::<i32>().unwrap(), &s[0..2], None)?; ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?;
ymd.append(s[2..4].parse::<i32>().unwrap(), &s[2..4], None)?; ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
} else { } else {
// 19990101T235959[.59] // 19990101T235959[.59]
res.hour = s[0..2].parse::<i32>().ok(); res.hour = s[0..2].parse::<i32>().ok();
@ -1014,9 +1077,9 @@ impl Parser {
} else if vec![8, 12, 14].contains(&len_li) { } else if vec![8, 12, 14].contains(&len_li) {
// YYMMDD // YYMMDD
let s = &tokens[idx]; let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>().unwrap(), &s[..4], Some(YMDLabel::Year))?; ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?;
ymd.append(s[4..6].parse::<i32>().unwrap(), &s[4..6], None)?; ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
ymd.append(s[6..8].parse::<i32>().unwrap(), &s[6..8], None)?; ymd.append(s[6..8].parse::<i32>()?, &s[6..8], None)?;
if len_li > 8 { if len_li > 8 {
res.hour = Some(s[8..10].parse::<i32>()?); res.hour = Some(s[8..10].parse::<i32>()?);
@ -1058,7 +1121,7 @@ impl Parser {
{ {
// TODO: There's got to be a better way of handling the condition above // TODO: There's got to be a better way of handling the condition above
let sep = &tokens[idx + 1]; let sep = &tokens[idx + 1];
ymd.append(value_repr.parse::<i32>().unwrap(), &value_repr, None)?; ymd.append(value_repr.parse::<i32>()?, &value_repr, None)?;
if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) { if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) {
if let Ok(val) = tokens[idx + 2].parse::<i32>() { if let Ok(val) = tokens[idx + 2].parse::<i32>() {
@ -1070,12 +1133,10 @@ impl Parser {
if idx + 3 < len_l && &tokens[idx + 3] == sep { if idx + 3 < len_l && &tokens[idx + 3] == sep {
if let Some(value) = info.month_index(&tokens[idx + 4]) { if let Some(value) = info.month_index(&tokens[idx + 4]) {
ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?; ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
} else if let Ok(val) = tokens[idx + 4].parse::<i32>() {
ymd.append(val, &tokens[idx + 4], None)?;
} else { } else {
if let Ok(val) = tokens[idx + 4].parse::<i32>() { return Err(ParseError::UnrecognizedFormat);
ymd.append(val, &tokens[idx + 4], None)?;
} else {
return Err(ParseError::UnrecognizedFormat);
}
} }
idx += 2; idx += 2;
@ -1123,8 +1184,8 @@ impl Parser {
} }
fn parsems(&self, seconds_str: &str) -> ParseResult<(i32, i32)> { fn parsems(&self, seconds_str: &str) -> ParseResult<(i32, i32)> {
if seconds_str.contains(".") { if seconds_str.contains('.') {
let split: Vec<&str> = seconds_str.split(".").collect(); let split: Vec<&str> = seconds_str.split('.').collect();
let (i, f): (&str, &str) = (split[0], split[1]); let (i, f): (&str, &str) = (split[0], split[1]);
let i_parse = i.parse::<i32>()?; let i_parse = i.parse::<i32>()?;
@ -1138,7 +1199,7 @@ impl Parser {
fn find_hms_index( fn find_hms_index(
&self, &self,
idx: usize, idx: usize,
tokens: &Vec<String>, tokens: &[String],
info: &ParserInfo, info: &ParserInfo,
allow_jump: bool, allow_jump: bool,
) -> Option<usize> { ) -> Option<usize> {
@ -1154,7 +1215,7 @@ impl Parser {
len_l - 2 len_l - 2
} else if idx > 1 { } else if idx > 1 {
idx - 2 idx - 2
} else if len_l == 0{ } else if len_l == 0 {
panic!("Attempting to find_hms_index() wih no tokens."); panic!("Attempting to find_hms_index() wih no tokens.");
} else { } else {
0 0
@ -1162,13 +1223,18 @@ impl Parser {
if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() { if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
hms_idx = Some(idx + 1) hms_idx = Some(idx + 1)
} else if allow_jump && idx + 2 < len_l && tokens[idx + 1] == " " } else if allow_jump
&& idx + 2 < len_l
&& tokens[idx + 1] == " "
&& info.hms_index(&tokens[idx + 2]).is_some() && info.hms_index(&tokens[idx + 2]).is_some()
{ {
hms_idx = Some(idx + 2) hms_idx = Some(idx + 2)
} else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() { } else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
hms_idx = Some(idx - 1) hms_idx = Some(idx - 1)
} else if len_l > 0 && idx > 0 && idx == len_l - 1 && tokens[idx - 1] == " " } else if len_l > 0
&& idx > 0
&& idx == len_l - 1
&& tokens[idx - 1] == " "
&& info.hms_index(&tokens[idx_minus_two]).is_some() && info.hms_index(&tokens[idx_minus_two]).is_some()
{ {
hms_idx = Some(idx - 2) hms_idx = Some(idx - 2)
@ -1177,10 +1243,11 @@ impl Parser {
hms_idx hms_idx
} }
#[allow(clippy::unnecessary_unwrap)]
fn parse_hms( fn parse_hms(
&self, &self,
idx: usize, idx: usize,
tokens: &Vec<String>, tokens: &[String],
info: &ParserInfo, info: &ParserInfo,
hms_index: Option<usize>, hms_index: Option<usize>,
) -> (usize, Option<usize>) { ) -> (usize, Option<usize>) {
@ -1246,10 +1313,10 @@ impl Parser {
if i > 0 && idx - 1 == skipped_idxs[i - 1] { if i > 0 && idx - 1 == skipped_idxs[i - 1] {
// UNWRAP: Having an initial value and unconditional push at end guarantees value // UNWRAP: Having an initial value and unconditional push at end guarantees value
let mut t = skipped_tokens.pop().unwrap(); let mut t = skipped_tokens.pop().unwrap();
t.push_str(tokens[idx.clone()].as_ref()); t.push_str(tokens[*idx].as_ref());
skipped_tokens.push(t); skipped_tokens.push(t);
} else { } else {
skipped_tokens.push(tokens[idx.clone()].to_owned()); skipped_tokens.push(tokens[*idx].to_owned());
} }
} }
@ -1272,11 +1339,11 @@ fn ljust(s: &str, chars: usize, replace: char) -> String {
/// Main entry point for using `dtparse`. The parse function is responsible for /// Main entry point for using `dtparse`. The parse function is responsible for
/// taking in a string representing some time value, and turning it into /// taking in a string representing some time value, and turning it into
/// a timestamp with optional timezone information if it can be identified. /// a timestamp with optional timezone information if it can be identified.
/// ///
/// The default implementation assumes English values for names of months, /// The default implementation assumes English values for names of months,
/// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()` /// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()`
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> { pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
let res = Parser::default().parse( let res = DEFAULT_PARSER.parse(
timestr, timestr,
None, None,
None, None,

View File

@ -7,18 +7,44 @@ use Parser;
#[test] #[test]
fn test_fuzz() { fn test_fuzz() {
assert_eq!(
assert_eq!(parse("\x2D\x38\x31\x39\x34\x38\x34"), Err(ParseError::ImpossibleTimestamp("Invalid month"))); parse("\x2D\x38\x31\x39\x34\x38\x34"),
Err(ParseError::ImpossibleTimestamp("Invalid month"))
);
// Garbage in the third delimited field // Garbage in the third delimited field
assert_eq!(parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"), assert_eq!(
Err(ParseError::UnrecognizedFormat)); parse("2..\x00\x000d\x00+\x010d\x01\x00\x00\x00+"),
// OverflowError: Python int too large to convert to C long Err(ParseError::UnrecognizedFormat)
// assert_eq!(parse("8888884444444888444444444881"), Err(ParseError::AmPmWithoutHour)); );
let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let mut p = Parser::default();
let res = p.parse("\x0D\x31", None, None, false, false, Some(&default), false, &HashMap::new()).unwrap();
assert_eq!(res.0, default);
assert_eq!(parse("\x2D\x2D\x32\x31\x38\x6D"), Err(ParseError::ImpossibleTimestamp("Invalid minute"))); let default = NaiveDate::from_ymd(2016, 6, 29).and_hms(0, 0, 0);
let p = Parser::default();
let res = p.parse(
"\x0D\x31",
None,
None,
false,
false,
Some(&default),
false,
&HashMap::new(),
);
assert_eq!(res, Err(ParseError::NoDate));
assert_eq!(
parse("\x2D\x2D\x32\x31\x38\x6D"),
Err(ParseError::ImpossibleTimestamp("Invalid minute"))
);
}
#[test]
fn large_int() {
    // Parsing this input must come back as an `Err` rather than a value.
    assert!(parse("1412409095009.jpg").is_err());
}
#[test]
fn empty_string() {
assert_eq!(parse(""), Err(ParseError::NoDate))
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,3 @@
//! This code has been generated by running the `build_pycompat_tokenizer.py` script //! This code has been generated by running the `build_pycompat_tokenizer.py` script
//! in the repository root. Please do not edit it, as your edits will be destroyed //! in the repository root. Please do not edit it, as your edits will be destroyed
//! upon re-running code generation. //! upon re-running code generation.
@ -12,7 +11,9 @@ fn tokenize_assert(test_str: &str, comparison: Vec<&str>) {
#[test] #[test]
fn test_tokenize0() { fn test_tokenize0() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28",
];
tokenize_assert("Thu Sep 25 10:36:28", comp); tokenize_assert("Thu Sep 25 10:36:28", comp);
} }
@ -294,7 +295,9 @@ fn test_tokenize46() {
#[test] #[test]
fn test_tokenize47() { fn test_tokenize47() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 2003", comp);
} }
@ -306,7 +309,9 @@ fn test_tokenize48() {
#[test] #[test]
fn test_tokenize49() { fn test_tokenize49() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41",
];
tokenize_assert("2003-09-25T10:49:41", comp); tokenize_assert("2003-09-25T10:49:41", comp);
} }
@ -354,7 +359,9 @@ fn test_tokenize56() {
#[test] #[test]
fn test_tokenize57() { fn test_tokenize57() {
let comp = vec!["2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502"]; let comp = vec![
"2003", "-", "09", "-", "25", " ", "10", ":", "49", ":", "41.502",
];
tokenize_assert("2003-09-25 10:49:41,502", comp); tokenize_assert("2003-09-25 10:49:41,502", comp);
} }
@ -510,7 +517,10 @@ fn test_tokenize82() {
#[test] #[test]
fn test_tokenize83() { fn test_tokenize83() {
let comp = vec![" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":", "01", ":", "02", " ", " ", " ", "am", " ", " "]; let comp = vec![
" ", " ", "July", " ", " ", " ", "4", " ", ",", " ", " ", "1976", " ", " ", " ", "12", ":",
"01", ":", "02", " ", " ", " ", "am", " ", " ",
];
tokenize_assert(" July 4 , 1976 12:01:02 am ", comp); tokenize_assert(" July 4 , 1976 12:01:02 am ", comp);
} }
@ -522,7 +532,9 @@ fn test_tokenize84() {
#[test] #[test]
fn test_tokenize85() { fn test_tokenize85() {
let comp = vec!["1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM"]; let comp = vec![
"1996", ".", "July", ".", "10", " ", "AD", " ", "12", ":", "08", " ", "PM",
];
tokenize_assert("1996.July.10 AD 12:08 PM", comp); tokenize_assert("1996.July.10 AD 12:08 PM", comp);
} }
@ -558,25 +570,33 @@ fn test_tokenize90() {
#[test] #[test]
fn test_tokenize91() { fn test_tokenize91() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize92() { fn test_tokenize92() {
let comp = vec!["0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976"]; let comp = vec![
"0", ":", "01", ":", "02", " ", "on", " ", "July", " ", "4", ",", " ", "1976",
];
tokenize_assert("0:01:02 on July 4, 1976", comp); tokenize_assert("0:01:02 on July 4, 1976", comp);
} }
#[test] #[test]
fn test_tokenize93() { fn test_tokenize93() {
let comp = vec!["July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am"]; let comp = vec![
"July", " ", "4", ",", " ", "1976", " ", "12", ":", "01", ":", "02", " ", "am",
];
tokenize_assert("July 4, 1976 12:01:02 am", comp); tokenize_assert("July 4, 1976 12:01:02 am", comp);
} }
#[test] #[test]
fn test_tokenize94() { fn test_tokenize94() {
let comp = vec!["Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995"]; let comp = vec![
"Mon", " ", "Jan", " ", " ", "2", " ", "04", ":", "24", ":", "27", " ", "1995",
];
tokenize_assert("Mon Jan 2 04:24:27 1995", comp); tokenize_assert("Mon Jan 2 04:24:27 1995", comp);
} }
@ -588,7 +608,9 @@ fn test_tokenize95() {
#[test] #[test]
fn test_tokenize96() { fn test_tokenize96() {
let comp = vec!["Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578"]; let comp = vec![
"Jan", " ", "1", " ", "1999", " ", "11", ":", "23", ":", "34.578",
];
tokenize_assert("Jan 1 1999 11:23:34.578", comp); tokenize_assert("Jan 1 1999 11:23:34.578", comp);
} }
@ -618,13 +640,17 @@ fn test_tokenize100() {
#[test] #[test]
fn test_tokenize101() { fn test_tokenize101() {
let comp = vec!["0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0099", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0099-01-01T00:00:00", comp); tokenize_assert("0099-01-01T00:00:00", comp);
} }
#[test] #[test]
fn test_tokenize102() { fn test_tokenize102() {
let comp = vec!["0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00"]; let comp = vec![
"0031", "-", "01", "-", "01", "T", "00", ":", "00", ":", "00",
];
tokenize_assert("0031-01-01T00:00:00", comp); tokenize_assert("0031-01-01T00:00:00", comp);
} }
@ -666,31 +692,42 @@ fn test_tokenize108() {
#[test] #[test]
fn test_tokenize109() { fn test_tokenize109() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize110() { fn test_tokenize110() {
let comp = vec!["2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu"]; let comp = vec![
"2003", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "25", " ", "Sep", " ", "Thu",
];
tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp); tokenize_assert("2003 10:36:28 BRST 25 Sep Thu", comp);
} }
#[test] #[test]
fn test_tokenize111() { fn test_tokenize111() {
let comp = vec!["Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-", "0300"]; let comp = vec![
"Thu", ",", " ", "25", " ", "Sep", " ", "2003", " ", "10", ":", "49", ":", "41", " ", "-",
"0300",
];
tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp); tokenize_assert("Thu, 25 Sep 2003 10:49:41 -0300", comp);
} }
#[test] #[test]
fn test_tokenize112() { fn test_tokenize112() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41.5", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41.5-03:00", comp); tokenize_assert("2003-09-25T10:49:41.5-03:00", comp);
} }
#[test] #[test]
fn test_tokenize113() { fn test_tokenize113() {
let comp = vec!["2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00"]; let comp = vec![
"2003", "-", "09", "-", "25", "T", "10", ":", "49", ":", "41", "-", "03", ":", "00",
];
tokenize_assert("2003-09-25T10:49:41-03:00", comp); tokenize_assert("2003-09-25T10:49:41-03:00", comp);
} }
@ -708,198 +745,346 @@ fn test_tokenize115() {
#[test] #[test]
fn test_tokenize116() { fn test_tokenize116() {
let comp = vec![
"2018", "-", "08", "-", "10", " ", "10", ":", "00", ":", "00", " ", "UTC", "+", "3",
];
tokenize_assert("2018-08-10 10:00:00 UTC+3", comp);
}
#[test]
fn test_tokenize117() {
let comp = vec![
"2018", "-", "08", "-", "10", " ", "03", ":", "36", ":", "47", " ", "PM", " ", "GMT", "-",
"4",
];
tokenize_assert("2018-08-10 03:36:47 PM GMT-4", comp);
}
#[test]
fn test_tokenize118() {
let comp = vec![
"2018", "-", "08", "-", "10", " ", "04", ":", "15", ":", "00", " ", "AM", " ", "Z", "-",
"02", ":", "00",
];
tokenize_assert("2018-08-10 04:15:00 AM Z-02:00", comp);
}
#[test]
fn test_tokenize119() {
let comp = vec!["10", "-", "09", "-", "2003"]; let comp = vec!["10", "-", "09", "-", "2003"];
tokenize_assert("10-09-2003", comp); tokenize_assert("10-09-2003", comp);
} }
#[test] #[test]
fn test_tokenize117() { fn test_tokenize120() {
let comp = vec!["10", ".", "09", ".", "2003"]; let comp = vec!["10", ".", "09", ".", "2003"];
tokenize_assert("10.09.2003", comp); tokenize_assert("10.09.2003", comp);
} }
#[test] #[test]
fn test_tokenize118() { fn test_tokenize121() {
let comp = vec!["10", "/", "09", "/", "2003"]; let comp = vec!["10", "/", "09", "/", "2003"];
tokenize_assert("10/09/2003", comp); tokenize_assert("10/09/2003", comp);
} }
#[test] #[test]
fn test_tokenize119() { fn test_tokenize122() {
let comp = vec!["10", " ", "09", " ", "2003"]; let comp = vec!["10", " ", "09", " ", "2003"];
tokenize_assert("10 09 2003", comp); tokenize_assert("10 09 2003", comp);
} }
#[test] #[test]
fn test_tokenize120() { fn test_tokenize123() {
let comp = vec!["090107"]; let comp = vec!["090107"];
tokenize_assert("090107", comp); tokenize_assert("090107", comp);
} }
#[test] #[test]
fn test_tokenize121() { fn test_tokenize124() {
let comp = vec!["2015", " ", "09", " ", "25"]; let comp = vec!["2015", " ", "09", " ", "25"];
tokenize_assert("2015 09 25", comp); tokenize_assert("2015 09 25", comp);
} }
#[test] #[test]
fn test_tokenize122() { fn test_tokenize125() {
let comp = vec!["10", "-", "09", "-", "03"]; let comp = vec!["10", "-", "09", "-", "03"];
tokenize_assert("10-09-03", comp); tokenize_assert("10-09-03", comp);
} }
#[test] #[test]
fn test_tokenize123() { fn test_tokenize126() {
let comp = vec!["10", ".", "09", ".", "03"]; let comp = vec!["10", ".", "09", ".", "03"];
tokenize_assert("10.09.03", comp); tokenize_assert("10.09.03", comp);
} }
#[test] #[test]
fn test_tokenize124() { fn test_tokenize127() {
let comp = vec!["10", "/", "09", "/", "03"]; let comp = vec!["10", "/", "09", "/", "03"];
tokenize_assert("10/09/03", comp); tokenize_assert("10/09/03", comp);
} }
#[test] #[test]
fn test_tokenize125() { fn test_tokenize128() {
let comp = vec!["10", " ", "09", " ", "03"]; let comp = vec!["10", " ", "09", " ", "03"];
tokenize_assert("10 09 03", comp); tokenize_assert("10 09 03", comp);
} }
#[test]
fn test_tokenize126() {
let comp = vec!["090107"];
tokenize_assert("090107", comp);
}
#[test]
fn test_tokenize127() {
let comp = vec!["2015", " ", "09", " ", "25"];
tokenize_assert("2015 09 25", comp);
}
#[test]
fn test_tokenize128() {
let comp = vec!["090107"];
tokenize_assert("090107", comp);
}
#[test] #[test]
fn test_tokenize129() { fn test_tokenize129() {
let comp = vec!["2015", " ", "09", " ", "25"]; let comp = vec!["090107"];
tokenize_assert("2015 09 25", comp); tokenize_assert("090107", comp);
} }
#[test] #[test]
fn test_tokenize130() { fn test_tokenize130() {
let comp = vec!["2015", " ", "09", " ", "25"];
tokenize_assert("2015 09 25", comp);
}
#[test]
fn test_tokenize131() {
let comp = vec!["090107"];
tokenize_assert("090107", comp);
}
#[test]
fn test_tokenize132() {
let comp = vec!["2015", " ", "09", " ", "25"];
tokenize_assert("2015 09 25", comp);
}
#[test]
fn test_tokenize133() {
let comp = vec!["April", " ", "2009"]; let comp = vec!["April", " ", "2009"];
tokenize_assert("April 2009", comp); tokenize_assert("April 2009", comp);
} }
#[test] #[test]
fn test_tokenize131() { fn test_tokenize134() {
let comp = vec!["Feb", " ", "2007"]; let comp = vec!["Feb", " ", "2007"];
tokenize_assert("Feb 2007", comp); tokenize_assert("Feb 2007", comp);
} }
#[test] #[test]
fn test_tokenize132() { fn test_tokenize135() {
let comp = vec!["Feb", " ", "2008"]; let comp = vec!["Feb", " ", "2008"];
tokenize_assert("Feb 2008", comp); tokenize_assert("Feb 2008", comp);
} }
#[test] #[test]
fn test_tokenize133() { fn test_tokenize136() {
let comp = vec!["Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003"]; let comp = vec![
"Thu", " ", "Sep", " ", "25", " ", "10", ":", "36", ":", "28", " ", "BRST", " ", "2003",
];
tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp); tokenize_assert("Thu Sep 25 10:36:28 BRST 2003", comp);
} }
#[test] #[test]
fn test_tokenize134() { fn test_tokenize137() {
let comp = vec!["1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ", "PDT"]; let comp = vec![
"1996", ".", "07", ".", "10", " ", "AD", " ", "at", " ", "15", ":", "08", ":", "56", " ",
"PDT",
];
tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp); tokenize_assert("1996.07.10 AD at 15:08:56 PDT", comp);
} }
#[test] #[test]
fn test_tokenize135() { fn test_tokenize138() {
let comp = vec!["Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30", ":", "42", "pm", " ", "PST"]; let comp = vec![
"Tuesday", ",", " ", "April", " ", "12", ",", " ", "1952", " ", "AD", " ", "3", ":", "30",
":", "42", "pm", " ", "PST",
];
tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp); tokenize_assert("Tuesday, April 12, 1952 AD 3:30:42pm PST", comp);
} }
#[test] #[test]
fn test_tokenize136() { fn test_tokenize139() {
let comp = vec!["November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am", " ", "EST"]; let comp = vec![
"November", " ", "5", ",", " ", "1994", ",", " ", "8", ":", "15", ":", "30", " ", "am",
" ", "EST",
];
tokenize_assert("November 5, 1994, 8:15:30 am EST", comp); tokenize_assert("November 5, 1994, 8:15:30 am EST", comp);
} }
#[test] #[test]
fn test_tokenize137() { fn test_tokenize140() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "-", "05", ":", "00",
];
tokenize_assert("1994-11-05T08:15:30-05:00", comp); tokenize_assert("1994-11-05T08:15:30-05:00", comp);
} }
#[test] #[test]
fn test_tokenize138() { fn test_tokenize141() {
let comp = vec!["1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z"]; let comp = vec![
"1994", "-", "11", "-", "05", "T", "08", ":", "15", ":", "30", "Z",
];
tokenize_assert("1994-11-05T08:15:30Z", comp); tokenize_assert("1994-11-05T08:15:30Z", comp);
} }
#[test] #[test]
fn test_tokenize139() { fn test_tokenize142() {
let comp = vec!["1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z"]; let comp = vec![
"1976", "-", "07", "-", "04", "T", "00", ":", "01", ":", "02", "Z",
];
tokenize_assert("1976-07-04T00:01:02Z", comp); tokenize_assert("1976-07-04T00:01:02Z", comp);
} }
#[test] #[test]
fn test_tokenize140() { fn test_tokenize143() {
let comp = vec!["Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995"]; let comp = vec![
"Tue", " ", "Apr", " ", "4", " ", "00", ":", "22", ":", "12", " ", "PDT", " ", "1995",
];
tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp); tokenize_assert("Tue Apr 4 00:22:12 PDT 1995", comp);
} }
#[test]
fn test_tokenize141() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
}
#[test]
fn test_tokenize142() {
let comp = vec!["Today", " ", "is", " ", "25", " ", "of", " ", "September", " ", "of", " ", "2003", ",", " ", "exactly", " ", "at", " ", "10", ":", "49", ":", "41", " ", "with", " ", "timezone", " ", "-", "03", ":", "00", "."];
tokenize_assert("Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.", comp);
}
#[test]
fn test_tokenize143() {
let comp = vec!["I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ", "1974"];
tokenize_assert("I have a meeting on March 1, 1974", comp);
}
#[test] #[test]
fn test_tokenize144() { fn test_tokenize144() {
let comp = vec!["On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ", "going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ", "Mars"]; let comp = vec![
tokenize_assert("On June 8th, 2020, I am going to be the first man on Mars", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize145() { fn test_tokenize145() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003"]; let comp = vec![
tokenize_assert("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003", comp); "Today",
" ",
"is",
" ",
"25",
" ",
"of",
" ",
"September",
" ",
"of",
" ",
"2003",
",",
" ",
"exactly",
" ",
"at",
" ",
"10",
":",
"49",
":",
"41",
" ",
"with",
" ",
"timezone",
" ",
"-",
"03",
":",
"00",
".",
];
tokenize_assert(
"Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.",
comp,
);
} }
#[test] #[test]
fn test_tokenize146() { fn test_tokenize146() {
let comp = vec!["Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset"]; let comp = vec![
tokenize_assert("Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset", comp); "I", " ", "have", " ", "a", " ", "meeting", " ", "on", " ", "March", " ", "1", ",", " ",
"1974",
];
tokenize_assert("I have a meeting on March 1, 1974", comp);
} }
#[test] #[test]
fn test_tokenize147() { fn test_tokenize147() {
let comp = vec!["Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ", "going", " ", "to", " ", "see", " ", "you", " ", "there", "?"]; let comp = vec![
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp); "On", " ", "June", " ", "8", "th", ",", " ", "2020", ",", " ", "I", " ", "am", " ",
"going", " ", "to", " ", "be", " ", "the", " ", "first", " ", "man", " ", "on", " ",
"Mars",
];
tokenize_assert(
"On June 8th, 2020, I am going to be the first man on Mars",
comp,
);
} }
#[test] #[test]
fn test_tokenize148() { fn test_tokenize148() {
let comp = vec![
"Meet", " ", "me", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on", " ", "Sunset",
" ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December", " ", "3", "rd", ",",
" ", "2003",
];
tokenize_assert(
"Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
comp,
);
}
#[test]
fn test_tokenize149() {
let comp = vec![
"Meet", " ", "me", " ", "at", " ", "3", ":", "00", " ", "AM", " ", "on", " ", "December",
" ", "3", "rd", ",", " ", "2003", " ", "at", " ", "the", " ", "AM", "/", "PM", " ", "on",
" ", "Sunset",
];
tokenize_assert(
"Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset",
comp,
);
}
#[test]
fn test_tokenize150() {
let comp = vec![
"Jan", " ", "29", ",", " ", "1945", " ", "14", ":", "45", " ", "AM", " ", "I", " ",
"going", " ", "to", " ", "see", " ", "you", " ", "there", "?",
];
tokenize_assert("Jan 29, 1945 14:45 AM I going to see you there?", comp);
}
#[test]
fn test_tokenize151() {
let comp = vec!["2017", "-", "07", "-", "17", " ", "06", ":", "15", ":"]; let comp = vec!["2017", "-", "07", "-", "17", " ", "06", ":", "15", ":"];
tokenize_assert("2017-07-17 06:15:", comp); tokenize_assert("2017-07-17 06:15:", comp);
} }

View File

@ -14,7 +14,6 @@ pub(crate) enum ParseState {
} }
impl Tokenizer { impl Tokenizer {
pub(crate) fn new(parse_string: &str) -> Self { pub(crate) fn new(parse_string: &str) -> Self {
Tokenizer { Tokenizer {
token_stack: vec![], token_stack: vec![],
@ -92,7 +91,7 @@ impl Iterator for Tokenizer {
} else { } else {
break; break;
} }
}, }
ParseState::Alpha => { ParseState::Alpha => {
seenletters = true; seenletters = true;
if self.isword(nextchar) { if self.isword(nextchar) {
@ -105,19 +104,21 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::Numeric => { ParseState::Numeric => {
if self.isnum(nextchar) { if self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
} else if nextchar == '.' || (nextchar == ',' && token.as_ref().unwrap().len() >= 2) { } else if nextchar == '.'
|| (nextchar == ',' && token.as_ref().unwrap().len() >= 2)
{
token.as_mut().unwrap().push(nextchar); token.as_mut().unwrap().push(nextchar);
state = ParseState::NumericDecimal; state = ParseState::NumericDecimal;
} else { } else {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::AlphaDecimal => { ParseState::AlphaDecimal => {
seenletters = true; seenletters = true;
if nextchar == '.' || self.isword(nextchar) { if nextchar == '.' || self.isword(nextchar) {
@ -130,7 +131,7 @@ impl Iterator for Tokenizer {
self.parse_string.push(nextchar); self.parse_string.push(nextchar);
break; break;
} }
}, }
ParseState::NumericDecimal => { ParseState::NumericDecimal => {
if nextchar == '.' || self.isnum(nextchar) { if nextchar == '.' || self.isnum(nextchar) {
// UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token // UNWRAP: Because we're in non-empty parse state, we're guaranteed to have a token
@ -150,20 +151,25 @@ impl Iterator for Tokenizer {
// We do something slightly different to express the same logic // We do something slightly different to express the same logic
if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal { if state == ParseState::AlphaDecimal || state == ParseState::NumericDecimal {
// UNWRAP: The state check guarantees that we have a value // UNWRAP: The state check guarantees that we have a value
let dot_count = token.as_ref().unwrap().chars().filter(|c| *c == '.').count(); let dot_count = token
.as_ref()
.unwrap()
.chars()
.filter(|c| *c == '.')
.count();
let last_char = token.as_ref().unwrap().chars().last(); let last_char = token.as_ref().unwrap().chars().last();
let last_splittable = last_char == Some('.') || last_char == Some(','); let last_splittable = last_char == Some('.') || last_char == Some(',');
if seenletters || dot_count > 1 || last_splittable { if seenletters || dot_count > 1 || last_splittable {
let mut l = self.decimal_split(token.as_ref().unwrap()); let mut l = self.decimal_split(token.as_ref().unwrap());
let remaining = l.split_off(1); let remaining = l.split_off(1);
token = Some(l[0].clone()); token = Some(l[0].clone());
for t in remaining { for t in remaining {
self.token_stack.push(t); self.token_stack.push(t);
} }
} }
if state == ParseState::NumericDecimal && dot_count == 0 { if state == ParseState::NumericDecimal && dot_count == 0 {
token = Some(token.unwrap().replace(',', ".")); token = Some(token.unwrap().replace(',', "."));
} }

View File

@ -1,5 +1,5 @@
use ParseResult;
use ParseError; use ParseError;
use ParseResult;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum DayOfWeek { pub enum DayOfWeek {
@ -9,13 +9,12 @@ pub enum DayOfWeek {
Wednesday, Wednesday,
Thursday, Thursday,
Friday, Friday,
Saturday Saturday,
} }
impl DayOfWeek { impl DayOfWeek {
pub fn to_numeral(&self) -> u32 { pub fn to_numeral(&self) -> u32 {
match self { match *self {
DayOfWeek::Sunday => 0, DayOfWeek::Sunday => 0,
DayOfWeek::Monday => 1, DayOfWeek::Monday => 1,
DayOfWeek::Tuesday => 2, DayOfWeek::Tuesday => 2,
@ -35,12 +34,12 @@ impl DayOfWeek {
4 => DayOfWeek::Thursday, 4 => DayOfWeek::Thursday,
5 => DayOfWeek::Friday, 5 => DayOfWeek::Friday,
6 => DayOfWeek::Saturday, 6 => DayOfWeek::Saturday,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
/// Given the current day of the week, how many days until the next day? /// Given the current day of the week, how many days until the next day?
pub fn difference(&self, other: DayOfWeek) -> u32 { pub fn difference(&self, other: &DayOfWeek) -> u32 {
// Have to use i32 because of wraparound issues // Have to use i32 because of wraparound issues
let s_num = self.to_numeral() as i32; let s_num = self.to_numeral() as i32;
let o_num = other.to_numeral() as i32; let o_num = other.to_numeral() as i32;
@ -59,12 +58,12 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => { 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 => {
let c = year / 100; let c = year / 100;
(c, year - 100 * c) (c, year - 100 * c)
}, }
1 | 2 => { 1 | 2 => {
let c = (year - 1) / 100; let c = (year - 1) / 100;
(c, year - 1 - 100 * c) (c, year - 1 - 100 * c)
}, }
_ => return Err(ParseError::ImpossibleTimestamp("Invalid month")) _ => return Err(ParseError::ImpossibleTimestamp("Invalid month")),
}; };
let e = match month { let e = match month {
@ -75,7 +74,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
8 => 1, 8 => 1,
9 | 12 => 4, 9 | 12 => 4,
10 => 6, 10 => 6,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
// This implementation is Gregorian-only. // This implementation is Gregorian-only.
@ -84,7 +83,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
1 => 5, 1 => 5,
2 => 3, 2 => 3,
3 => 1, 3 => 1,
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
}; };
match (day + e + f + g + g / 4) % 7 { match (day + e + f + g + g / 4) % 7 {
@ -95,7 +94,7 @@ pub fn day_of_week(year: u32, month: u32, day: u32) -> ParseResult<DayOfWeek> {
4 => Ok(DayOfWeek::Thursday), 4 => Ok(DayOfWeek::Thursday),
5 => Ok(DayOfWeek::Friday), 5 => Ok(DayOfWeek::Friday),
6 => Ok(DayOfWeek::Saturday), 6 => Ok(DayOfWeek::Saturday),
_ => panic!("Unreachable.") _ => panic!("Unreachable."),
} }
} }
@ -114,19 +113,18 @@ mod test {
#[test] #[test]
fn weekday_difference() { fn weekday_difference() {
assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Sunday), 0);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Sunday), 0); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Monday), 1);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Monday), 1); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Tuesday), 2);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Tuesday), 2); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Wednesday), 3);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Wednesday), 3); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Thursday), 4);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Thursday), 4); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Friday), 5);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Friday), 5); assert_eq!(DayOfWeek::Sunday.difference(&DayOfWeek::Saturday), 6);
assert_eq!(DayOfWeek::Sunday.difference(DayOfWeek::Saturday), 6); assert_eq!(DayOfWeek::Monday.difference(&DayOfWeek::Sunday), 6);
assert_eq!(DayOfWeek::Monday.difference(DayOfWeek::Sunday), 6); assert_eq!(DayOfWeek::Tuesday.difference(&DayOfWeek::Sunday), 5);
assert_eq!(DayOfWeek::Tuesday.difference(DayOfWeek::Sunday), 5); assert_eq!(DayOfWeek::Wednesday.difference(&DayOfWeek::Sunday), 4);
assert_eq!(DayOfWeek::Wednesday.difference(DayOfWeek::Sunday), 4); assert_eq!(DayOfWeek::Thursday.difference(&DayOfWeek::Sunday), 3);
assert_eq!(DayOfWeek::Thursday.difference(DayOfWeek::Sunday), 3); assert_eq!(DayOfWeek::Friday.difference(&DayOfWeek::Sunday), 2);
assert_eq!(DayOfWeek::Friday.difference(DayOfWeek::Sunday), 2); assert_eq!(DayOfWeek::Saturday.difference(&DayOfWeek::Sunday), 1);
assert_eq!(DayOfWeek::Saturday.difference(DayOfWeek::Sunday), 1);
} }
} }