Skip to content

Commit

Permalink
perf: string inference runs fewer expensive tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hgrsd committed Jul 2, 2024
1 parent 755d544 commit 0b32e80
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "drivel"
description = "Infer a schema from JSON input, and generate synthetic data based on the inferred schema."
license = "MIT"
authors = ["Daniël Hogers <[email protected]>"]
version = "0.2.1"
version = "0.2.2"
edition = "2021"
repository = "https://github.com/hgrsd/drivel"

Expand Down
54 changes: 34 additions & 20 deletions src/infer_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,40 @@ lazy_static! {
}

/// Classifies a single string sample `s` into a `StringType`.
///
/// NOTE(review): this listing is a commit diff rendered without +/- markers, so
/// two versions of the body appear back to back: the `if`/`else if` chain
/// (presumably the lines removed by the commit) followed by the guarded,
/// early-return version (presumably the lines added). Read them as
/// alternatives, not as one function body.
///
/// Both versions fall back to `StringType::Unknown`, recording the sample
/// itself, its characters, and its byte length (`s.len()`) as both the minimum
/// and maximum length.
pub(crate) fn infer_string_type(s: &str) -> StringType {
// --- Earlier version: one expression; every check runs unconditionally, in
// --- order, until the first one matches. ISO date is tested first.
if ISO_DATE_REGEX.is_match(s) {
StringType::IsoDate
// Both the RFC 2822 and the RFC 3339 parse map to the same variant.
} else if chrono::DateTime::parse_from_rfc2822(s).is_ok() {
StringType::DateTimeISO8601
} else if chrono::DateTime::parse_from_rfc3339(s).is_ok() {
StringType::DateTimeISO8601
} else if UUIDREGEX.is_match(s) {
StringType::UUID
} else if EMAIL_REGEX.is_match(s) {
StringType::Email
// URL parsing is attempted on every string that failed the checks above.
} else if url::Url::parse(s).is_ok() {
StringType::Url
} else if HOSTNAME_REGEX.is_match(s) {
StringType::Hostname
} else {
// No specific format matched: keep the raw sample and its statistics.
StringType::Unknown {
strings_seen: vec![s.to_owned()],
chars_seen: s.chars().collect(),
min_length: Some(s.len()),
max_length: Some(s.len()),
// --- Later version: each check sits behind an inexpensive precondition and
// --- returns early. The order differs from the chain above: UUID is tested
// --- first and the date checks come last.
// The UUID regex only runs on strings whose byte length is exactly 36.
if s.len() == 36 && UUIDREGEX.is_match(s) {
return StringType::UUID;
}

// The email regex only runs on strings that contain '@'.
if s.contains('@') && EMAIL_REGEX.is_match(s) {
return StringType::Email;
}

// URL and hostname checks are attempted only when the string contains a '.'.
// NOTE(review): strings without a '.' never reach `Url::parse` here, whereas
// the earlier chain tried it on every string that failed the prior checks.
if s.contains('.') {
if url::Url::parse(s).is_ok() {
return StringType::Url;
}
if HOSTNAME_REGEX.is_match(s) {
return StringType::Hostname;
}
}

// `take(1).all(..)` inspects at most the first character.
// NOTE(review): `all` is true for an empty iterator, so an empty string also
// passes this guard; `char::is_numeric` is Unicode-aware, not ASCII-only.
if s.chars().take(1).all(|char| char.is_numeric()) {
if ISO_DATE_REGEX.is_match(s) {
return StringType::IsoDate;
}
if chrono::DateTime::parse_from_rfc3339(s).is_ok() {
return StringType::DateTimeISO8601;
}
}

// The RFC 2822 parse has no guard: it runs for every string that reaches this
// point, and a match is reported as `DateTimeISO8601`.
if chrono::DateTime::parse_from_rfc2822(s).is_ok() {
return StringType::DateTimeISO8601;
}

// No specific format matched: keep the raw sample and its statistics.
return StringType::Unknown {
strings_seen: vec![s.to_owned()],
chars_seen: s.chars().collect(),
min_length: Some(s.len()),
max_length: Some(s.len()),
};
}

0 comments on commit 0b32e80

Please sign in to comment.