Skip to content

Commit

Permalink
make matching nicer to work with
Browse files Browse the repository at this point in the history
  • Loading branch information
hgrsd committed Jul 4, 2024
1 parent 0b32e80 commit d62be32
Showing 1 changed file with 29 additions and 8 deletions.
37 changes: 29 additions & 8 deletions src/infer_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,56 @@ lazy_static! {
regex::Regex::new(r"[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]{2,}$").unwrap();
}

pub(crate) fn infer_string_type(s: &str) -> StringType {
fn uuid(s: &str) -> Option<StringType> {
if s.len() == 36 && UUIDREGEX.is_match(s) {
return StringType::UUID;
Some(StringType::UUID)
} else {
None
}
}

fn email(s: &str) -> Option<StringType> {
if s.contains('@') && EMAIL_REGEX.is_match(s) {
return StringType::Email;
Some(StringType::Email)
} else {
None
}
}

fn url_host(s: &str) -> Option<StringType> {
if s.contains('.') {
if url::Url::parse(s).is_ok() {
return StringType::Url;
return Some(StringType::Url);
}
if HOSTNAME_REGEX.is_match(s) {
return StringType::Hostname;
return Some(StringType::Hostname);
}
}
None
}

fn dates(s: &str) -> Option<StringType> {
if s.chars().take(1).all(|char| char.is_numeric()) {
if ISO_DATE_REGEX.is_match(s) {
return StringType::IsoDate;
return Some(StringType::IsoDate);
}
if chrono::DateTime::parse_from_rfc3339(s).is_ok() {
return StringType::DateTimeISO8601;
return Some(StringType::DateTimeISO8601);
}
}

if chrono::DateTime::parse_from_rfc2822(s).is_ok() {
return StringType::DateTimeISO8601;
return Some(StringType::DateTimeISO8601);
}

None
}

pub(crate) fn infer_string_type(s: &str) -> StringType {
for matcher in [uuid, email, url_host, dates] {
if let Some(string_type) = matcher(s) {
return string_type;
}
}

return StringType::Unknown {
Expand Down

0 comments on commit d62be32

Please sign in to comment.