diff --git a/src/infer.rs b/src/infer.rs index d989969..4915542 100644 --- a/src/infer.rs +++ b/src/infer.rs @@ -1,6 +1,17 @@ use crate::{infer_string::infer_string_type, NumberType, SchemaState, StringType}; use rayon::prelude::*; +pub struct EnumInference { + /// The maximum ratio of unique values to total values in a collection of strings for it to be considered an enum. + pub max_unique_ratio: f64, + /// The minimum number of values in a collection of strings for enum inference to be applied. + pub min_sample_size: usize, +} + +pub struct InferenceOptions { + pub enum_inference: Option, +} + fn min(left: T, right: T) -> T { if left < right { left @@ -27,11 +38,13 @@ fn merge(initial: SchemaState, new: SchemaState) -> SchemaState { // --- String merging --- ( SchemaState::String(StringType::Unknown { + mut strings_seen, mut chars_seen, min_length, max_length, }), SchemaState::String(StringType::Unknown { + strings_seen: second_strings_seen, chars_seen: second_chars_seen, min_length: second_min_length, max_length: second_max_length, @@ -54,40 +67,25 @@ fn merge(initial: SchemaState, new: SchemaState) -> SchemaState { }; chars_seen.extend(second_chars_seen); + strings_seen.extend(second_strings_seen); SchemaState::String(StringType::Unknown { + strings_seen, chars_seen, min_length, max_length, }) } - ( - SchemaState::String(StringType::Unknown { - chars_seen, - min_length, - max_length, - }), - SchemaState::String(_), - ) - | ( - SchemaState::String(_), - SchemaState::String(StringType::Unknown { - chars_seen, - min_length, - max_length, - }), - ) => SchemaState::String(StringType::Unknown { - chars_seen, - min_length, - max_length, - }), + (s @ SchemaState::String(StringType::Unknown { .. }), SchemaState::String(_)) + | (SchemaState::String(_), s @ SchemaState::String(StringType::Unknown { .. })) => s, (SchemaState::String(first_type), SchemaState::String(second_type)) => { if first_type == second_type { SchemaState::String(first_type) } else { SchemaState::String(StringType::Unknown { + strings_seen: vec![], chars_seen: vec![], min_length: None, max_length: None, @@ -267,6 +265,59 @@ fn merge(initial: SchemaState, new: SchemaState) -> SchemaState { } } +fn apply_enum_inner(s: StringType, opts: &EnumInference) -> StringType { + match &s { + StringType::Unknown { + strings_seen, + .. + } => { + if strings_seen.len() < opts.min_sample_size { + return s; } + + let variants = strings_seen + .iter() + .cloned() + .collect::>(); + + let unique_ratio = variants.len() as f64 / strings_seen.len() as f64; + if unique_ratio > opts.max_unique_ratio { + return s; } + + StringType::Enum { variants } + } + _ => s, + } +} + +fn apply_enum_recursive(s: SchemaState, opts: &EnumInference) -> SchemaState { + match s { + SchemaState::String(s) => SchemaState::String(apply_enum_inner(s, opts)), + SchemaState::Array { + min_length, + max_length, + schema, + } => SchemaState::Array { + min_length, + max_length, + schema: Box::new(apply_enum_recursive(*schema, opts)), + }, + SchemaState::Object { required, optional } => SchemaState::Object { + required: required + .into_iter() + .map(|(k, v)| (k, apply_enum_recursive(v, opts))) + .collect(), + optional: optional + .into_iter() + .map(|(k, v)| (k, apply_enum_recursive(v, opts))) + .collect(), + }, + SchemaState::Nullable(inner) => { + SchemaState::Nullable(Box::new(apply_enum_recursive(*inner, opts))) + } + _ => s, + } +} + /// Infer a schema, encoded as a SchemaState struct, from a JSON value. /// This function will recursively traverse the given JSON structure and return a SchemaState struct. /// @@ -275,7 +326,11 @@ fn merge(initial: SchemaState, new: SchemaState) -> SchemaState { /// ``` /// use serde_json::json; /// use std::collections::{HashMap, HashSet}; -/// use drivel::{infer_schema, SchemaState, StringType, NumberType}; +/// use drivel::{infer_schema, SchemaState, StringType, NumberType, InferenceOptions}; +/// +/// let opts = InferenceOptions { +/// enum_inference: None +/// }; /// /// // Define a JSON value /// let input = json!({ @@ -286,10 +341,11 @@ fn merge(initial: SchemaState, new: SchemaState) -> SchemaState { /// }); /// /// assert_eq!( -/// infer_schema(input), +/// infer_schema(input, &opts), /// SchemaState::Object { /// required: HashMap::from_iter([ /// ("name".to_string(), SchemaState::String(StringType::Unknown { +/// strings_seen: vec!["John".to_string()], /// chars_seen: vec!['J', 'o', 'h', 'n'], /// min_length: Some(4), /// max_length: Some(4) @@ -306,8 +362,8 @@ fn merge(initial: SchemaState, new: SchemaState) -> SchemaState { /// } /// ); /// ``` -pub fn infer_schema(json: serde_json::Value) -> SchemaState { - match json { +pub fn infer_schema(json: serde_json::Value, options: &InferenceOptions) -> SchemaState { + let inferred = match json { serde_json::Value::Null => SchemaState::Null, serde_json::Value::String(value) => SchemaState::String(infer_string_type(&value)), serde_json::Value::Number(n) => SchemaState::Number(if n.is_f64() { @@ -325,15 +381,21 @@ pub fn infer_schema(json: serde_json::Value) -> SchemaState { serde_json::Value::Array(array) => SchemaState::Array { min_length: array.len(), max_length: array.len(), - schema: Box::new(infer_schema_from_iter(array)), + schema: Box::new(infer_schema_from_iter(array, options)), }, serde_json::Value::Object(object) => SchemaState::Object { required: object .into_iter() - .map(|(k, v)| (k, infer_schema(v))) + .map(|(k, v)| (k, infer_schema(v, options))) .collect(), optional: std::collections::HashMap::new(), }, + }; + + if let Some(enum_opts) = &options.enum_inference { + apply_enum_recursive(inferred, enum_opts) + } else { + inferred } } @@ -348,7 +410,7 @@ pub fn infer_schema(json: serde_json::Value) -> SchemaState { /// ``` /// use serde_json::json; /// use std::collections::{HashMap, HashSet}; -/// use drivel::{infer_schema_from_iter, SchemaState, StringType, NumberType}; +/// use drivel::{infer_schema_from_iter, SchemaState, StringType, NumberType, InferenceOptions}; /// /// // Define a collection of JSON values /// let values = vec![ @@ -364,14 +426,19 @@ pub fn infer_schema(json: serde_json::Value) -> SchemaState { /// }) /// ]; /// +/// let opts = InferenceOptions { +/// enum_inference: None +/// }; +/// /// // Infer the schema from the iterator of JSON values -/// let schema = infer_schema_from_iter(values); +/// let schema = infer_schema_from_iter(values, &opts); /// /// assert_eq!( /// schema, /// SchemaState::Object { /// required: HashMap::from_iter([ /// ("name".to_string(), SchemaState::String(StringType::Unknown { +/// strings_seen: vec!["Alice".to_string(), "Bob".to_string()], /// chars_seen: vec!['A', 'l', 'i', 'c', 'e', 'B', 'o', 'b'], /// min_length: Some(3), /// max_length: Some(5) @@ -383,10 +450,13 @@ pub fn infer_schema(json: serde_json::Value) -> SchemaState { /// } /// ); /// ``` -pub fn infer_schema_from_iter(values: Vec) -> SchemaState { +pub fn infer_schema_from_iter( + values: Vec, + options: &InferenceOptions, +) -> SchemaState { values .into_par_iter() - .map(|value| infer_schema(value)) + .map(|value| infer_schema(value, options)) .reduce(|| SchemaState::Initial, |left, right| merge(left, right)) } @@ -399,7 +469,10 @@ mod tests { #[test] fn infers_null() { let input = json!(null); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::Null) } @@ -407,11 +480,15 @@ mod tests { #[test] fn infers_string_unknown_type() { let input = json!("foo"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, SchemaState::String(StringType::Unknown { + strings_seen: vec!["foo".to_owned()], chars_seen: vec!['f', 'o', 'o'], min_length: Some(3), max_length: Some(3) @@ -422,7 +499,10 @@ mod tests { #[test] fn infers_string_iso_date() { let input = json!("2013-01-12"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::IsoDate)) } @@ -430,7 +510,10 @@ mod tests { #[test] fn infers_string_iso_date_time_rfc_2822() { let input = json!("Thu, 18 Mar 2021 10:37:31 +0000"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::DateTimeISO8601)) } @@ -438,7 +521,10 @@ mod tests { #[test] fn infers_string_iso_date_time_rfc_3339_offset() { let input = json!("2013-01-12T00:00:00.000+00:00"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::DateTimeISO8601)) } @@ -446,7 +532,10 @@ mod tests { #[test] fn infers_string_iso_date_time_rfc_3339_utc() { let input = json!("2013-01-12T00:00:00.000Z"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::DateTimeISO8601)) } @@ -454,7 +543,10 @@ mod tests { #[test] fn infers_string_uuid() { let input = json!("988c2c6d-df1b-4bb9-b837-6ba706c0b4ad"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::UUID)) } @@ -462,7 +554,10 @@ mod tests { #[test] fn infers_string_email() { let input = json!("test@example.com"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::Email)) } @@ -470,7 +565,10 @@ mod tests { #[test] fn infers_string_url() { let input = json!("https://somedomain.somehost.nl/somepage"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::Url)) } @@ -478,7 +576,10 @@ mod tests { #[test] fn infers_string_hostname() { let input = json!("somehost.com"); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::String(StringType::Hostname)) } @@ -486,7 +587,10 @@ mod tests { #[test] fn infers_number() { let input = json!(42); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -497,7 +601,10 @@ mod tests { #[test] fn infers_number_float() { let input = json!(42.0); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -511,7 +618,10 @@ mod tests { #[test] fn infers_boolean_true() { let input = json!(true); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::Boolean) } @@ -519,7 +629,10 @@ mod tests { #[test] fn infers_boolean_false() { let input = json!(false); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!(schema, SchemaState::Boolean) } @@ -537,7 +650,10 @@ mod tests { "string": "foo" } }); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -546,6 +662,7 @@ mod tests { ( "string".to_string(), SchemaState::String(StringType::Unknown { + strings_seen: vec!["foo".to_owned()], chars_seen: vec!['f', 'o', 'o'], min_length: Some(3), max_length: Some(3) @@ -569,6 +686,7 @@ mod tests { min_length: 1, max_length: 1, schema: Box::new(SchemaState::String(StringType::Unknown { + strings_seen: vec!["baz".to_owned()], chars_seen: vec!['b', 'a', 'z'], min_length: Some(3), max_length: Some(3) @@ -582,6 +700,7 @@ mod tests { required: std::collections::HashMap::from_iter([( "string".to_owned(), SchemaState::String(StringType::Unknown { + strings_seen: vec!["foo".to_owned()], chars_seen: vec!['f', 'o', 'o'], min_length: Some(3), max_length: Some(3) @@ -599,7 +718,10 @@ mod tests { #[test] fn infers_array_null() { let input = json!([null, null]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -614,7 +736,10 @@ mod tests { #[test] fn infers_array_string() { let input = json!(["foo", "barbar"]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -622,6 +747,7 @@ mod tests { min_length: 2, max_length: 2, schema: Box::new(SchemaState::String(StringType::Unknown { + strings_seen: vec!["foo".to_owned(), "barbar".to_owned()], chars_seen: vec!['f', 'o', 'o', 'b', 'a', 'r', 'b', 'a', 'r'], min_length: Some(3), max_length: Some(6) @@ -630,10 +756,100 @@ mod tests { ); } + #[test] + fn infers_array_string_enum() { + let input = json!(["foo", "barbar", "barbar", "foo"]); + + let enum_opts = EnumInference { + max_unique_ratio: 0.5, + min_sample_size: 2, + }; + let options = InferenceOptions { + enum_inference: Some(enum_opts), + }; + + let schema = infer_schema(input, &options); + + assert_eq!( + schema, + SchemaState::Array { + min_length: 4, + max_length: 4, + schema: Box::new(SchemaState::String(StringType::Enum { + variants: vec!["foo".to_owned(), "barbar".to_owned()] + .into_iter() + .collect() + })) + } + ); + } + + #[test] + fn infers_array_string_enum_uniq_ratio_too_high() { + let input = json!(["foo", "barbar", "foo", "barbar"]); + + let enum_opts = EnumInference { + max_unique_ratio: 0.4, // 2 unique values out of 4 = unique ratio of 0.5 + min_sample_size: 2, + }; + let options = InferenceOptions { + enum_inference: Some(enum_opts), + }; + + let schema = infer_schema(input, &options); + + assert_eq!( + schema, + SchemaState::Array { + min_length: 4, + max_length: 4, + schema: Box::new(SchemaState::String(StringType::Unknown { + strings_seen: vec!["foo".to_owned(), "barbar".to_owned(), "foo".to_owned(), "barbar".to_owned()], + chars_seen: vec!['f', 'o', 'o', 'b', 'a', 'r', 'b', 'a', 'r', 'f', 'o', 'o', 'b', 'a', 'r', 'b', 'a', 'r'], + min_length: Some(3), + max_length: Some(6) + })) + } + ); + } + + #[test] + fn infers_array_string_enum_sample_size_too_small() { + let input = json!(["foo", "barbar", "foo", "barbar"]); + + let enum_opts = EnumInference { + max_unique_ratio: 0.5, + min_sample_size: 5, // sample size too small (4 vs 5) + }; + let options = InferenceOptions { + enum_inference: Some(enum_opts), + }; + + let schema = infer_schema(input, &options); + + assert_eq!( + schema, + SchemaState::Array { + min_length: 4, + max_length: 4, + schema: Box::new(SchemaState::String(StringType::Unknown { + strings_seen: vec!["foo".to_owned(), "barbar".to_owned(), "foo".to_owned(), "barbar".to_owned()], + chars_seen: vec!['f', 'o', 'o', 'b', 'a', 'r', 'b', 'a', 'r', 'f', 'o', 'o', 'b', 'a', 'r', 'b', 'a', 'r'], + min_length: Some(3), + max_length: Some(6) + })) + } + ); + } + + #[test] fn infers_array_string_mixed() { let input = json!(["48f41410-2d97-4d54-8bfa-aa4e22acca01", "barbar"]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -641,6 +857,7 @@ mod tests { min_length: 2, max_length: 2, schema: Box::new(SchemaState::String(StringType::Unknown { + strings_seen: vec!["barbar".to_owned()], chars_seen: vec!['b', 'a', 'r', 'b', 'a', 'r'], min_length: Some(6), max_length: Some(6), @@ -652,7 +869,10 @@ mod tests { #[test] fn infers_array_number() { let input = json!([100, 104]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -670,7 +890,10 @@ mod tests { #[test] fn infers_array_number_float() { let input = json!([100, 104.5]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -688,7 +911,10 @@ mod tests { #[test] fn infers_array_boolean() { let input = json!([true, false]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -718,7 +944,10 @@ mod tests { "qux": true }, ]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -738,6 +967,7 @@ mod tests { optional: std::collections::HashMap::from_iter([( "foo".to_owned(), SchemaState::String(StringType::Unknown { + strings_seen: vec!["bar".to_owned(), "barbar".to_owned()], chars_seen: vec!['b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 'r'], min_length: Some(3), max_length: Some(6) @@ -748,10 +978,58 @@ mod tests { ) } + #[test] + fn infers_array_object_enum() { + let input = json!([ + { + "foo": "bar", + }, + { + "foo": "bar", + }, + { + "foo": "baz", + }, + { + "foo": "bar", + } + ]); + let enun_opts = EnumInference { + max_unique_ratio: 0.5, + min_sample_size: 2, + }; + let options = InferenceOptions { + enum_inference: Some(enun_opts), + }; + let schema = infer_schema(input, &options); + + assert_eq!( + schema, + SchemaState::Array { + min_length: 4, + max_length: 4, + schema: Box::new(SchemaState::Object { + required: std::collections::HashMap::from_iter([( + "foo".to_owned(), + SchemaState::String(StringType::Enum { + variants: vec!["bar".to_owned(), "baz".to_owned()] + .into_iter() + .collect() + }) + )]), + optional: std::collections::HashMap::new() + }) + } + ) + } + #[test] fn infers_nested_array() { let input = json!([[true, false], [false]]); - let schema = infer_schema(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema(input, &options); assert_eq!( schema, @@ -770,10 +1048,13 @@ mod tests { #[test] fn infers_nullable_array() { let input_1 = json!(["foo", null]); - let schema_1 = infer_schema(input_1); + let options = InferenceOptions { + enum_inference: None, + }; + let schema_1 = infer_schema(input_1, &options); let input_2 = json!([null, "foo"]); - let schema_2 = infer_schema(input_2); + let schema_2 = infer_schema(input_2, &options); assert_eq!( schema_1, @@ -782,6 +1063,7 @@ mod tests { max_length: 2, schema: Box::new(SchemaState::Nullable(Box::new(SchemaState::String( StringType::Unknown { + strings_seen: vec!["foo".to_owned()], chars_seen: vec!['f', 'o', 'o'], min_length: Some(3), max_length: Some(3) @@ -811,7 +1093,10 @@ mod tests { "qux": true }), ]; - let schema = infer_schema_from_iter(input); + let options = InferenceOptions { + enum_inference: None, + }; + let schema = infer_schema_from_iter(input, &options); assert_eq!( schema, SchemaState::Object { @@ -828,6 +1113,7 @@ mod tests { optional: std::collections::HashMap::from_iter([( "foo".to_owned(), SchemaState::String(StringType::Unknown { + strings_seen: vec!["bar".to_owned(), "barbar".to_owned()], chars_seen: vec!['b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 'r'], min_length: Some(3), max_length: Some(6) diff --git a/src/infer_string.rs b/src/infer_string.rs index e562e7e..e394074 100644 --- a/src/infer_string.rs +++ b/src/infer_string.rs @@ -28,6 +28,7 @@ pub(crate) fn infer_string_type(s: &str) -> StringType { StringType::Hostname } else { StringType::Unknown { + strings_seen: vec![s.to_owned()], chars_seen: s.chars().collect(), min_length: Some(s.len()), max_length: Some(s.len()), diff --git a/src/main.rs b/src/main.rs index ff0d117..1d45ab4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,8 +22,37 @@ enum Mode { struct Args { #[command(subcommand)] mode: Mode, + + /// Infer that some string fields are enums based on the number of unique values seen. + #[arg(long, global=true)] + infer_enum: bool, + + /// The maximum ratio of unique values to total values for a field to be considered an enum. Default = 0.1. + #[arg(long, global=true)] + enum_max_uniq: Option, + + /// The minimum sample size of strings before enum inference will be attempted. Default = 1. + #[arg(long, global=true)] + enum_min_n: Option, } +impl From<&Args> for Option { + fn from(value: &Args) -> Self { + if value.infer_enum { + let max_unique_ratio = value.enum_max_uniq.unwrap_or(0.1); + let min_sample_size = value.enum_min_n.unwrap_or(1); + Some( + drivel::EnumInference { + max_unique_ratio, + min_sample_size, + } + ) + } else { + None + } + } +} + fn main() { let args = Args::parse(); let input = match std::io::read_to_string(std::io::stdin()) { @@ -34,8 +63,12 @@ fn main() { } }; + let opts = drivel::InferenceOptions { + enum_inference: (&args).into(), + }; + let schema = if let Ok(json) = serde_json::from_str(&input) { - drivel::infer_schema(json) + drivel::infer_schema(json, &opts) } else { // unable to parse input as JSON; try JSON lines format as fallback let values = input @@ -51,7 +84,7 @@ fn main() { } }) .collect(); - drivel::infer_schema_from_iter(values) + drivel::infer_schema_from_iter(values, &opts) }; match &args.mode { diff --git a/src/produce.rs b/src/produce.rs index b888448..10a4b10 100644 --- a/src/produce.rs +++ b/src/produce.rs @@ -65,6 +65,7 @@ fn produce_inner(schema: &SchemaState, repeat_n: usize, current_depth: usize) -> chars_seen, min_length, max_length, + .. } => { let min = min_length.unwrap_or(0); let max = max_length.unwrap_or(32); @@ -89,6 +90,11 @@ fn produce_inner(schema: &SchemaState, repeat_n: usize, current_depth: usize) -> s } } + StringType::Enum { variants } => { + let variants_vec = variants.iter().cloned().collect::>(); + let idx = thread_rng().gen_range(0..variants_vec.len()); + variants_vec[idx].clone() + } }; serde_json::Value::String(value) } diff --git a/src/schema.rs b/src/schema.rs index 4249d9e..565ebd0 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -3,6 +3,7 @@ use std::fmt::Display; #[derive(PartialEq, Debug)] pub enum StringType { Unknown { + strings_seen: Vec, chars_seen: Vec, min_length: Option, max_length: Option, @@ -14,12 +15,16 @@ pub enum StringType { Email, Url, Hostname, + Enum { + variants: std::collections::HashSet, + }, } impl Display for StringType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let text = match self { StringType::Unknown { + strings_seen: _, chars_seen: _, min_length, max_length, @@ -45,6 +50,11 @@ impl Display for StringType { StringType::Email => "string (email)".to_owned(), StringType::Hostname => "string (hostname)".to_owned(), StringType::Url => "string (url)".to_owned(), + StringType::Enum { variants } => { + let variants_vec = variants.iter().cloned().collect::>(); + let formatted = variants_vec.join(", "); + format!("string (enum: {})", formatted) + } }; write!(f, "{}", text) } @@ -201,6 +211,7 @@ impl SchemaState { /// /// let required = HashMap::from_iter(vec![ /// ("name".to_string(), SchemaState::String(StringType::Unknown { + /// strings_seen: vec!["abc".to_string()], /// chars_seen: vec!['a', 'b', 'c'], /// min_length: Some(1), /// max_length: Some(10),