diff --git a/Cargo.toml b/Cargo.toml index c29ff72d59..f97d9db673 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -144,7 +144,7 @@ idna = { version = "0.5", optional = true } indexmap = { version = "2", default-features = false, features = ["std"], optional = true} influxdb-line-protocol = { version = "2.0.0", optional = true } indoc = {version = "2", optional = true } -itertools = { version = "0.13", default-features = false, optional = true } +itertools = { version = "0.13", default-features = false, features=["use_std"], optional = true } lalrpop-util = { version = "0.21", optional = true } mlua = { version = "0.9", default-features = false, features = ["lua54", "send", "vendored"], optional = true} nom = { version = "7", default-features = false, features = ["std"], optional = true } diff --git a/benches/stdlib.rs b/benches/stdlib.rs index 5673201886..81b9e5a2d1 100644 --- a/benches/stdlib.rs +++ b/benches/stdlib.rs @@ -160,6 +160,7 @@ criterion_group!( to_syslog_severity, to_unix_timestamp, truncate, + unflatten, unique, // TODO: Cannot pass a Path to bench_function //unnest @@ -2789,6 +2790,31 @@ bench_function! { } } +bench_function! { + unflatten => vrl::stdlib::Unflatten; + + nested_map { + args: func_args![value: value!({"parent.child1": 1, "parent.child2": 2, key: "val"})], + want: Ok(value!({parent: {child1: 1, child2: 2}, key: "val"})), + } + + map_and_array { + args: func_args![value: value!({ + "parent.child1": [1, [2, 3]], + "parent.child2.grandchild1": 1, + "parent.child2.grandchild2": [1, [2, 3], 4], + "key": "val", + })], + want: Ok(value!({ + "parent": { + "child1": [1, [2, 3]], + "child2": {"grandchild1": 1, "grandchild2": [1, [2, 3], 4]}, + }, + "key": "val", + })), + } +} + bench_function! 
{ unique => vrl::stdlib::Unique; diff --git a/changelog.d/81.feature.md b/changelog.d/81.feature.md new file mode 100644 index 0000000000..e4278ebfd1 --- /dev/null +++ b/changelog.d/81.feature.md @@ -0,0 +1,3 @@ +Added `unflatten` function to invert the result of the `flatten` function. This function is useful when you want to convert a flattened object back to its original form. + +authors: jorgehermo9 \ No newline at end of file diff --git a/lib/tests/tests/functions/flatten/from_unflatten.vrl b/lib/tests/tests/functions/flatten/from_unflatten.vrl new file mode 100644 index 0000000000..b6f89ebc62 --- /dev/null +++ b/lib/tests/tests/functions/flatten/from_unflatten.vrl @@ -0,0 +1,4 @@ +# object: { "a.b.c.d": 1, "a.b.c.e": 2, "a.b.f": 3, "a.g": 4 } +# result: { "a.b.c.d": 1, "a.b.c.e": 2, "a.b.f": 3, "a.g": 4 } + +flatten(unflatten(.)) diff --git a/lib/tests/tests/functions/flatten/simple.vrl b/lib/tests/tests/functions/flatten/simple.vrl new file mode 100644 index 0000000000..0934f13b5e --- /dev/null +++ b/lib/tests/tests/functions/flatten/simple.vrl @@ -0,0 +1,4 @@ +# object: { "a": { "b": { "c": { "d": 1, "e": 2 }, "f": 3 }, "g": 4 } } +# result: { "a.b.c.d": 1, "a.b.c.e": 2, "a.b.f": 3, "a.g": 4 } + +flatten(.) 
diff --git a/lib/tests/tests/functions/unflatten/from_flatten.vrl b/lib/tests/tests/functions/unflatten/from_flatten.vrl new file mode 100644 index 0000000000..8e8bb3e558 --- /dev/null +++ b/lib/tests/tests/functions/unflatten/from_flatten.vrl @@ -0,0 +1,4 @@ +# object: { "a": { "b": { "c": { "d": 1, "e": 2 }, "f": 3 }, "g": 4 } } +# result: { "a": { "b": { "c": { "d": 1, "e": 2 }, "f": 3 }, "g": 4 } } + +unflatten(flatten(.)) diff --git a/lib/tests/tests/functions/unflatten/simple.vrl b/lib/tests/tests/functions/unflatten/simple.vrl new file mode 100644 index 0000000000..4ba8e4f364 --- /dev/null +++ b/lib/tests/tests/functions/unflatten/simple.vrl @@ -0,0 +1,4 @@ +# object: { "a.b.c.d": 1, "a.b.c.e": 2, "a.b.f": 3, "a.g": 4 } +# result: { "a": { "b": { "c": { "d": 1, "e": 2 }, "f": 3 }, "g": 4 } } + +unflatten(.) diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index bd0b85a257..acc5f96538 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -203,6 +203,7 @@ cfg_if::cfg_if! { mod to_unix_timestamp; mod community_id; mod truncate; + mod unflatten; mod type_def; mod unique; mod unnest; @@ -381,6 +382,7 @@ cfg_if::cfg_if! 
{ pub use to_unix_timestamp::ToUnixTimestamp; pub use truncate::Truncate; pub use type_def::TypeDef; + pub use unflatten::Unflatten; pub use unique::Unique; pub use unnest::Unnest; pub use upcase::Upcase; @@ -569,6 +571,7 @@ pub fn all() -> Vec> { Box::new(CommunityID), Box::new(Truncate), Box::new(TypeDef), + Box::new(Unflatten), Box::new(Unique), Box::new(Unnest), Box::new(Upcase), diff --git a/src/stdlib/unflatten.rs b/src/stdlib/unflatten.rs new file mode 100644 index 0000000000..be082246f2 --- /dev/null +++ b/src/stdlib/unflatten.rs @@ -0,0 +1,416 @@ +use itertools::Itertools; + +use crate::compiler::prelude::*; + +static DEFAULT_SEPARATOR: &str = "."; + +fn unflatten(value: Value, separator: Value, recursive: Value) -> Resolved { + let separator = separator.try_bytes_utf8_lossy()?.into_owned(); + let recursive = recursive.try_boolean()?; + let map = value.try_object()?; + Ok(do_unflatten(map.into(), &separator, recursive)) +} + +fn do_unflatten(value: Value, separator: &str, recursive: bool) -> Value { + match value { + Value::Object(map) => do_unflatten_entries(map, separator, recursive).into(), + // Note that objects inside arrays are not unflattened + _ => value, + } +} + +fn do_unflatten_entries(entries: I, separator: &str, recursive: bool) -> ObjectMap +where + I: IntoIterator, +{ + let grouped = entries + .into_iter() + .map(|(key, value)| { + let (head, rest) = match key.split_once(separator) { + Some((key, rest)) => (key.to_string().into(), Some(rest.to_string().into())), + None => (key.clone(), None), + }; + (head, rest, value) + }) + .into_group_map_by(|(head, _, _)| head.clone()); + + grouped + .into_iter() + .map(|(key, mut values)| { + if values.len() == 1 { + match values.pop().expect("exactly one element") { + (_, None, value) => { + let value = if recursive { + do_unflatten(value, separator, recursive) + } else { + value + }; + return (key, value); + } + (_, Some(rest), value) => { + let result = do_unflatten_entry((rest, value), separator, 
recursive); + return (key, result); + } + } + } + + let new_entries = values + .into_iter() + .filter_map(|(_, rest, value)| { + // In this case, there is more than one value prefixed with the same key + // and therefore there must be nested values, so we can't set a single top-level value + // and we must filter it out. + // Example input of this case: + // { + // "a.b": 1, + // "a": 2 + // } + // Here, we will have two items grouped by "a", + // one will have `"b"` as rest and the other will have `None`. + // We have to filter out the second, as we can't set the second value + // as the value of the entry `"a"` (considered the top-level key at this level) + rest.map(|rest| (rest, value)) + }) + .collect::>(); + let result = do_unflatten_entries(new_entries, separator, recursive); + (key, result.into()) + }) + .collect() +} + +// Optimization for the case where we have to unflatten objects like +// { "a.b.c.d": 1 } +// to avoid doing recursive calls to `do_unflatten_entries` with a single entry every time +fn do_unflatten_entry(entry: (KeyString, Value), separator: &str, recursive: bool) -> Value { + let (key, value) = entry; + let keys = key.split(separator).map(Into::into).collect::>(); + let mut result = if recursive { + do_unflatten(value, separator, recursive) + } else { + value + }; + for key in keys.into_iter().rev() { + result = Value::Object(ObjectMap::from_iter([(key, result)])); + } + result +} + +#[derive(Clone, Copy, Debug)] +pub struct Unflatten; + +impl Function for Unflatten { + fn identifier(&self) -> &'static str { + "unflatten" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::OBJECT, + required: true, + }, + Parameter { + keyword: "separator", + kind: kind::BYTES, + required: false, + }, + Parameter { + keyword: "recursive", + kind: kind::BOOLEAN, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "simple", + source: r#"unflatten({ "foo.bar.baz": 
true, "foo.bar.qux": false, "foo.quux": 42 })"#, + result: Ok(r#"{ "foo": { "bar": { "baz": true, "qux": false }, "quux": 42 } }"#), + }, + Example { + title: "inner flattened recursive", + source: r#"unflatten({ "flattened.parent": { "foo.bar": true, "foo.baz": false } })"#, + result: Ok( + r#"{ "flattened": { "parent": { "foo": { "bar": true, "baz": false } } } }"#, + ), + }, + Example { + title: "inner flattened not recursive", + source: r#"unflatten({ "flattened.parent": { "foo.bar": true, "foo.baz": false } }, recursive: false)"#, + result: Ok( + r#"{ "flattened": { "parent": { "foo.bar": true, "foo.baz": false } } }"#, + ), + }, + Example { + title: "with custom separator", + source: r#"unflatten({ "foo_bar": true }, "_")"#, + result: Ok(r#"{"foo": { "bar": true }}"#), + }, + ] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let separator = arguments + .optional("separator") + .unwrap_or_else(|| expr!(DEFAULT_SEPARATOR)); + let recursive = arguments + .optional("recursive") + .unwrap_or_else(|| expr!(true)); + + Ok(UnflattenFn { + value, + separator, + recursive, + } + .as_expr()) + } +} + +#[derive(Debug, Clone)] +struct UnflattenFn { + value: Box, + separator: Box, + recursive: Box, +} + +impl FunctionExpression for UnflattenFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let separator = self.separator.resolve(ctx)?; + let recursive = self.recursive.resolve(ctx)?; + + unflatten(value, separator, recursive) + } + + fn type_def(&self, _: &TypeState) -> TypeDef { + TypeDef::object(Collection::any()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::value; + + test_function![ + unflatten => Unflatten; + + map { + args: func_args![value: value!({parent: "child"})], + want: Ok(value!({parent: "child"})), + tdef: TypeDef::object(Collection::any()), + } + + 
nested_map { + args: func_args![value: value!({"parent.child1": 1, "parent.child2": 2, key: "val"})], + want: Ok(value!({parent: {child1: 1, child2: 2}, key: "val"})), + tdef: TypeDef::object(Collection::any()), + } + + nested_map_with_separator { + args: func_args![value: value!({"parent_child1": 1, "parent_child2": 2, key: "val"}), separator: "_"], + want: Ok(value!({parent: {child1: 1, child2: 2}, key: "val"})), + tdef: TypeDef::object(Collection::any()), + } + + double_nested_map { + args: func_args![value: value!({ + "parent.child1": 1, + "parent.child2.grandchild1": 1, + "parent.child2.grandchild2": 2, + key: "val", + })], + want: Ok(value!({ + parent: { + child1: 1, + child2: { grandchild1: 1, grandchild2: 2 }, + }, + key: "val", + })), + tdef: TypeDef::object(Collection::any()), + } + + // With `recursive: false`, only keys at the first level are unflattened + double_inner_nested_map_not_recursive { + args: func_args![value: value!({ + "parent.children": {"child1":1, "child2.grandchild1": 1, "child2.grandchild2": 2 }, + key: "val", + }), recursive: false], + want: Ok(value!({ + parent: { + children: {child1: 1, "child2.grandchild1": 1, "child2.grandchild2": 2 } + }, + key: "val", + })), + tdef: TypeDef::object(Collection::any()), + } + + // With the default `recursive: true`, keys of nested objects are unflattened too + double_inner_nested_map_recursive { + args: func_args![value: value!({ + "parent.children": {child1:1, "child2.grandchild1": 1, "child2.grandchild2": 2 }, + key: "val", + })], + want: Ok(value!({ + parent: { + children: { + child1: 1, + child2: { grandchild1: 1, grandchild2: 2 }, + }, + }, + key: "val", + })), + tdef: TypeDef::object(Collection::any()), + } + + map_and_array { + args: func_args![value: value!({ + "parent.child1": [1, [2, 3]], + "parent.child2.grandchild1": 1, + "parent.child2.grandchild2": [1, [2, 3], 4], + key: "val", + })], + want: Ok(value!({ + parent: { + child1: [1, [2, 3]], + child2: {grandchild1: 1, grandchild2: [1, [2, 3], 4]}, + }, + key: "val", + })), + tdef: 
TypeDef::object(Collection::any()), + } + + map_and_array_with_separator { + args: func_args![value: value!({ + "parent_child1": [1, [2, 3]], + "parent_child2_grandchild1": 1, + "parent_child2_grandchild2": [1, [2, 3], 4], + key: "val", + }), separator: "_"], + want: Ok(value!({ + parent: { + child1: [1, [2, 3]], + child2: {grandchild1: 1, grandchild2: [1, [2, 3], 4]}, + }, + key: "val", + })), + tdef: TypeDef::object(Collection::any()), + } + + // Objects inside arrays are not unflattened + objects_inside_arrays { + args: func_args![value: value!({ + "parent": [{"child1":1},{"child2.grandchild1": 1, "child2.grandchild2": 2 }], + key: "val", + })], + want: Ok(value!({ + "parent": [{"child1":1},{"child2.grandchild1": 1, "child2.grandchild2": 2 }], + key: "val", + })), + tdef: TypeDef::object(Collection::any()), + } + + triple_nested_map { + args: func_args![value: value!({ + "parent1.child1.grandchild1": 1, + "parent1.child2.grandchild2": 2, + "parent1.child2.grandchild3": 3, + parent2: 4, + })], + want: Ok(value!({ + parent1: { + child1: { grandchild1: 1 }, + child2: { grandchild2: 2, grandchild3: 3 }, + }, + parent2: 4, + })), + tdef: TypeDef::object(Collection::any()), + } + + single_very_nested_map{ + args: func_args![value: value!({ + "a.b.c.d.e.f.g": 1, + })], + want: Ok(value!({ + a: { + b: { + c: { + d: { + e: { + f: { + g: 1, + }, + }, + }, + }, + }, + }, + })), + tdef: TypeDef::object(Collection::any()), + } + + consecutive_separators { + args: func_args![value: value!({ + "a..b": 1, + "a...c": 2, + })], + want: Ok(value!({ + a: { + "": { + b: 1, + "": { + c: 2, + }, + }, + }, + })), + tdef: TypeDef::object(Collection::any()), + } + + traling_separator{ + args: func_args![value: value!({ + "a.": 1, + })], + want: Ok(value!({ + a: { + "": 1, + }, + })), + tdef: TypeDef::object(Collection::any()), + } + + consecutive_trailing_separator{ + args: func_args![value: value!({ + "a..": 1, + })], + want: Ok(value!({ + a: { + "": { + "": 1, + } + }, + })), + tdef: 
TypeDef::object(Collection::any()), + } + + filter_out_top_level_value_when_multiple_values { + args: func_args![value: value!({ + "a.b": 1, + "a": 2, + })], + want: Ok(value!({ + a: { b: 1 }, + })), + tdef: TypeDef::object(Collection::any()), + } + ]; +}