From 06d58d0fc1ec2b23c5030ef3d97a59b794bb4a15 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Tue, 22 Oct 2024 23:23:46 +0800 Subject: [PATCH 1/8] add json_path_match udf --- src/common/function/src/scalars/json.rs | 2 + .../src/scalars/json/json_path_match.rs | 184 ++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 src/common/function/src/scalars/json/json_path_match.rs diff --git a/src/common/function/src/scalars/json.rs b/src/common/function/src/scalars/json.rs index 133eb145a715..2c420c1661e1 100644 --- a/src/common/function/src/scalars/json.rs +++ b/src/common/function/src/scalars/json.rs @@ -16,6 +16,7 @@ use std::sync::Arc; mod json_get; mod json_is; mod json_path_exists; +mod json_path_match; mod json_to_string; mod parse_json; @@ -49,5 +50,6 @@ impl JsonFunction { registry.register(Arc::new(JsonIsObject)); registry.register(Arc::new(json_path_exists::JsonPathExistsFunction)); + registry.register(Arc::new(json_path_match::JsonPathMatchFunction)); } } diff --git a/src/common/function/src/scalars/json/json_path_match.rs b/src/common/function/src/scalars/json/json_path_match.rs new file mode 100644 index 000000000000..ff80004cae4f --- /dev/null +++ b/src/common/function/src/scalars/json/json_path_match.rs @@ -0,0 +1,184 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::{self, Display}; + +use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu}; +use common_query::prelude::Signature; +use datafusion::logical_expr::Volatility; +use datatypes::data_type::ConcreteDataType; +use datatypes::prelude::VectorRef; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BooleanVectorBuilder, MutableVector}; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; + +/// Check if the given JSON data contains the given JSON path. +#[derive(Clone, Debug, Default)] +pub struct JsonPathMatchFunction; + +const NAME: &str = "json_path_match"; + +impl Function for JsonPathMatchFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::boolean_datatype()) + } + + fn signature(&self) -> Signature { + Signature::exact( + vec![ + ConcreteDataType::json_datatype(), + ConcreteDataType::string_datatype(), + ], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let jsons = &columns[0]; + let paths = &columns[1]; + + let size = jsons.len(); + let datatype = jsons.data_type(); + let mut results = BooleanVectorBuilder::with_capacity(size); + + match datatype { + // JSON data type uses binary vector + ConcreteDataType::Binary(_) => { + for i in 0..size { + let json = jsons.get_ref(i); + let path = paths.get_ref(i); + + let json = json.as_binary(); + let path = path.as_string(); + let result = match (json, path) { + (Ok(Some(json)), Ok(Some(path))) => { + let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes()); + match json_path { + Ok(json_path) => jsonb::path_match(json, json_path).ok(), + Err(_) => None, + } + } + _ => None, + }; + + results.push(result); + } + } + _ => { + return UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + .fail(); + } + } + + Ok(results.to_vector()) + } +} + +impl Display for JsonPathMatchFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "JSON_PATH_MATCH") + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use common_query::prelude::TypeSignature; + use datatypes::vectors::{BinaryVector, StringVector}; + + use super::*; + + #[test] + fn test_json_path_match_function() { + let json_path_match = JsonPathMatchFunction; + + assert_eq!("json_path_match", json_path_match.name()); + assert_eq!( + ConcreteDataType::boolean_datatype(), + json_path_match + .return_type(&[ConcreteDataType::json_datatype()]) + .unwrap() + ); + + assert!(matches!(json_path_match.signature(), + Signature { + type_signature: TypeSignature::Exact(valid_types), + volatility: Volatility::Immutable + } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()] + )); + + let json_strings = [ + Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()), + Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()), + Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()), + Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()), + ]; + + let paths = vec![ + Some("$.a.b == 2".to_string()), + Some("$.b[1 to last] >= 2".to_string()), + Some("$.c > 0".to_string()), + None, + ]; + + let results = vec![Some(true), Some(true), Some(false), None]; + + let jsonbs = json_strings + .into_iter() + .map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec())) + .collect::>(); + + let json_vector = BinaryVector::from(jsonbs); + let path_vector = StringVector::from(paths); + let args: Vec = vec![Arc::new(json_vector), Arc::new(path_vector)]; + let vector = json_path_match + .eval(FunctionContext::default(), &args) + .unwrap(); + + assert_eq!(4, vector.len()); + for (i, expected) in results.iter().enumerate() { + let result = vector.get_ref(i); + + match expected { + Some(expected_value) => { + assert!(!result.is_null()); + let result_value = result.as_boolean().unwrap().unwrap(); + assert_eq!(*expected_value, result_value); + } + None => { + assert!(result.is_null()); + } + } + } + } +} From 0f752717db51afd8d2405e88ef834e55faaa6385 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Tue, 22 Oct 2024 23:24:09 +0800 Subject: [PATCH 2/8] sql tests for json_path_match --- .../common/function/json/json.result | 41 +++++++++++++++++++ .../standalone/common/function/json/json.sql | 15 +++++++ 2 files changed, 56 insertions(+) diff --git a/tests/cases/standalone/common/function/json/json.result b/tests/cases/standalone/common/function/json/json.result index bae33135488b..70737991c126 100644 --- a/tests/cases/standalone/common/function/json/json.result +++ b/tests/cases/standalone/common/function/json/json.result @@ -31,3 +31,44 @@ SELECT json_path_exists(parse_json('[1, 2]'), '[2]'); | false | +----------------------------------------------------------+ +SELECT json_path_exists(parse_json('[1, 2]'), NULL); + ++---------------------------------------------------+ +| json_path_exists(parse_json(Utf8("[1, 2]")),NULL) | ++---------------------------------------------------+ +| | ++---------------------------------------------------+ + +--- json_path_match --- +SELECT json_path_match(parse_json('{"a": 1, "b": 2}'), '$.a == 1'); + ++------------------------------------------------------------------------+ +| json_path_match(parse_json(Utf8("{"a": 1, "b": 2}")),Utf8("$.a == 1")) | ++------------------------------------------------------------------------+ +| true | ++------------------------------------------------------------------------+ + +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[0] > 1'); + ++-----------------------------------------------------------------------------+ +| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),Utf8("$.b[0] > 1")) | ++-----------------------------------------------------------------------------+ +| false | ++-----------------------------------------------------------------------------+ + +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2'); + ++--------------------------------------------------------------------------------------+ +| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),Utf8("$.b[1 to last] >= 2")) | ++--------------------------------------------------------------------------------------+ +| true | ++--------------------------------------------------------------------------------------+ + +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), NULL); + ++---------------------------------------------------------------+ +| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),NULL) | ++---------------------------------------------------------------+ +| | ++---------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/json/json.sql b/tests/cases/standalone/common/function/json/json.sql index 76914e257571..25b844583563 100644 --- a/tests/cases/standalone/common/function/json/json.sql +++ b/tests/cases/standalone/common/function/json/json.sql @@ -6,3 +6,18 @@ SELECT json_path_exists(parse_json('{"a": 1, "b": 2}'), '$.c'); SELECT json_path_exists(parse_json('[1, 2]'), '[0]'); SELECT json_path_exists(parse_json('[1, 2]'), '[2]'); + +SELECT json_path_exists(parse_json('[1, 2]'), NULL); + + +--- json_path_match --- + +SELECT json_path_match(parse_json('{"a": 1, "b": 2}'), '$.a == 1'); + +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[0] > 1'); + +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2'); + +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), NULL); + +-- SELECT JSON_PATH_MATCH(NULL, '$.a == 1'); From 7d6fa26cf7f5a8167749049a944a5027f997ebe8 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Tue, 22 Oct 2024 23:43:54 +0800 Subject: [PATCH 3/8] fix clippy & comment --- src/common/function/src/scalars/json/json_path_match.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/function/src/scalars/json/json_path_match.rs b/src/common/function/src/scalars/json/json_path_match.rs index ff80004cae4f..6b526e656c2d 100644 --- a/src/common/function/src/scalars/json/json_path_match.rs +++ b/src/common/function/src/scalars/json/json_path_match.rs @@ -25,7 +25,7 @@ use snafu::ensure; use crate::function::{Function, FunctionContext}; -/// Check if the given JSON data contains the given JSON path. +/// Check if the given JSON data match the given JSON path's predicate. #[derive(Clone, Debug, Default)] pub struct JsonPathMatchFunction; @@ -151,7 +151,7 @@ mod tests { None, ]; - let results = vec![Some(true), Some(true), Some(false), None]; + let results = [Some(true), Some(true), Some(false), None]; let jsonbs = json_strings .into_iter() From 5c62471edf88fd04d2bd23207af69101aea88760 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Wed, 23 Oct 2024 00:47:20 +0800 Subject: [PATCH 4/8] fix null value behavior --- .../src/scalars/json/json_path_match.rs | 80 ++++++++++++++----- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/src/common/function/src/scalars/json/json_path_match.rs b/src/common/function/src/scalars/json/json_path_match.rs index 6b526e656c2d..151874fb43fe 100644 --- a/src/common/function/src/scalars/json/json_path_match.rs +++ b/src/common/function/src/scalars/json/json_path_match.rs @@ -15,7 +15,7 @@ use std::fmt::{self, Display}; use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu}; -use common_query::prelude::Signature; +use common_query::prelude::{Signature, TypeSignature}; use datafusion::logical_expr::Volatility; use datatypes::data_type::ConcreteDataType; use datatypes::prelude::VectorRef; @@ -41,10 +41,24 @@ impl Function for JsonPathMatchFunction { } fn signature(&self) -> Signature { - Signature::exact( + Signature::one_of( vec![ - ConcreteDataType::json_datatype(), - ConcreteDataType::string_datatype(), + TypeSignature::Exact(vec![ + ConcreteDataType::json_datatype(), + ConcreteDataType::string_datatype(), + ]), + TypeSignature::Exact(vec![ + ConcreteDataType::null_datatype(), + ConcreteDataType::string_datatype(), + ]), + TypeSignature::Exact(vec![ + ConcreteDataType::json_datatype(), + ConcreteDataType::null_datatype(), + ]), + TypeSignature::Exact(vec![ + ConcreteDataType::null_datatype(), + ConcreteDataType::null_datatype(), + ]), ], Volatility::Immutable, ) @@ -64,16 +78,15 @@ impl Function for JsonPathMatchFunction { let paths = &columns[1]; let size = jsons.len(); - let datatype = jsons.data_type(); let mut results = BooleanVectorBuilder::with_capacity(size); - match datatype { - // JSON data type uses binary vector - ConcreteDataType::Binary(_) => { - for i in 0..size { - let json = jsons.get_ref(i); - let path = paths.get_ref(i); + for i in 0..size { + let json = jsons.get_ref(i); + let path = paths.get_ref(i); + match json.data_type() { + // JSON data type uses binary vector + ConcreteDataType::Binary(_) => { let json = json.as_binary(); let path = path.as_string(); let result = match (json, path) { @@ -89,13 +102,16 @@ impl Function for JsonPathMatchFunction { results.push(result); } - } - _ => { - return UnsupportedInputDataTypeSnafu { - function: NAME, - datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + + ConcreteDataType::Null(_) => results.push(None), + + _ => { + return UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + .fail(); } - .fail(); } } @@ -132,9 +148,27 @@ mod tests { assert!(matches!(json_path_match.signature(), Signature { - type_signature: TypeSignature::Exact(valid_types), + type_signature: TypeSignature::OneOf(valid_types), volatility: Volatility::Immutable - } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()] + } if + valid_types == vec![ + TypeSignature::Exact(vec![ + ConcreteDataType::json_datatype(), + ConcreteDataType::string_datatype(), + ]), + TypeSignature::Exact(vec![ + ConcreteDataType::null_datatype(), + ConcreteDataType::string_datatype(), + ]), + TypeSignature::Exact(vec![ + ConcreteDataType::json_datatype(), + ConcreteDataType::null_datatype(), + ]), + TypeSignature::Exact(vec![ + ConcreteDataType::null_datatype(), + ConcreteDataType::null_datatype(), + ]) + ] )); let json_strings = [ @@ -142,6 +176,8 @@ mod tests { Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()), Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()), Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()), + None, + None, ]; let paths = vec![ @@ -149,9 +185,11 @@ mod tests { Some("$.b[1 to last] >= 2".to_string()), Some("$.c > 0".to_string()), None, + Some("$.c > 0".to_string()), + None, ]; - let results = [Some(true), Some(true), Some(false), None]; + let results = [Some(true), Some(true), Some(false), None, None, None]; let jsonbs = json_strings .into_iter() @@ -165,7 +203,7 @@ mod tests { .eval(FunctionContext::default(), &args) .unwrap(); - assert_eq!(4, vector.len()); + assert_eq!(6, vector.len()); for (i, expected) in results.iter().enumerate() { let result = vector.get_ref(i); From 5ad2440569be139af9f2b5e367f2e062742ca227 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Wed, 23 Oct 2024 00:58:09 +0800 Subject: [PATCH 5/8] added null tests --- tests/cases/standalone/common/function/json/json.result | 9 +++++++++ tests/cases/standalone/common/function/json/json.sql | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/cases/standalone/common/function/json/json.result b/tests/cases/standalone/common/function/json/json.result index 70737991c126..1e1d6ad1ea2a 100644 --- a/tests/cases/standalone/common/function/json/json.result +++ b/tests/cases/standalone/common/function/json/json.result @@ -39,6 +39,7 @@ SELECT json_path_exists(parse_json('[1, 2]'), NULL); | | +---------------------------------------------------+ +-- SELECT json_path_exists(NULL, '$.a'); --- json_path_match --- SELECT json_path_match(parse_json('{"a": 1, "b": 2}'), '$.a == 1'); @@ -72,3 +73,11 @@ SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), NULL); | | +---------------------------------------------------------------+ +SELECT json_path_match(NULL, '$.a == 1'); + ++----------------------------------------+ +| json_path_match(NULL,Utf8("$.a == 1")) | ++----------------------------------------+ +| | ++----------------------------------------+ + diff --git a/tests/cases/standalone/common/function/json/json.sql b/tests/cases/standalone/common/function/json/json.sql index 25b844583563..d7fcfcb44e94 100644 --- a/tests/cases/standalone/common/function/json/json.sql +++ b/tests/cases/standalone/common/function/json/json.sql @@ -9,6 +9,7 @@ SELECT json_path_exists(parse_json('[1, 2]'), '[2]'); SELECT json_path_exists(parse_json('[1, 2]'), NULL); +-- SELECT json_path_exists(NULL, '$.a'); --- json_path_match --- @@ -20,4 +21,4 @@ SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2') SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), NULL); --- SELECT JSON_PATH_MATCH(NULL, '$.a == 1'); +SELECT json_path_match(NULL, '$.a == 1'); From bbc92d9834757bf9c1f2ee37d69b55b35d982e6c Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:52:16 +0800 Subject: [PATCH 6/8] adjust function's behavior on nulls --- .../src/scalars/json/json_path_match.rs | 66 +++++-------------- 1 file changed, 18 insertions(+), 48 deletions(-) diff --git a/src/common/function/src/scalars/json/json_path_match.rs b/src/common/function/src/scalars/json/json_path_match.rs index 151874fb43fe..bdb2edfc86c1 100644 --- a/src/common/function/src/scalars/json/json_path_match.rs +++ b/src/common/function/src/scalars/json/json_path_match.rs @@ -15,7 +15,7 @@ use std::fmt::{self, Display}; use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu}; -use common_query::prelude::{Signature, TypeSignature}; +use common_query::prelude::Signature; use datafusion::logical_expr::Volatility; use datatypes::data_type::ConcreteDataType; use datatypes::prelude::VectorRef; @@ -41,24 +41,10 @@ impl Function for JsonPathMatchFunction { } fn signature(&self) -> Signature { - Signature::one_of( + Signature::exact( vec![ - TypeSignature::Exact(vec![ - ConcreteDataType::json_datatype(), - ConcreteDataType::string_datatype(), - ]), - TypeSignature::Exact(vec![ - ConcreteDataType::null_datatype(), - ConcreteDataType::string_datatype(), - ]), - TypeSignature::Exact(vec![ - ConcreteDataType::json_datatype(), - ConcreteDataType::null_datatype(), - ]), - TypeSignature::Exact(vec![ - ConcreteDataType::null_datatype(), - ConcreteDataType::null_datatype(), - ]), + ConcreteDataType::json_datatype(), + ConcreteDataType::string_datatype(), ], Volatility::Immutable, ) @@ -91,10 +77,14 @@ impl Function for JsonPathMatchFunction { let path = path.as_string(); let result = match (json, path) { (Ok(Some(json)), Ok(Some(path))) => { - let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes()); - match json_path { - Ok(json_path) => jsonb::path_match(json, json_path).ok(), - Err(_) => None, + if jsonb::is_object(json) { + let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes()); + match json_path { + Ok(json_path) => jsonb::path_match(json, json_path).ok(), + Err(_) => None, + } + } else { + None } } _ => None, @@ -103,8 +93,6 @@ impl Function for JsonPathMatchFunction { results.push(result); } - ConcreteDataType::Null(_) => results.push(None), - _ => { return UnsupportedInputDataTypeSnafu { function: NAME, @@ -148,27 +136,9 @@ mod tests { assert!(matches!(json_path_match.signature(), Signature { - type_signature: TypeSignature::OneOf(valid_types), + type_signature: TypeSignature::Exact(valid_types), volatility: Volatility::Immutable - } if - valid_types == vec![ - TypeSignature::Exact(vec![ - ConcreteDataType::json_datatype(), - ConcreteDataType::string_datatype(), - ]), - TypeSignature::Exact(vec![ - ConcreteDataType::null_datatype(), - ConcreteDataType::string_datatype(), - ]), - TypeSignature::Exact(vec![ - ConcreteDataType::json_datatype(), - ConcreteDataType::null_datatype(), - ]), - TypeSignature::Exact(vec![ - ConcreteDataType::null_datatype(), - ConcreteDataType::null_datatype(), - ]) - ] + } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()], )); let json_strings = [ @@ -176,17 +146,17 @@ mod tests { Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()), Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()), Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()), - None, - None, + Some(r#"null"#.to_string()), + Some(r#"null"#.to_string()), ]; let paths = vec![ Some("$.a.b == 2".to_string()), Some("$.b[1 to last] >= 2".to_string()), Some("$.c > 0".to_string()), - None, + Some(r#"null"#.to_string()), Some("$.c > 0".to_string()), - None, + Some(r#"null"#.to_string()), ]; let results = [Some(true), Some(true), Some(false), None, None, None]; From 0c3c671d1879d6fe025b1809d5637e4669e94283 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:52:35 +0800 Subject: [PATCH 7/8] update test cases --- .../common/function/json/json.result | 45 +++++++++++-------- .../standalone/common/function/json/json.sql | 8 ++-- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/tests/cases/standalone/common/function/json/json.result b/tests/cases/standalone/common/function/json/json.result index 1e1d6ad1ea2a..42db0b263e30 100644 --- a/tests/cases/standalone/common/function/json/json.result +++ b/tests/cases/standalone/common/function/json/json.result @@ -31,15 +31,22 @@ SELECT json_path_exists(parse_json('[1, 2]'), '[2]'); | false | +----------------------------------------------------------+ -SELECT json_path_exists(parse_json('[1, 2]'), NULL); +SELECT json_path_exists(parse_json('[1, 2]'), 'null'); -+---------------------------------------------------+ -| json_path_exists(parse_json(Utf8("[1, 2]")),NULL) | -+---------------------------------------------------+ -| | -+---------------------------------------------------+ ++-----------------------------------------------------------+ +| json_path_exists(parse_json(Utf8("[1, 2]")),Utf8("null")) | ++-----------------------------------------------------------+ +| false | ++-----------------------------------------------------------+ + +SELECT json_path_exists(parse_json('null'), '$.a'); + ++--------------------------------------------------------+ +| json_path_exists(parse_json(Utf8("null")),Utf8("$.a")) | ++--------------------------------------------------------+ +| false | ++--------------------------------------------------------+ --- SELECT json_path_exists(NULL, '$.a'); --- json_path_match --- SELECT json_path_match(parse_json('{"a": 1, "b": 2}'), '$.a == 1'); @@ -65,19 +72,19 @@ SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2') | true | +--------------------------------------------------------------------------------------+ -SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), NULL); +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), 'null'); -+---------------------------------------------------------------+ -| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),NULL) | -+---------------------------------------------------------------+ -| | -+---------------------------------------------------------------+ ++-----------------------------------------------------------------------+ +| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),Utf8("null")) | ++-----------------------------------------------------------------------+ +| | ++-----------------------------------------------------------------------+ -SELECT json_path_match(NULL, '$.a == 1'); +SELECT json_path_match(parse_json('null'), '$.a == 1'); -+----------------------------------------+ -| json_path_match(NULL,Utf8("$.a == 1")) | -+----------------------------------------+ -| | -+----------------------------------------+ ++------------------------------------------------------------+ +| json_path_match(parse_json(Utf8("null")),Utf8("$.a == 1")) | ++------------------------------------------------------------+ +| | ++------------------------------------------------------------+ diff --git a/tests/cases/standalone/common/function/json/json.sql b/tests/cases/standalone/common/function/json/json.sql index d7fcfcb44e94..8980be33e310 100644 --- a/tests/cases/standalone/common/function/json/json.sql +++ b/tests/cases/standalone/common/function/json/json.sql @@ -7,9 +7,9 @@ SELECT json_path_exists(parse_json('[1, 2]'), '[0]'); SELECT json_path_exists(parse_json('[1, 2]'), '[2]'); -SELECT json_path_exists(parse_json('[1, 2]'), NULL); +SELECT json_path_exists(parse_json('[1, 2]'), 'null'); --- SELECT json_path_exists(NULL, '$.a'); +SELECT json_path_exists(parse_json('null'), '$.a'); --- json_path_match --- @@ -19,6 +19,6 @@ SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[0] > 1'); SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2'); -SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), NULL); +SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), 'null'); -SELECT json_path_match(NULL, '$.a == 1'); +SELECT json_path_match(parse_json('null'), '$.a == 1'); From 49e4e05dc54ec7415525eaeec273cd9901599889 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:02:03 +0800 Subject: [PATCH 8/8] fix null check of json --- .../function/src/scalars/json/json_path_match.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/common/function/src/scalars/json/json_path_match.rs b/src/common/function/src/scalars/json/json_path_match.rs index bdb2edfc86c1..8ea1bf082b7f 100644 --- a/src/common/function/src/scalars/json/json_path_match.rs +++ b/src/common/function/src/scalars/json/json_path_match.rs @@ -77,7 +77,7 @@ impl Function for JsonPathMatchFunction { let path = path.as_string(); let result = match (json, path) { (Ok(Some(json)), Ok(Some(path))) => { - if jsonb::is_object(json) { + if !jsonb::is_null(json) { let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes()); match json_path { Ok(json_path) => jsonb::path_match(json, json_path).ok(), @@ -145,6 +145,7 @@ mod tests { Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()), Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()), Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()), + Some(r#"[1,2,3]"#.to_string()), Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()), Some(r#"null"#.to_string()), Some(r#"null"#.to_string()), @@ -154,12 +155,21 @@ mod tests { Some("$.a.b == 2".to_string()), Some("$.b[1 to last] >= 2".to_string()), Some("$.c > 0".to_string()), + Some("$[0 to last] > 0".to_string()), Some(r#"null"#.to_string()), Some("$.c > 0".to_string()), Some(r#"null"#.to_string()), ]; - let results = [Some(true), Some(true), Some(false), None, None, None]; + let results = [ + Some(true), + Some(true), + Some(false), + Some(true), + None, + None, + None, + ]; let jsonbs = json_strings .into_iter() @@ -173,7 +183,7 @@ mod tests { .eval(FunctionContext::default(), &args) .unwrap(); - assert_eq!(6, vector.len()); + assert_eq!(7, vector.len()); for (i, expected) in results.iter().enumerate() { let result = vector.get_ref(i);