From 499bb74f0fed1fb08c38061d12bde5ca1ef0a9a5 Mon Sep 17 00:00:00 2001 From: TengJianPing <18241664+jacktengg@users.noreply.github.com> Date: Fri, 29 Dec 2023 01:01:33 +0800 Subject: [PATCH] [fix](decimalv2) fix decimalv2 agg errors (#29189) --- be/src/util/string_parser.hpp | 34 +++++----- be/src/vec/core/decimal_comparison.h | 2 +- be/src/vec/data_types/data_type_decimal.cpp | 23 +++++-- be/src/vec/data_types/data_type_decimal.h | 25 +------- be/src/vec/sink/vtablet_sink.cpp | 2 +- .../decimalv2/test_decimalv2_agg.out | 19 ++++++ .../decimalv2/test_decimalv2_load.out | 4 ++ .../data/datatype_p0/decimalv3/test_load.out | 4 ++ .../decimalv2/test_decimalv2_agg.groovy | 64 +++++++++++++++++++ .../decimalv2/test_decimalv2_load.groovy | 58 +++++++++++++++++ .../datatype_p0/decimalv3/test_load.groovy | 47 ++++++++++++++ 11 files changed, 233 insertions(+), 49 deletions(-) create mode 100644 regression-test/data/datatype_p0/decimalv2/test_decimalv2_agg.out create mode 100644 regression-test/suites/datatype_p0/decimalv2/test_decimalv2_agg.groovy diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index b928d14057e95c..e531be32d18fb7 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -39,6 +39,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "runtime/large_int_value.h" +#include "runtime/primitive_type.h" #include "vec/common/int_exp.h" #include "vec/data_types/data_type_decimal.h" @@ -158,7 +159,8 @@ class StringParser { return string_to_bool_internal(s + i, len - i, result); } - template + template ::ColumnType::value_type> static inline T string_to_decimal(const char* s, int len, int type_precision, int type_scale, ParseResult* result); @@ -568,7 +570,7 @@ inline int StringParser::StringParseTraits<__int128>::max_ascii_len() { return 39; } -template +template T StringParser::string_to_decimal(const char* s, int len, int type_precision, int type_scale, ParseResult* result) { static_assert( @@ -646,10 +648,9 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... } else { *result = StringParser::PARSE_OVERFLOW; - value = is_negative ? vectorized::min_decimal_value>( - type_precision) - : vectorized::max_decimal_value>( - type_precision); + value = is_negative + ? vectorized::min_decimal_value(type_precision) + : vectorized::max_decimal_value(type_precision); return value; } DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. @@ -696,10 +697,9 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in cur_digit = precision - scale; } else if (!found_dot && max_digit < (precision - scale)) { *result = StringParser::PARSE_OVERFLOW; - value = is_negative ? vectorized::min_decimal_value>( - type_precision) - : vectorized::max_decimal_value>( - type_precision); + value = is_negative + ? vectorized::min_decimal_value(type_precision) + : vectorized::max_decimal_value(type_precision); return value; } else if (found_dot && scale >= type_scale && !has_round) { // make rounding cases @@ -739,11 +739,10 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in if (!is_numeric_ascii(c)) { if (cur_digit > type_precision) { *result = StringParser::PARSE_OVERFLOW; - value = is_negative - ? vectorized::min_decimal_value>( - type_precision) - : vectorized::max_decimal_value>( - type_precision); + value = is_negative ? vectorized::min_decimal_value( + type_precision) + : vectorized::max_decimal_value( + type_precision); return value; } return is_negative ? T(-value) : T(value); @@ -782,9 +781,8 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in *result = StringParser::PARSE_OVERFLOW; if constexpr (TYPE_DECIMALV2 != P) { // decimalv3 overflow will return max min value for type precision - value = is_negative - ? vectorized::min_decimal_value>(type_precision) - : vectorized::max_decimal_value>(type_precision); + value = is_negative ? vectorized::min_decimal_value(type_precision) + : vectorized::max_decimal_value(type_precision); return value; } } else if (UNLIKELY(scale > type_scale)) { diff --git a/be/src/vec/core/decimal_comparison.h b/be/src/vec/core/decimal_comparison.h index 68a083dc159e3e..81f749f7e9930b 100644 --- a/be/src/vec/core/decimal_comparison.h +++ b/be/src/vec/core/decimal_comparison.h @@ -99,7 +99,7 @@ class DecimalComparison { } static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b) { - static const UInt32 max_scale = max_decimal_precision(); + static const UInt32 max_scale = max_decimal_precision(); if (scale_a > max_scale || scale_b > max_scale) { LOG(FATAL) << "Bad scale of decimal field"; } diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp index eab6a345358a7f..6d0e694f4ab6fd 100644 --- a/be/src/vec/data_types/data_type_decimal.cpp +++ b/be/src/vec/data_types/data_type_decimal.cpp @@ -166,11 +166,12 @@ bool DataTypeDecimal::parse_from_string(const std::string& str, T* res) const } DataTypePtr create_decimal(UInt64 precision_value, UInt64 scale_value, bool use_v2) { - if (precision_value < min_decimal_precision() || - precision_value > max_decimal_precision()) { + auto max_precision = + use_v2 ? max_decimal_precision() : max_decimal_precision(); + if (precision_value < min_decimal_precision() || precision_value > max_precision) { throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Wrong precision {}, min: {}, max: {}", precision_value, - min_decimal_precision(), max_decimal_precision()); + min_decimal_precision(), max_precision); } if (static_cast(scale_value) > precision_value) { @@ -228,10 +229,16 @@ Int64 max_decimal_value(UInt32 precision) { } template <> Int128 max_decimal_value(UInt32 precision) { + return DecimalV2Value::get_max_decimal().value() / + DataTypeDecimal::get_scale_multiplier( + (UInt64)max_decimal_precision() - precision); +} +template <> +Int128 max_decimal_value(UInt32 precision) { return (static_cast(999999999999999999ll) * 100000000000000000ll * 1000ll + static_cast(99999999999999999ll) * 1000ll + 999ll) / DataTypeDecimal::get_scale_multiplier( - (UInt64)max_decimal_precision() - precision); + (UInt64)max_decimal_precision() - precision); } template @@ -249,9 +256,15 @@ Int64 min_decimal_value(UInt32 precision) { (UInt64)max_decimal_precision() - precision); } template <> -Int128 min_decimal_value(UInt32 precision) { +Int128 min_decimal_value(UInt32 precision) { return -(static_cast(999999999999999999ll) * 100000000000000000ll * 1000ll + static_cast(99999999999999999ll) * 1000ll + 999ll) / + DataTypeDecimal::get_scale_multiplier( + (UInt64)max_decimal_precision() - precision); +} +template <> +Int128 min_decimal_value(UInt32 precision) { + return DecimalV2Value::get_min_decimal().value() / DataTypeDecimal::get_scale_multiplier( (UInt64)max_decimal_precision() - precision); } diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index 211fa58924212b..008060014b909a 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -85,36 +85,13 @@ constexpr size_t max_decimal_precision() { } template <> constexpr size_t max_decimal_precision() { - return 38; + return 27; } template <> constexpr size_t max_decimal_precision() { return 38; } -template -constexpr typename T::NativeType max_decimal_value() { - return 0; -} -template <> -constexpr Int32 max_decimal_value() { - return 999999999; -} -template <> -constexpr Int64 max_decimal_value() { - return 999999999999999999; -} -template <> -constexpr Int128 max_decimal_value() { - return static_cast(999999999999999999ll) * 100000000000000000ll * 1000ll + - static_cast(99999999999999999ll) * 1000ll + 999ll; -} -template <> -constexpr Int128 max_decimal_value() { - return static_cast(999999999999999999ll) * 100000000000000000ll * 1000ll + - static_cast(99999999999999999ll) * 1000ll + 999ll; -} - DataTypePtr create_decimal(UInt64 precision, UInt64 scale, bool use_v2); inline UInt32 least_decimal_precision_for(TypeIndex int_type) { diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 161a6572b9d2bd..be940b51aa9b2b 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -1900,7 +1900,7 @@ Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescripto break; } case TYPE_DECIMAL128I: { - CHECK_VALIDATION_FOR_DECIMALV3(Decimal128I, Decimal128); + CHECK_VALIDATION_FOR_DECIMALV3(Decimal128I, Decimal128I); break; } case TYPE_ARRAY: { diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_agg.out b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_agg.out new file mode 100644 index 00000000000000..19e5ec004b6af0 --- /dev/null +++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_agg.out @@ -0,0 +1,19 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !decimalv2_min -- +-123456789012345678.901234567 + +-- !decimalv2_max -- +123456789012345678.901234567 + +-- !decimalv2_count -- +3 + +-- !decimalv2_sum -- +0.012345679 + +-- !decimalv2_avg -- +0.004115226 + +-- !decimalv2_sum2 -- +1000000000000000000.000000000 + diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out index 8156a9144aa4c2..a0f046fc4e954a 100644 --- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out +++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out @@ -15,3 +15,7 @@ 11.99990 837.43444 +-- !decimalv2_insert -- +999999999999999999.999999999 1.000000000 +-999999999999999999.999999999 2.000000000 + diff --git a/regression-test/data/datatype_p0/decimalv3/test_load.out b/regression-test/data/datatype_p0/decimalv3/test_load.out index 35c355781e68d9..6907d61a12af83 100644 --- a/regression-test/data/datatype_p0/decimalv3/test_load.out +++ b/regression-test/data/datatype_p0/decimalv3/test_load.out @@ -4,3 +4,7 @@ 0.000135039891190100 0.000390160098269360 +-- !decimalv3_insert -- +-99999999999999999999999999999999.999999 4.000000 +99999999999999999999999999999999.999999 3.000000 + diff --git a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_agg.groovy b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_agg.groovy new file mode 100644 index 00000000000000..36fc347c0c4607 --- /dev/null +++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_agg.groovy @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_decimalv2_agg", "nonConcurrent") { + + sql """ + admin set frontend config("enable_decimal_conversion" = "false"); + """ + + sql """ + drop table if exists test_decimalv2_agg; + """ + sql """ + create table test_decimalv2_agg ( + k1 decimalv2(27,9) + )distributed by hash(k1) + properties("replication_num"="1"); + """ + sql """ + insert into test_decimalv2_agg values + (123456789012345678.901234567), + (-123456789012345678.901234567), + (0.012345679), + (NULL); + """ + qt_decimalv2_min " select min(k1) from test_decimalv2_agg; " + qt_decimalv2_max " select max(k1) from test_decimalv2_agg; " + qt_decimalv2_count " select count(k1) from test_decimalv2_agg; " + qt_decimalv2_sum " select sum(k1) from test_decimalv2_agg; " + qt_decimalv2_avg " select avg(k1) from test_decimalv2_agg; " + + // test overflow + sql """ + drop table if exists test_decimalv2_agg; + """ + sql """ + create table test_decimalv2_agg ( + k1 decimalv2(27,9) + )distributed by hash(k1) + properties("replication_num"="1"); + """ + sql """ + insert into test_decimalv2_agg values + (999999999999999999.999999999), + (0.000000001), + (NULL); + """ + qt_decimalv2_sum2 " select sum(k1) from test_decimalv2_agg; " + +} \ No newline at end of file diff --git a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy index 5c065a921a0b29..c8f156a3bf92c3 100644 --- a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy +++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy @@ -84,6 +84,64 @@ suite("test_decimalv2_load", "nonConcurrent") { select * from ${tableName2} order by 1; """ + sql """ + drop table if exists test_decimalv2_insert; + """ + sql """ + CREATE TABLE `test_decimalv2_insert` ( + `k1` decimalv2(27, 9) null, + `k2` decimalv2(27, 9) null + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql "set enable_insert_strict=true;" + test { + sql """ + insert into test_decimalv2_insert values("999999999999999999999999999999",1); + """ + exception "Invalid" + } + test { + sql """ + insert into test_decimalv2_insert values("-999999999999999999999999999999",2); + """ + exception "Invalid" + } + test { + sql """ + insert into test_decimalv2_insert values("999999999999999999.9999999991", 3); + """ + exception "Invalid" + } + test { + sql """ + insert into test_decimalv2_insert values("-999999999999999999.9999999991",4); + """ + exception "Invalid" + } + test { + sql """ + insert into test_decimalv2_insert values("999999999999999999.9999999995",5); + """ + exception "Invalid" + } + test { + sql """ + insert into test_decimalv2_insert values("-999999999999999999.9999999995",6); + """ + exception "Invalid" + } + sql """ + insert into test_decimalv2_insert values("999999999999999999.999999999", 1); + """ + sql """ + insert into test_decimalv2_insert values("-999999999999999999.999999999", 2); + """ + qt_decimalv2_insert "select * from test_decimalv2_insert order by 2; " + sql """ admin set frontend config("enable_decimal_conversion" = "true"); """ diff --git a/regression-test/suites/datatype_p0/decimalv3/test_load.groovy b/regression-test/suites/datatype_p0/decimalv3/test_load.groovy index c2651b3c908a6d..b6c9e1dcbfdf9d 100644 --- a/regression-test/suites/datatype_p0/decimalv3/test_load.groovy +++ b/regression-test/suites/datatype_p0/decimalv3/test_load.groovy @@ -57,4 +57,51 @@ suite("test_load") { } finally { try_sql("DROP TABLE IF EXISTS ${tableName}") } + + sql """ + drop table if exists test_decimalv3_insert; + """ + sql """ + CREATE TABLE `test_decimalv3_insert` ( + `k1` decimalv3(38, 6) null, + `k2` decimalv3(38, 6) null + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql "set enable_insert_strict=true;" + test { + sql """ + insert into test_decimalv3_insert values("9999999999999999999999999999999999999999",1); + """ + exception "Invalid" + } + test { + sql """ + insert into test_decimalv3_insert values("-9999999999999999999999999999999999999999",2); + """ + exception "Invalid" + } + sql """ + insert into test_decimalv3_insert values("99999999999999999999999999999999.9999991",3); + """ + sql """ + insert into test_decimalv3_insert values("-99999999999999999999999999999999.9999991",4); + """ + + test { + sql """ + insert into test_decimalv3_insert values("99999999999999999999999999999999.9999999",5); + """ + exception "error" + } + test { + sql """ + insert into test_decimalv3_insert values("-99999999999999999999999999999999.9999999",6); + """ + exception "error" + } + qt_decimalv3_insert "select * from test_decimalv3_insert order by 1;" }