Skip to content

Commit

Permalink
[fix](ES Catalog)Fix int parse error when querying by doc_values (apa…
Browse files Browse the repository at this point in the history
…che#40385)

When querying by doc_values, the result may be enclosed in quotes, which
leads to an int parsing error.
  • Loading branch information
qidaye committed Sep 9, 2024
1 parent 2023eab commit 19ae695
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 20 deletions.
48 changes: 28 additions & 20 deletions be/src/exec/es/es_scroll_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,31 +383,39 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
return Status::OK();
}

if (pure_doc_value && col.IsArray() && !col.Empty()) {
RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
StringParser::ParseResult result;
std::string val = col_value.GetString();
// ES allows inserting numbers and characters containing decimals in numeric types.
// To parse these numbers in Doris, we remove the decimals here.
size_t pos = val.find('.');
if (pos != std::string::npos) {
val = val.substr(0, pos);
}
size_t len = val.length();
T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);

col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
return Status::OK();
};

if (pure_doc_value && col.IsArray() && !col.Empty()) {
if (col.IsNumber()) {
RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
return Status::OK();
} else {
RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
return parse_and_insert_data(col[0]);
}
}

RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);

StringParser::ParseResult result;
std::string val = col.GetString();
// ES allows inserting numbers and characters containing decimals in numeric types.
// To parse these numbers in Doris, we remove the decimals here.
size_t pos = val.find(".");
if (pos != std::string::npos) {
val = val.substr(0, pos);
}
size_t len = val.length();
T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
RETURN_ERROR_IF_PARSING_FAILED(result, col, type);

col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);

return Status::OK();
return parse_and_insert_data(col);
}

// Constructs a ScrollParser with _size and _line_index both initialized to 0.
// NOTE(review): the doc_value_mode argument is accepted but not used by this
// constructor — confirm that ignoring it is intentional.
ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {}
Expand Down
152 changes: 152 additions & 0 deletions regression-test/data/external_table_p0/es/test_es_query.out
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql11 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql20 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -81,6 +88,13 @@ text_ignore_above_10
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql25 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql_5_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -182,6 +196,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_5_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_5_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -283,6 +311,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_6_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_7_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345

Expand Down Expand Up @@ -423,6 +465,23 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_7_32 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_7_33 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

-- !sql_7_50 --
value1 value2

Expand Down Expand Up @@ -563,6 +622,23 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_8_30 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_8_31 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

-- !sql01 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] \N [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -618,6 +694,13 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql11 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql20 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -645,6 +728,13 @@ text_ignore_above_10
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql25 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql_5_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -746,6 +836,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_5_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_5_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -847,6 +951,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_6_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_7_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345

Expand Down Expand Up @@ -987,6 +1105,23 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_7_32 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_7_33 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

-- !sql_7_50 --
value1 value2

Expand Down Expand Up @@ -1127,3 +1262,20 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_8_30 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_8_31 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

10 changes: 10 additions & 0 deletions regression-test/suites/external_table_p0/es/test_es_query.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,14 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql08 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test_v1;"""
order_qt_sql09 """select test1 from test_v1;"""
order_qt_sql10 """select test2 from test_v1;"""
order_qt_sql11 """select test6 from test_v1;"""

order_qt_sql20 """select * from test_v2 where test2='text#1'"""
order_qt_sql21 """select * from test_v2 where esquery(test2, '{"match":{"test2":"text#1"}}')"""
order_qt_sql22 """select test4,test5,test6,test7,test8 from test_v2 order by test8"""
order_qt_sql23 """select * from test_v2 where esquery(c_long, '{"term":{"c_long":"-1"}}');"""
order_qt_sql24 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test_v2;"""
order_qt_sql25 """select test6 from test_v2;"""

sql """switch test_es_query_es5"""
order_qt_sql_5_02 """select * from test1 where test2='text#1'"""
Expand All @@ -208,6 +210,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_5_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;"""
order_qt_sql_5_23 """select test1 from test1;"""
order_qt_sql_5_24 """select test2 from test1;"""
order_qt_sql_5_25 """select test6 from test1;"""
order_qt_sql_5_26 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -241,6 +245,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_6_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;"""
order_qt_sql_6_23 """select test1 from test1;"""
order_qt_sql_6_24 """select test2 from test1;"""
order_qt_sql_6_25 """select test6 from test1;"""
order_qt_sql_6_26 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -299,6 +305,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_7_29 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;"""
order_qt_sql_7_30 """select test1 from test1;"""
order_qt_sql_7_31 """select test2 from test1;"""
order_qt_sql_7_32 """select test6 from test1;"""
order_qt_sql_7_33 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -357,6 +365,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_8_27 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;"""
order_qt_sql_8_28 """select test1 from test1;"""
order_qt_sql_8_29 """select test2 from test1;"""
order_qt_sql_8_30 """select test6 from test1;"""
order_qt_sql_8_31 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down

0 comments on commit 19ae695

Please sign in to comment.