Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance OS value parsing #3002

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ public void testCompare() throws IOException {
var result =
executeQuery(
String.format(
"source=%s | eval `%s` = %s | fields `%s`",
"source=%s | head 1 | eval `%s` = %s | fields `%s`",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is current result if remove head 1? is it breaking change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I add more rows in TEST_INDEX_DATATYPE_NONNUMERIC index, head 1 to avoid change the test.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the return result order is not guaranteed, why not add another test dataset?

TEST_INDEX_DATATYPE_NONNUMERIC, name, functionCall, name));
verifySchema(result, schema(name, null, "boolean"));
verifyDataRows(result, rows(expectedResult));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public void typeof_sql_types() throws IOException {
JSONObject response =
executeQuery(
String.format(
"source=%s | eval `str` = typeof('pewpew'),"
"source=%s | head 1 | eval `str` = typeof('pewpew'),"
+ " `double` = typeof(1.0),"
+ "`int` = typeof(12345),"
+ " `long` = typeof(1234567891011),"
Expand All @@ -42,7 +42,7 @@ public void typeof_sql_types() throws IOException {
response =
executeQuery(
String.format(
"source=%s | eval "
"source=%s | head 1 | eval "
+ "`timestamp` = typeof(CAST('1961-04-12 09:07:00' AS TIMESTAMP)),"
+ "`time` = typeof(CAST('09:07:00' AS TIME)),"
+ "`date` = typeof(CAST('1961-04-12' AS DATE))"
Expand All @@ -56,7 +56,7 @@ public void typeof_opensearch_types() throws IOException {
JSONObject response =
executeQuery(
String.format(
"source=%s | eval `double` = typeof(double_number), `long` ="
"source=%s | head 1 | eval `double` = typeof(double_number), `long` ="
+ " typeof(long_number),`integer` = typeof(integer_number), `byte` ="
+ " typeof(byte_number),`short` = typeof(short_number), `float` ="
+ " typeof(float_number),`half_float` = typeof(half_float_number),"
Expand All @@ -69,11 +69,11 @@ public void typeof_opensearch_types() throws IOException {
response =
executeQuery(
String.format(
"source=%s | eval `text` = typeof(text_value), `date` = typeof(date_value),"
+ " `date_nanos` = typeof(date_nanos_value),`boolean` = typeof(boolean_value),"
+ " `object` = typeof(object_value),`keyword` = typeof(keyword_value), `ip` ="
+ " typeof(ip_value),`binary` = typeof(binary_value), `geo_point` ="
+ " typeof(geo_point_value)"
"source=%s | head 1 | eval `text` = typeof(text_value), `date` ="
+ " typeof(date_value), `date_nanos` = typeof(date_nanos_value),`boolean` ="
+ " typeof(boolean_value), `object` = typeof(object_value),`keyword` ="
+ " typeof(keyword_value), `ip` = typeof(ip_value),`binary` ="
+ " typeof(binary_value), `geo_point` = typeof(geo_point_value)"
// TODO activate this test once `ARRAY` type supported, see
// ExpressionAnalyzer::isTypeNotSupported
// + ", `nested` = typeof(nested_value)"
Expand Down
60 changes: 60 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/sql/DataTypeIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql;

import static org.opensearch.sql.legacy.SQLIntegTestCase.Index.DATA_TYPE_NONNUMERIC;
import static org.opensearch.sql.legacy.SQLIntegTestCase.Index.DATA_TYPE_NUMERIC;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;

public class DataTypeIT extends SQLIntegTestCase {
@Override
public void init() throws IOException {
loadIndex(DATA_TYPE_NUMERIC);
loadIndex(DATA_TYPE_NONNUMERIC);
}

@Test
public void testReadingData() throws IOException {
String query =
String.format(
"SELECT"
+ " long_number,integer_number,short_number,byte_number,double_number,float_number,half_float_number,scaled_float_number"
+ " FROM %s LIMIT 5",
Index.DATA_TYPE_NUMERIC.getName());
JSONObject result = executeJdbcRequest(query);
verifySchema(
result,
schema("long_number", "long"),
schema("integer_number", "integer"),
schema("short_number", "short"),
schema("byte_number", "byte"),
schema("float_number", "float"),
schema("double_number", "double"),
schema("half_float_number", "float"),
schema("scaled_float_number", "double"));
verifyDataRows(
result,
rows(1, 2, 3, 4, 5.1, 6.2, 7.3, 8.4),
rows(1, 2, 3, 4, 5.1, 6.2, 7.3, 8.4),
rows(1, 2, 3, 4, 5.1, 6.2, 7.3, 8.4));

query =
String.format(
"SELECT boolean_value,keyword_value FROM %s LIMIT 5",
Index.DATA_TYPE_NONNUMERIC.getName());
result = executeJdbcRequest(query);
verifySchema(result, schema("boolean_value", "boolean"), schema("keyword_value", "keyword"));
verifyDataRows(result, rows(true, "123"), rows(true, "123"), rows(true, "123"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ public void testDateNanosGroupBy() {
@SneakyThrows
public void testDateNanosWithNanos() {
String query =
String.format("SELECT date_nanos_value" + " FROM %s", TEST_INDEX_DATATYPE_NONNUMERIC);
String.format(
"SELECT date_nanos_value" + " FROM %s limit 1", TEST_INDEX_DATATYPE_NONNUMERIC);
JSONObject result = executeQuery(query);
verifySchema(result, schema("date_nanos_value", null, "timestamp"));
verifyDataRows(result, rows("2019-03-24 01:34:46.123456789"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void typeof_opensearch_types() {
String.format(
"SELECT typeof(double_number),typeof(long_number), typeof(integer_number),"
+ " typeof(byte_number), typeof(short_number),typeof(float_number),"
+ " typeof(half_float_number), typeof(scaled_float_number) from %s;",
+ " typeof(half_float_number), typeof(scaled_float_number) from %s limit 1;",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why add limit 1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

TEST_INDEX_DATATYPE_NUMERIC));
verifyDataRows(
response, rows("DOUBLE", "LONG", "INTEGER", "BYTE", "SHORT", "FLOAT", "FLOAT", "DOUBLE"));
Expand All @@ -61,7 +61,7 @@ public void typeof_opensearch_types() {
// TODO activate this test once `ARRAY` type supported, see
// ExpressionAnalyzer::isTypeNotSupported
// + ", typeof(nested_value)"
+ " from %s;",
+ " from %s limit 1;",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

TEST_INDEX_DATATYPE_NONNUMERIC));
verifyDataRows(
response,
Expand Down
6 changes: 5 additions & 1 deletion integ-test/src/test/resources/datatypes.json
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
{"index":{"_id":"1"}}
{"boolean_value": true, "keyword_value": "keyword", "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }}
{"boolean_value": true, "keyword_value": "123", "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }}
{"index":{"_id":"2"}}
{"boolean_value": "true", "keyword_value": 123, "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }}
{"index":{"_id":"3"}}
{"boolean_value": ["true"], "keyword_value": [123], "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }}
4 changes: 4 additions & 0 deletions integ-test/src/test/resources/datatypes_numeric.json
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
{"index":{"_id":"1"}}
{"long_number": 1, "integer_number": 2, "short_number": 3, "byte_number": 4, "double_number": 5.1, "float_number": 6.2, "half_float_number": 7.3, "scaled_float_number": 8.4}
{"index":{"_id":"2"}}
{"long_number": "1", "integer_number": "2", "short_number": "3", "byte_number": "4", "double_number": "5.1", "float_number": "6.2", "half_float_number": "7.3", "scaled_float_number": "8.4"}
{"index":{"_id":"3"}}
{"long_number": ["1"], "integer_number": ["2"], "short_number": ["3"], "byte_number": ["4"], "double_number": ["5.1"], "float_number": ["6.2"], "half_float_number": ["7.3"], "scaled_float_number": ["8.4"]}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.OpenSearchParseException;
import org.opensearch.common.Numbers;
import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.geo.GeoUtils;
import org.opensearch.common.xcontent.json.JsonXContentParser;
Expand All @@ -29,32 +30,32 @@ public class OpenSearchJsonContent implements Content {

@Override
public Integer intValue() {
return value().intValue();
return (int) extractDoubleValue(value());
penghuo marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public Long longValue() {
return value().longValue();
return extractLongValue(value());
}

@Override
public Short shortValue() {
return value().shortValue();
return (short) extractDoubleValue(value());
}

@Override
public Byte byteValue() {
return (byte) value().shortValue();
return (byte) extractDoubleValue(value());
}

@Override
public Float floatValue() {
return value().floatValue();
return (float) extractDoubleValue(value());
}

@Override
public Double doubleValue() {
return value().doubleValue();
return extractDoubleValue(value());
}

@Override
Expand All @@ -64,7 +65,7 @@ public String stringValue() {

@Override
public Boolean booleanValue() {
return value().booleanValue();
return extractBooleanValue(value());
}

@Override
Expand Down Expand Up @@ -148,4 +149,40 @@ public Pair<Double, Double> geoValue() {
private JsonNode value() {
return value;
}

/** Get double value from JsonNode if possible. */
private double extractDoubleValue(JsonNode node) {
if (node.isTextual()) {
return Double.parseDouble(node.textValue());
}
if (node.isNumber()) {
return node.doubleValue();
} else {
throw new OpenSearchParseException("node must be a number");
}
}

/** Get long value from JsonNode if possible. */
private long extractLongValue(JsonNode node) {
if (node.isTextual()) {
return Numbers.toLong(node.textValue(), true);
}
if (node.isNumber()) {
return node.longValue();
} else {
throw new OpenSearchParseException("node must be a number");
}
}

/** Get boolean value from JsonNode if possible. */
private boolean extractBooleanValue(JsonNode node) {
if (node.isTextual()) {
return Boolean.parseBoolean(node.textValue());
}
if (node.isBoolean()) {
return node.booleanValue();
} else {
throw new OpenSearchParseException("node must be a boolean");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,8 @@
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.opensearch.data.type.OpenSearchBinaryType;
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;
import org.opensearch.sql.opensearch.data.type.OpenSearchIpType;
import org.opensearch.sql.opensearch.data.utils.Content;
import org.opensearch.sql.opensearch.data.utils.ObjectContent;
import org.opensearch.sql.opensearch.data.utils.OpenSearchJsonContent;
Expand Down Expand Up @@ -346,9 +344,8 @@ private ExprValue parseArray(
if (content.objectValue() instanceof ObjectNode) {
result.add(parseStruct(content, prefix, supportArrays));
// non-object type arrays are only supported when parsing inner_hits of OS response.
} else if (!(type instanceof OpenSearchDataType
&& ((OpenSearchDataType) type).getExprType().equals(ARRAY))
&& !supportArrays) {
} else if (((OpenSearchDataType) type).getExprType().equals(ARRAY) == false
&& supportArrays == false) {
Comment on lines +347 to +348
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the change? could you elaborate more?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the type must be OpenSearchDataType after my change, please correct me if I'm wrong

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is not required, the change improvment the json parse behaviour to support read int value from [int, string] value.

return parseInnerArrayValue(content.array().next(), prefix, type, supportArrays);
} else {
content
Expand Down Expand Up @@ -415,10 +412,10 @@ private ExprValue parseGeoPoint(Content content, boolean supportArrays) {
*/
private ExprValue parseInnerArrayValue(
Content content, String prefix, ExprType type, boolean supportArrays) {
if (type instanceof OpenSearchIpType
|| type instanceof OpenSearchBinaryType
|| type instanceof OpenSearchDateType) {
return parse(content, prefix, Optional.of(type), supportArrays);
if (content.isArray()) {
return parseArray(content, prefix, type, supportArrays);
} else if (typeActionMap.containsKey(type)) {
return typeActionMap.get(type).apply(content, type);
} else if (content.isString()) {
return parse(content, prefix, Optional.of(OpenSearchDataType.of(STRING)), supportArrays);
} else if (content.isLong()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ public void constructNullArrayValue() {
public void constructByte() {
assertAll(
() -> assertEquals(byteValue((byte) 1), tupleValue("{\"byteV\":1}").get("byteV")),
() -> assertEquals(byteValue((byte) 1), tupleValue("{\"byteV\":\"1\"}").get("byteV")),
() -> assertEquals(byteValue((byte) 1), constructFromObject("byteV", 1)),
() -> assertEquals(byteValue((byte) 1), constructFromObject("byteV", "1.0")));
}
Expand All @@ -164,6 +165,7 @@ public void constructByte() {
public void constructShort() {
assertAll(
() -> assertEquals(shortValue((short) 1), tupleValue("{\"shortV\":1}").get("shortV")),
() -> assertEquals(shortValue((short) 1), tupleValue("{\"shortV\":\"1\"}").get("shortV")),
() -> assertEquals(shortValue((short) 1), constructFromObject("shortV", 1)),
() -> assertEquals(shortValue((short) 1), constructFromObject("shortV", "1.0")));
}
Expand All @@ -172,19 +174,16 @@ public void constructShort() {
public void constructInteger() {
assertAll(
() -> assertEquals(integerValue(1), tupleValue("{\"intV\":1}").get("intV")),
() -> assertEquals(integerValue(1), tupleValue("{\"intV\":\"1\"}").get("intV")),
() -> assertEquals(integerValue(1), constructFromObject("intV", 1)),
() -> assertEquals(integerValue(1), constructFromObject("intV", "1.0")));
}

@Test
public void constructIntegerValueInStringValue() {
assertEquals(integerValue(1), constructFromObject("intV", "1"));
}

@Test
public void constructLong() {
assertAll(
() -> assertEquals(longValue(1L), tupleValue("{\"longV\":1}").get("longV")),
() -> assertEquals(longValue(1L), tupleValue("{\"longV\":\"1\"}").get("longV")),
() -> assertEquals(longValue(1L), constructFromObject("longV", 1L)),
() -> assertEquals(longValue(1L), constructFromObject("longV", "1.0")));
}
Expand All @@ -193,13 +192,15 @@ public void constructLong() {
public void constructFloat() {
assertAll(
() -> assertEquals(floatValue(1f), tupleValue("{\"floatV\":1.0}").get("floatV")),
() -> assertEquals(floatValue(1f), tupleValue("{\"floatV\":\"1.0\"}").get("floatV")),
() -> assertEquals(floatValue(1f), constructFromObject("floatV", 1f)));
}

@Test
public void constructDouble() {
assertAll(
() -> assertEquals(doubleValue(1d), tupleValue("{\"doubleV\":1.0}").get("doubleV")),
() -> assertEquals(doubleValue(1d), tupleValue("{\"doubleV\":\"1.0\"}").get("doubleV")),
() -> assertEquals(doubleValue(1d), constructFromObject("doubleV", 1d)));
}

Expand All @@ -215,6 +216,7 @@ public void constructString() {
public void constructBoolean() {
assertAll(
() -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":true}").get("boolV")),
() -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":\"true\"}").get("boolV")),
() -> assertEquals(booleanValue(true), constructFromObject("boolV", true)),
() -> assertEquals(booleanValue(true), constructFromObject("boolV", "true")),
() -> assertEquals(booleanValue(true), constructFromObject("boolV", 1)),
Expand Down Expand Up @@ -755,6 +757,27 @@ public void constructGeoPointFromUnsupportedFormatShouldThrowException() {
assertEquals("lat must be a number", exception.getMessage());
}

@Test
public void constructNumberFromUnsupportedFormatShouldThrowException() {
OpenSearchParseException exception =
assertThrows(
OpenSearchParseException.class, () -> tupleValue("{\"intV\": false}").get("intV"));
assertEquals("node must be a number", exception.getMessage());

exception =
assertThrows(
OpenSearchParseException.class, () -> tupleValue("{\"longV\": false}").get("intV"));
assertEquals("node must be a number", exception.getMessage());
}

@Test
public void constructBooleanFromUnsupportedFormatShouldThrowException() {
OpenSearchParseException exception =
assertThrows(
OpenSearchParseException.class, () -> tupleValue("{\"boolV\": 1}").get("boolV"));
assertEquals("node must be a boolean", exception.getMessage());
}

@Test
public void constructBinary() {
assertEquals(
Expand All @@ -769,6 +792,7 @@ public void constructBinary() {
@Test
public void constructFromOpenSearchArrayReturnFirstElement() {
assertEquals(integerValue(1), tupleValue("{\"intV\":[1, 2, 3]}").get("intV"));
assertEquals(integerValue(1), tupleValue("{\"intV\":[\"1\", 2, 3]}").get("intV"));
assertEquals(
new ExprTupleValue(
new LinkedHashMap<String, ExprValue>() {
Expand Down
Loading