From 97e5c35325873f24b5f3f992039405d707479066 Mon Sep 17 00:00:00 2001 From: Lin Xintao Date: Sun, 22 Sep 2024 10:17:57 +0800 Subject: [PATCH] Improve like predicate speed --- .../iotdb/db/it/query/IoTDBNullOperandIT.java | 2 +- .../it/query/old/IoTDBNestedQueryTableIT.java | 98 ++++++++++++------- .../relational/ColumnTransformerBuilder.java | 17 ++-- .../plan/expression/unary/LikeExpression.java | 16 +-- .../expression/unary/RegularExpression.java | 6 +- .../visitor/ColumnTransformerVisitor.java | 3 +- .../visitor/IntermediateLayerVisitor.java | 3 +- .../ConvertPredicateToFilterVisitor.java | 14 ++- ...ConvertSchemaPredicateToFilterVisitor.java | 10 +- .../column/unary/LikeColumnTransformer.java | 61 ++++++++++++ .../transformer/unary/LikeTransformer.java | 67 +++++++++++++ .../SchemaRegionTableDeviceTest.java | 8 +- .../unary/UnaryColumnTransformerTest.java | 15 +++ .../filter/impl/StringValueFilterVisitor.java | 2 +- .../schema/filter/impl/values/LikeFilter.java | 38 +++++-- .../unary/LikeViewExpression.java | 72 ++------------ .../commons/schema/SchemaFilterSerDeTest.java | 5 +- pom.xml | 2 +- 18 files changed, 297 insertions(+), 142 deletions(-) create mode 100644 iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/LikeColumnTransformer.java create mode 100644 iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/transformer/unary/LikeTransformer.java diff --git a/integration-test/src/test/java/org/apache/iotdb/db/it/query/IoTDBNullOperandIT.java b/integration-test/src/test/java/org/apache/iotdb/db/it/query/IoTDBNullOperandIT.java index 375289953cd2..b1aa94805f39 100644 --- a/integration-test/src/test/java/org/apache/iotdb/db/it/query/IoTDBNullOperandIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/db/it/query/IoTDBNullOperandIT.java @@ -104,7 +104,7 @@ public void testCompareOperations() { "root.test.sg1.s1 = root.test.sg1.s2", "root.test.sg1.s1 > root.test.sg1.s2", 
"root.test.sg1.s1 < root.test.sg1.s2", - "root.test.sg1.s5 LIKE '^test$'", + "root.test.sg1.s5 LIKE 'LikePattern{pattern='test', escape=\\}'", "root.test.sg1.s2 IN (1,2)", "root.test.sg1.s2 BETWEEN 1 AND 3", }; diff --git a/integration-test/src/test/java/org/apache/iotdb/relational/it/query/old/IoTDBNestedQueryTableIT.java b/integration-test/src/test/java/org/apache/iotdb/relational/it/query/old/IoTDBNestedQueryTableIT.java index 48fbd7096aeb..e4cc7f41e3ce 100644 --- a/integration-test/src/test/java/org/apache/iotdb/relational/it/query/old/IoTDBNestedQueryTableIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/relational/it/query/old/IoTDBNestedQueryTableIT.java @@ -79,6 +79,8 @@ private static void createTable() { statement.execute( "create table vehicle2(device_id STRING ID, s1 FLOAT MEASUREMENT, s2 DOUBLE MEASUREMENT, empty DOUBLE MEASUREMENT)"); + statement.execute( + "create table likeTest(device_id STRING ID, s1 TEXT MEASUREMENT, s2 STRING MEASUREMENT)"); } catch (SQLException throwable) { fail(throwable.getMessage()); } @@ -101,6 +103,12 @@ private static void generateData() { statement.execute("insert into vehicle1(time,device_id,s5) values(1, 'd1', '2024-01-01')"); statement.execute("insert into vehicle1(time,device_id,s5) values(2, 'd1','2024-01-02')"); statement.execute("insert into vehicle1(time,device_id,s5) values(3, 'd1','2024-01-03')"); + statement.execute( + "insert into likeTest(time,device_id,s1,s2) values(1, 'd1','abcdef', '123456')"); + statement.execute( + "insert into likeTest(time,device_id,s1,s2) values(2, 'd1','_abcdef', '123\\456')"); + statement.execute( + "insert into likeTest(time,device_id,s1,s2) values(3, 'd1','abcdef%', '123#456')"); } catch (SQLException throwable) { fail(throwable.getMessage()); } @@ -403,45 +411,63 @@ public void testBetweenExpression() { public void testRegularLikeInExpressions() { try (Connection connection = EnvFactory.getEnv().getConnection(BaseEnv.TABLE_SQL_DIALECT); Statement statement = 
connection.createStatement()) { - // String query = - // "SELECT s1 FROM vehicle1 where device_id='d1' WHERE s3 LIKE '_' && s3 REGEXP - // '[0-9]' && s3 IN ('4', '2', '3')"; - // try (ResultSet rs = statement.executeQuery(query)) { - // for (int i = 2; i <= 4; i++) { - // Assert.assertTrue(rs.next()); - // Assert.assertEquals(i, rs.getLong(1)); - // } - // Assert.assertFalse(rs.next()); - // } - - // String query2 = - // "SELECT s1 FROM vehicle1 where device_id='d1' WHERE s4 LIKE '_' && s4 REGEXP - // '[0-9]' && s4 IN ('4', '2', '3')"; - // try (ResultSet rs = statement.executeQuery(query2)) { - // for (int i = 2; i <= 4; i++) { - // Assert.assertTrue(rs.next()); - // Assert.assertEquals(i, rs.getLong(1)); - // } - // Assert.assertFalse(rs.next()); - // } statement.execute("USE " + DATABASE_NAME); + String[] ans = new String[] {"abcdef"}; + String query = "SELECT s1 FROM likeTest where s1 LIKE 'abcdef'"; + try (ResultSet rs = statement.executeQuery(query)) { + for (int i = 2; i < 3; i++) { + Assert.assertTrue(rs.next()); + Assert.assertEquals(ans[i - 2], rs.getString(1)); + } + Assert.assertFalse(rs.next()); + } - // String query3 = - // "SELECT time,s1 FROM vehicle1 where device_id='d1' and s5 IN ('2024-01-01', - // '2024-01-02', '2024-01-03')"; - // try (ResultSet rs = statement.executeQuery(query3)) { - // for (int i = 1; i <= 3; i++) { - // Assert.assertTrue(rs.next()); - // Assert.assertEquals(i, rs.getLong(1)); - // } - // Assert.assertFalse(rs.next()); - // } - - String query4 = "SELECT time,s1 FROM vehicle1 where device_id='d1' and s6 IN (1, 2, 3)"; - try (ResultSet rs = statement.executeQuery(query4)) { - for (int i = 1; i <= 3; i++) { + ans = new String[] {"_abcdef"}; + query = "SELECT s1 FROM likeTest where s1 LIKE '\\_%' escape '\\'"; + try (ResultSet rs = statement.executeQuery(query)) { + for (int i = 2; i < 3; i++) { Assert.assertTrue(rs.next()); - Assert.assertEquals(i, rs.getLong(1)); + Assert.assertEquals(ans[i - 2], rs.getString(1)); + } + 
Assert.assertFalse(rs.next()); + } + + ans = new String[] {"abcdef", "_abcdef", "abcdef%"}; + query = "SELECT s1 FROM likeTest where s1 LIKE '%abcde%' escape '\\'"; + try (ResultSet rs = statement.executeQuery(query)) { + for (int i = 2; i < 5; i++) { + Assert.assertTrue(rs.next()); + Assert.assertEquals(ans[i - 2], rs.getString(1)); + } + Assert.assertFalse(rs.next()); + } + + ans = new String[] {"123456"}; + query = "SELECT s2 FROM likeTest where s2 LIKE '12345_'"; + try (ResultSet rs = statement.executeQuery(query)) { + for (int i = 2; i < 3; i++) { + Assert.assertTrue(rs.next()); + Assert.assertEquals(ans[i - 2], rs.getString(1)); + } + Assert.assertFalse(rs.next()); + } + + ans = new String[] {"123\\456"}; + query = "SELECT s2 FROM likeTest where s2 LIKE '%\\\\%' escape '\\'"; + try (ResultSet rs = statement.executeQuery(query)) { + for (int i = 2; i < 3; i++) { + Assert.assertTrue(rs.next()); + Assert.assertEquals(ans[i - 2], rs.getString(1)); + } + Assert.assertFalse(rs.next()); + } + + ans = new String[] {"123#456"}; + query = "SELECT s2 FROM likeTest where s2 LIKE '123##456' escape '#'"; + try (ResultSet rs = statement.executeQuery(query)) { + for (int i = 2; i < 3; i++) { + Assert.assertTrue(rs.next()); + Assert.assertEquals(ans[i - 2], rs.getString(1)); } Assert.assertFalse(rs.next()); } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/relational/ColumnTransformerBuilder.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/relational/ColumnTransformerBuilder.java index 7079bf0d9f15..d2b02a73ea00 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/relational/ColumnTransformerBuilder.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/relational/ColumnTransformerBuilder.java @@ -90,8 +90,8 @@ import org.apache.iotdb.db.queryengine.transformation.dag.column.multi.LogicalOrMultiColumnTransformer; import 
org.apache.iotdb.db.queryengine.transformation.dag.column.ternary.BetweenColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.IsNullColumnTransformer; +import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LikeColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LogicNotColumnTransformer; -import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.RegularColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.AbsColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.AcosColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.AsinColumnTransformer; @@ -148,6 +148,7 @@ import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.UpperColumnTransformer; import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.common.regexp.LikePattern; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.read.common.block.column.BinaryColumn; import org.apache.tsfile.read.common.block.column.BooleanColumn; @@ -173,14 +174,13 @@ import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoMetadataChecker.isStringLiteral; import static org.apache.iotdb.db.queryengine.plan.relational.type.InternalTypeManager.getTSDataType; import static org.apache.iotdb.db.queryengine.plan.relational.type.TypeSignatureTranslator.toTypeSignature; +import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter; import static org.apache.tsfile.read.common.type.BlobType.BLOB; import static org.apache.tsfile.read.common.type.BooleanType.BOOLEAN; import static org.apache.tsfile.read.common.type.DoubleType.DOUBLE; import static org.apache.tsfile.read.common.type.IntType.INT32; import static org.apache.tsfile.read.common.type.LongType.INT64; import static 
org.apache.tsfile.read.common.type.StringType.STRING; -import static org.apache.tsfile.utils.RegexUtils.compileRegex; -import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex; public class ColumnTransformerBuilder extends AstVisitor { @@ -1153,13 +1153,18 @@ protected ColumnTransformer visitLikePredicate(LikePredicate node, Context conte context.cache.put(node, identity); } else { ColumnTransformer childColumnTransformer = process(node.getValue(), context); + Optional escapeValueOpt = + node.getEscape().isPresent() + ? Optional.ofNullable(((StringLiteral) node.getEscape().get()).getValue()) + : Optional.empty(); context.cache.put( node, - new RegularColumnTransformer( + new LikeColumnTransformer( BOOLEAN, childColumnTransformer, - compileRegex( - parseLikePatternToRegex(((StringLiteral) node.getPattern()).getValue())))); + LikePattern.compile( + ((StringLiteral) node.getPattern()).getValue(), + getEscapeCharacter(escapeValueOpt)))); } } ColumnTransformer res = context.cache.get(node); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java index a83824ce68dd..69b788cf3a4e 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java @@ -24,16 +24,16 @@ import org.apache.iotdb.db.queryengine.plan.expression.ExpressionType; import org.apache.iotdb.db.queryengine.plan.expression.visitor.ExpressionVisitor; +import org.apache.tsfile.common.regexp.LikePattern; import org.apache.tsfile.utils.RamUsageEstimator; import org.apache.tsfile.utils.ReadWriteIOUtils; import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.regex.Pattern; +import java.util.Optional; -import 
static org.apache.tsfile.utils.RegexUtils.compileRegex; -import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex; +import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter; public class LikeExpression extends UnaryExpression { @@ -41,7 +41,7 @@ public class LikeExpression extends UnaryExpression { RamUsageEstimator.shallowSizeOfInstance(LikeExpression.class); private final String patternString; - private final Pattern pattern; + private final LikePattern pattern; private final boolean isNot; @@ -49,11 +49,11 @@ public LikeExpression(Expression expression, String patternString, boolean isNot super(expression); this.patternString = patternString; this.isNot = isNot; - pattern = compileRegex(parseLikePatternToRegex(patternString)); + pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\"))); } public LikeExpression( - Expression expression, String patternString, Pattern pattern, boolean isNot) { + Expression expression, String patternString, LikePattern pattern, boolean isNot) { super(expression); this.patternString = patternString; this.pattern = pattern; @@ -64,14 +64,14 @@ public LikeExpression(ByteBuffer byteBuffer) { super(Expression.deserialize(byteBuffer)); patternString = ReadWriteIOUtils.readString(byteBuffer); isNot = ReadWriteIOUtils.readBool(byteBuffer); - pattern = compileRegex(parseLikePatternToRegex(patternString)); + pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\"))); } public String getPatternString() { return patternString; } - public Pattern getPattern() { + public LikePattern getPattern() { return pattern; } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java index 4d517e219e70..fc847576458a 100644 --- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java @@ -33,8 +33,6 @@ import java.nio.ByteBuffer; import java.util.regex.Pattern; -import static org.apache.tsfile.utils.RegexUtils.compileRegex; - public class RegularExpression extends UnaryExpression { private static final long INSTANCE_SIZE = @@ -49,7 +47,7 @@ public RegularExpression(Expression expression, String patternString, boolean is super(expression); this.patternString = patternString; this.isNot = isNot; - pattern = compileRegex(patternString); + pattern = Pattern.compile(patternString); } public RegularExpression( @@ -64,7 +62,7 @@ public RegularExpression(ByteBuffer byteBuffer) { super(Expression.deserialize(byteBuffer)); patternString = ReadWriteIOUtils.readString(byteBuffer); isNot = ReadWriteIOUtils.readBool(byteBuffer); - pattern = compileRegex(Validate.notNull(patternString, "patternString cannot be null")); + pattern = Pattern.compile(Validate.notNull(patternString, "patternString cannot be null")); } public String getPatternString() { diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/ColumnTransformerVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/ColumnTransformerVisitor.java index e537d7fc763c..523460da658e 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/ColumnTransformerVisitor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/ColumnTransformerVisitor.java @@ -64,6 +64,7 @@ import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.ArithmeticNegationColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.InColumnTransformer; import 
org.apache.iotdb.db.queryengine.transformation.dag.column.unary.IsNullColumnTransformer; +import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LikeColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LogicNotColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.RegularColumnTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.udf.UDTFContext; @@ -442,7 +443,7 @@ private ColumnTransformer getConcreteUnaryColumnTransformer( return new ArithmeticNegationColumnTransformer(returnType, childColumnTransformer); case LIKE: LikeExpression likeExpression = (LikeExpression) expression; - return new RegularColumnTransformer( + return new LikeColumnTransformer( returnType, childColumnTransformer, likeExpression.getPattern()); case REGEXP: RegularExpression regularExpression = (RegularExpression) expression; diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/IntermediateLayerVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/IntermediateLayerVisitor.java index ff96ab0020db..889e90f14e73 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/IntermediateLayerVisitor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/IntermediateLayerVisitor.java @@ -67,6 +67,7 @@ import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.ArithmeticNegationTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.InTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.IsNullTransformer; +import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.LikeTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.LogicNotTransformer; import 
org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.RegularTransformer; import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.TransparentTransformer; @@ -311,7 +312,7 @@ private Transformer getConcreteUnaryTransformer(Expression expression, LayerRead return new ArithmeticNegationTransformer(parentReader); case LIKE: LikeExpression likeExpression = (LikeExpression) expression; - return new RegularTransformer(parentReader, likeExpression.getPattern()); + return new LikeTransformer(parentReader, likeExpression.getPattern()); case REGEXP: RegularExpression regularExpression = (RegularExpression) expression; return new RegularTransformer(parentReader, regularExpression.getPattern()); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/ConvertPredicateToFilterVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/ConvertPredicateToFilterVisitor.java index 446f8caa5920..7d046af30ece 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/ConvertPredicateToFilterVisitor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/ConvertPredicateToFilterVisitor.java @@ -47,6 +47,7 @@ import org.apache.iotdb.db.queryengine.plan.relational.type.InternalTypeManager; import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.common.regexp.LikePattern; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.read.common.type.Type; import org.apache.tsfile.read.filter.basic.Filter; @@ -58,6 +59,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -66,6 +68,7 @@ import static 
org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.ConvertPredicateToTimeFilterVisitor.isTimeColumn; import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoScanChecker.isLiteral; import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoScanChecker.isSymbolReference; +import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter; public class ConvertPredicateToFilterVisitor extends PredicateVisitor { @@ -208,10 +211,17 @@ protected Filter visitLikePredicate(LikePredicate node, Context context) { SymbolReference operand = (SymbolReference) node.getValue(); checkArgument(context.isMeasurementColumn(operand)); int measurementIndex = context.getMeasurementIndex(operand.getName()); - Expression pattern = node.getPattern(); + Optional escapeValueOpt = + node.getEscape().isPresent() + ? Optional.ofNullable(((StringLiteral) node.getEscape().get()).getValue()) + : Optional.empty(); Type type = context.getType(Symbol.from(operand)); TSDataType dataType = InternalTypeManager.getTSDataType(type); - return ValueFilterApi.like(measurementIndex, getStringValue(pattern), dataType); + return ValueFilterApi.like( + measurementIndex, + LikePattern.compile( + ((StringLiteral) node.getPattern()).getValue(), getEscapeCharacter(escapeValueOpt)), + dataType); } @Override diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/schema/ConvertSchemaPredicateToFilterVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/schema/ConvertSchemaPredicateToFilterVisitor.java index d78170b64bba..afc0fb769f85 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/schema/ConvertSchemaPredicateToFilterVisitor.java +++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/analyzer/predicate/schema/ConvertSchemaPredicateToFilterVisitor.java @@ -58,11 +58,12 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import static com.google.common.base.Preconditions.checkArgument; import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoScanChecker.isSymbolReference; -import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex; +import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter; /** * The {@link ConvertSchemaPredicateToFilterVisitor} will convert a predicate to {@link @@ -116,7 +117,12 @@ protected SchemaFilter visitIsNotNullPredicate( return null; } return wrapIdOrAttributeFilter( - new LikeFilter(parseLikePatternToRegex(((StringLiteral) node.getPattern()).getValue())), + new LikeFilter( + (((StringLiteral) node.getPattern()).getValue()), + node.getEscape().isPresent() + ? getEscapeCharacter( + Optional.ofNullable(((StringLiteral) node.getEscape().get()).getValue())) + : Optional.empty()), ((SymbolReference) node.getValue()).getName(), context); } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/LikeColumnTransformer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/LikeColumnTransformer.java new file mode 100644 index 000000000000..f89e18e7c40c --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/LikeColumnTransformer.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.queryengine.transformation.dag.column.unary; + +import org.apache.iotdb.db.queryengine.transformation.dag.column.ColumnTransformer; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.common.regexp.LikePattern; +import org.apache.tsfile.read.common.type.Type; +import org.apache.tsfile.utils.Binary; + +import static org.apache.iotdb.db.queryengine.plan.relational.metadata.TableMetadataImpl.isCharType; + +public class LikeColumnTransformer extends UnaryColumnTransformer { + private final LikePattern pattern; + + public LikeColumnTransformer( + Type returnType, ColumnTransformer childColumnTransformer, LikePattern pattern) { + super(returnType, childColumnTransformer); + this.pattern = pattern; + } + + @Override + protected void doTransform(Column column, ColumnBuilder columnBuilder) { + for (int i = 0, n = column.getPositionCount(); i < n; i++) { + if (!column.isNull(i)) { + Binary value = childColumnTransformer.getType().getBinary(column, i); + returnType.writeBoolean( + columnBuilder, pattern.getMatcher().match(value.getValues(), 0, value.getLength())); + } else { + columnBuilder.appendNull(); + } + } + } + + @Override + protected void checkType() { + if (!isCharType(childColumnTransformer.getType())) { + throw new UnsupportedOperationException( + "Unsupported Type: " + 
childColumnTransformer.getType().getTypeEnum()); + } + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/transformer/unary/LikeTransformer.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/transformer/unary/LikeTransformer.java new file mode 100644 index 000000000000..61d896847cfc --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/transformation/dag/transformer/unary/LikeTransformer.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary; + +import org.apache.iotdb.db.exception.query.QueryProcessException; +import org.apache.iotdb.db.queryengine.transformation.api.LayerReader; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.common.regexp.LikePattern; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.write.UnSupportedDataTypeException; + +import java.io.IOException; + +public class LikeTransformer extends UnaryTransformer { + private final LikePattern pattern; + + public LikeTransformer(LayerReader layerReader, LikePattern pattern) { + super(layerReader); + this.pattern = pattern; + + if (layerReaderDataType != TSDataType.TEXT) { + throw new UnSupportedDataTypeException("Unsupported data type: " + layerReaderDataType); + } + } + + @Override + public TSDataType[] getDataTypes() { + return new TSDataType[] {TSDataType.BOOLEAN}; + } + + @Override + protected void transform(Column[] columns, ColumnBuilder builder) + throws QueryProcessException, IOException { + int count = columns[0].getPositionCount(); + Binary[] binaries = columns[0].getBinaries(); + boolean[] isNulls = columns[0].isNull(); + + for (int i = 0; i < count; i++) { + if (!isNulls[i]) { + boolean res = pattern.getMatcher().match(binaries[i].getValues()); + builder.writeBoolean(res); + } else { + builder.appendNull(); + } + } + } +} diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/metadata/schemaRegion/SchemaRegionTableDeviceTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/metadata/schemaRegion/SchemaRegionTableDeviceTest.java index 07b52d5b5c0f..e913fb48ce32 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/metadata/schemaRegion/SchemaRegionTableDeviceTest.java +++ 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/metadata/schemaRegion/SchemaRegionTableDeviceTest.java @@ -36,10 +36,9 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; -import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex; - public class SchemaRegionTableDeviceTest extends AbstractSchemaRegionTest { public SchemaRegionTableDeviceTest(final SchemaRegionTestParams testParams) { @@ -170,7 +169,7 @@ public void testDeviceQuery() throws Exception { 3, Arrays.asList( new IdFilter(new InFilter(new HashSet<>(Arrays.asList("d_0", "d_1"))), 2), - new IdFilter(new LikeFilter(parseLikePatternToRegex("__1")), 2))); + new IdFilter(new LikeFilter("__1", Optional.empty()), 2))); Assert.assertEquals(2, deviceSchemaInfoList.size()); } @@ -246,8 +245,7 @@ public void testDeviceIdWithNull() throws Exception { schemaRegion, tableName, 3, - Collections.singletonList( - new IdFilter(new LikeFilter(parseLikePatternToRegex("%")), 2))); + Collections.singletonList(new IdFilter(new LikeFilter("%", Optional.empty()), 2))); Assert.assertEquals(2, deviceSchemaInfoList.size()); } diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/UnaryColumnTransformerTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/UnaryColumnTransformerTest.java index 45f31e70a7ca..3d9904d764e7 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/UnaryColumnTransformerTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/queryengine/transformation/dag/column/unary/UnaryColumnTransformerTest.java @@ -23,6 +23,7 @@ import org.apache.tsfile.block.column.Column; import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.common.regexp.LikePattern; import org.apache.tsfile.enums.TSDataType; import 
org.apache.tsfile.read.common.block.TsBlock; import org.apache.tsfile.read.common.block.TsBlockBuilder; @@ -35,6 +36,7 @@ import java.util.Collections; import java.util.HashSet; +import java.util.Optional; import java.util.Set; import java.util.regex.Pattern; @@ -126,4 +128,17 @@ public void testRegular() { } catch (Exception ignored) { } } + + @Test + public void testLike() { + try { + // check input type + new LikeColumnTransformer( + TypeFactory.getType(TSDataType.BOOLEAN), + operand, + LikePattern.compile("%d", Optional.empty())); + Assert.fail(); + } catch (Exception ignored) { + } + } } diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/StringValueFilterVisitor.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/StringValueFilterVisitor.java index f815d7928653..e95bb3cdb196 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/StringValueFilterVisitor.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/StringValueFilterVisitor.java @@ -78,7 +78,7 @@ public Boolean visitLikeFilter(final LikeFilter filter, final String context) { if (Objects.isNull(context)) { return null; } - return filter.getPattern().matcher(context).find(); + return filter.getPattern().getMatcher().match(context.getBytes(), 0, context.length()); } @Override diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/values/LikeFilter.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/values/LikeFilter.java index 7c90be6ed0f3..168e0b7c88a0 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/values/LikeFilter.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/filter/impl/values/LikeFilter.java @@ -23,27 +23,35 @@ import org.apache.iotdb.commons.schema.filter.SchemaFilterType; import 
org.apache.iotdb.commons.schema.filter.SchemaFilterVisitor; +import org.apache.tsfile.common.regexp.LikePattern; import org.apache.tsfile.utils.ReadWriteIOUtils; import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Objects; -import java.util.regex.Pattern; +import java.util.Optional; + +import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter; // Does not support escape now public class LikeFilter extends SchemaFilter { - private final Pattern pattern; + private final LikePattern pattern; - public LikeFilter(final String regex) { - this.pattern = Pattern.compile(regex); + public LikeFilter(final String regex, final Optional<Character> escape) { + this.pattern = LikePattern.compile(regex, escape); } public LikeFilter(final ByteBuffer byteBuffer) { - this.pattern = Pattern.compile(ReadWriteIOUtils.readString(byteBuffer)); + this.pattern = + LikePattern.compile( + ReadWriteIOUtils.readString(byteBuffer), + ReadWriteIOUtils.readBool(byteBuffer) + ? 
getEscapeCharacter(Optional.of(ReadWriteIOUtils.readString(byteBuffer))) + : Optional.empty()); } - public Pattern getPattern() { + public LikePattern getPattern() { return pattern; } @@ -59,12 +67,24 @@ public SchemaFilterType getSchemaFilterType() { @Override protected void serialize(final ByteBuffer byteBuffer) { - ReadWriteIOUtils.write(pattern.pattern(), byteBuffer); + ReadWriteIOUtils.write(pattern.getPattern(), byteBuffer); + if (pattern.getEscape().isPresent()) { + ReadWriteIOUtils.write(true, byteBuffer); + ReadWriteIOUtils.write(pattern.getEscape().get().toString(), byteBuffer); + } else { + ReadWriteIOUtils.write(false, byteBuffer); + } } @Override protected void serialize(final DataOutputStream stream) throws IOException { - ReadWriteIOUtils.write(pattern.pattern(), stream); + ReadWriteIOUtils.write(pattern.getPattern(), stream); + if (pattern.getEscape().isPresent()) { + ReadWriteIOUtils.write(true, stream); + ReadWriteIOUtils.write(pattern.getEscape().get().toString(), stream); + } else { + ReadWriteIOUtils.write(false, stream); + } } @Override @@ -76,7 +96,7 @@ public boolean equals(final Object o) { return false; } final LikeFilter that = (LikeFilter) o; - return Objects.equals(pattern.pattern(), that.pattern.pattern()); + return Objects.equals(pattern, that.pattern); } @Override diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/view/viewExpression/unary/LikeViewExpression.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/view/viewExpression/unary/LikeViewExpression.java index 12cbc513acdf..063116db0b83 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/view/viewExpression/unary/LikeViewExpression.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/view/viewExpression/unary/LikeViewExpression.java @@ -23,19 +23,22 @@ import org.apache.iotdb.commons.schema.view.viewExpression.ViewExpressionType; import 
org.apache.iotdb.commons.schema.view.viewExpression.visitor.ViewExpressionVisitor; +import org.apache.tsfile.common.regexp.LikePattern; import org.apache.tsfile.utils.ReadWriteIOUtils; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; -import java.util.regex.Pattern; +import java.util.Optional; + +import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter; public class LikeViewExpression extends UnaryViewExpression { // region member variables and init functions private final String patternString; - private final Pattern pattern; + private final LikePattern pattern; private final boolean isNot; @@ -43,11 +46,11 @@ public LikeViewExpression(ViewExpression expression, String patternString, boole super(expression); this.patternString = patternString; this.isNot = isNot; - pattern = this.compile(); + pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\"))); } public LikeViewExpression( - ViewExpression expression, String patternString, Pattern pattern, boolean isNot) { + ViewExpression expression, String patternString, LikePattern pattern, boolean isNot) { super(expression); this.patternString = patternString; this.pattern = pattern; @@ -58,7 +61,7 @@ public LikeViewExpression(ByteBuffer byteBuffer) { super(ViewExpression.deserialize(byteBuffer)); patternString = ReadWriteIOUtils.readString(byteBuffer); isNot = ReadWriteIOUtils.readBool(byteBuffer); - pattern = compile(); + pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\"))); } public LikeViewExpression(InputStream inputStream) { @@ -66,7 +69,7 @@ public LikeViewExpression(InputStream inputStream) { try { patternString = ReadWriteIOUtils.readString(inputStream); isNot = ReadWriteIOUtils.readBool(inputStream); - pattern = compile(); + pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\"))); } catch (IOException e) { throw new RuntimeException(e); } @@ 
-115,66 +118,11 @@ public String getPatternString() { return patternString; } - public Pattern getPattern() { + public LikePattern getPattern() { return pattern; } public boolean isNot() { return isNot; } - - /** - * This Method is for un-escaping strings except '\' before special string '%', '_', '\', because - * we need to use '\' to judge whether to replace this to regexp string - */ - private String unescapeString(String value) { - StringBuilder stringBuilder = new StringBuilder(); - for (int i = 0; i < value.length(); i++) { - String ch = String.valueOf(value.charAt(i)); - if ("\\".equals(ch)) { - if (i < value.length() - 1) { - String nextChar = String.valueOf(value.charAt(i + 1)); - if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) { - stringBuilder.append(ch); - } - if ("\\".equals(nextChar)) { - i++; - } - } - } else { - stringBuilder.append(ch); - } - } - return stringBuilder.toString(); - } - - /** - * The main idea of this part comes from - * https://codereview.stackexchange.com/questions/36861/convert-sql-like-to-regex/36864 - */ - private Pattern compile() { - String unescapeValue = unescapeString(patternString); - String specialRegexString = ".^$*+?{}[]|()"; - StringBuilder patternBuilder = new StringBuilder(); - patternBuilder.append("^"); - for (int i = 0; i < unescapeValue.length(); i++) { - String ch = String.valueOf(unescapeValue.charAt(i)); - if (specialRegexString.contains(ch)) { - ch = "\\" + unescapeValue.charAt(i); - } - if (i == 0 - || !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1))) - || i >= 2 - && "\\\\" - .equals( - patternBuilder.substring( - patternBuilder.length() - 2, patternBuilder.length()))) { - patternBuilder.append(ch.replace("%", ".*?").replace("_", ".")); - } else { - patternBuilder.append(ch); - } - } - patternBuilder.append("$"); - return Pattern.compile(patternBuilder.toString()); - } } diff --git 
a/iotdb-core/node-commons/src/test/java/org/apache/iotdb/commons/schema/SchemaFilterSerDeTest.java b/iotdb-core/node-commons/src/test/java/org/apache/iotdb/commons/schema/SchemaFilterSerDeTest.java index 454978673812..de49472ccee7 100644 --- a/iotdb-core/node-commons/src/test/java/org/apache/iotdb/commons/schema/SchemaFilterSerDeTest.java +++ b/iotdb-core/node-commons/src/test/java/org/apache/iotdb/commons/schema/SchemaFilterSerDeTest.java @@ -43,8 +43,7 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; - -import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex; +import java.util.Optional; public class SchemaFilterSerDeTest { @@ -62,7 +61,7 @@ public void testSchemaFilter() { new OrFilter(Arrays.asList(viewTypeFilter, pathContainsFilter, tagFilter)); final PreciseFilter preciseFilter = new PreciseFilter("s1"); final InFilter inFilter = new InFilter(Collections.singleton("d1")); - final LikeFilter likeFilter = new LikeFilter(parseLikePatternToRegex("__1")); + final LikeFilter likeFilter = new LikeFilter("__1", Optional.empty()); final IdFilter idFilter = new IdFilter(preciseFilter, 1); final AttributeFilter attributeFilter = new AttributeFilter(likeFilter, "attr"); final ComparisonFilter comparisonFilter = diff --git a/pom.xml b/pom.xml index 88a6e1007abc..cf45ff69e1a1 100644 --- a/pom.xml +++ b/pom.xml @@ -166,7 +166,7 @@ 0.14.1 1.9 1.5.6-3 - 1.2.0-240920-SNAPSHOT + 1.2.0-c3437a41-SNAPSHOT