diff --git a/examples/substrait-spark/.gitignore b/examples/substrait-spark/.gitignore index a6765eee..4eb18b0c 100644 --- a/examples/substrait-spark/.gitignore +++ b/examples/substrait-spark/.gitignore @@ -2,3 +2,4 @@ spark-warehouse derby.log _apps _data +bin diff --git a/examples/substrait-spark/README.md b/examples/substrait-spark/README.md index 495a117d..8cbc4267 100644 --- a/examples/substrait-spark/README.md +++ b/examples/substrait-spark/README.md @@ -95,8 +95,16 @@ The `justfile` has three targets to make it easy to run the examples - `just dataset` runs the Dataset API and produces `spark_dataset_substrait.plan` - `just sql` runs the SQL api and produces `spark_sql_substrait.plan` - `just consume ` runs the specified plan (from the `_data` directory) - - +- run `just` without arguments to get a summary of the targets available. +``` +just +Available recipes: + buildapp # Builds the application into a JAR file + consume arg # Consumes the Substrait plan file passed as the argument + dataset # Runs a Spark dataset api query and produces a Substrait plan + spark # Starts a simple Spark cluster locally in docker + sql # Runs a Spark SQL api query and produces a Substrait plan +``` ## Creating a Substrait Plan @@ -266,7 +274,7 @@ The `io.substrait.plan.Plan` object is a high-level Substrait POJO representing For the dataset approach, the `spark_dataset_substrait.plan` is created, and for the SQL approach the `spark_sql_substrait.plan` is created. These Intermediate Representations of the query can be saved, transferred and reloaded into a Data Engine. -We can also review the Substrait plan's structure; the canonical format of the Substrait plan is the binary protobuf format, but it's possible to produce a textual version, an example is below. Both the Substrait plans from the Dataset or SQL APIs generate the same output. +We can also review the Substrait plan's structure; the canonical format of the Substrait plan is the binary protobuf format, but it's possible to produce a textual version, an example is below (please see the [SubstraitStringify utility class](./src/main/java/io/substrait/examples/util/SubstraitStringify.java); it's also a good example of how to use some if the vistor patterns). Both the Substrait plans from the Dataset or SQL APIs generate the same output. ``` @@ -318,7 +326,9 @@ Loading the binary protobuf file is the reverse of the writing process (in the c ProtoPlanConverter protoToPlan = new ProtoPlanConverter(); Plan plan = protoToPlan.from(proto); ``` -The loaded byte array is first converted into the protobuf Plan, and then into the Substrait Plan object. Note it can be useful to name the variables, and/or use the pull class names to keep track of it's the ProtoBuf Plan or the high-level POJO Plan. + +The loaded byte array is first converted into the protobuf Plan, and then into the Substrait Plan object. Note it can be useful to name the variables, and/or use the full class names to keep track of it's the ProtoBuf Plan or the high-level POJO Plan. For example `io.substrait.proto.Plan` or `io.substrait.Plan` + Finally this can be converted to a Spark Plan: @@ -395,6 +405,9 @@ When converted to Substrait the Sort and Aggregate is in the same order, but the +- Aggregate:: FieldRef#/Str/StructField{offset=0} ``` +These look different due to two factors. Firstly the Spark optimizer has swapped the project and aggregate functions. +Secondly projects within the Substrait plan joined the fields together but don't reduce the number of fields. Any such filtering is done on the outer relations. + ### Inner Join Spark's inner join is taking as inputs the two filtered relations; it's ensuring the join key is not null but also the `test_result==p` check. @@ -408,7 +421,7 @@ Spark's inner join is taking as inputs the two filtered relations; it's ensuring +- Filter ((isnotnull(test_result#14) AND (test_result#14 = P)) AND isnotnull(vehicle_id#10)) ``` -The Substrait Representation looks longer, but is showing the same structure. (note that this format is a custom format implemented as [SubstraitStingify](...) as the standard text output can be hard to read). +The Substrait Representation looks longer, but is showing the same structure. (note that this format is a custom format implemented as [SubstraitStingify](./src/main/java/io/substrait/examples/util/SubstraitStringify.java) as the standard text output can be hard to read). ``` +- Join:: INNER equal:any_any diff --git a/examples/substrait-spark/justfile b/examples/substrait-spark/justfile index 6fcad766..37ed425c 100644 --- a/examples/substrait-spark/justfile +++ b/examples/substrait-spark/justfile @@ -15,6 +15,7 @@ SPARK_MASTER_CONTAINER := "substrait-spark-spark-1" _default: @just -f {{justfile()}} --list +# Builds the application into a JAR file buildapp: #!/bin/bash set -e -o pipefail @@ -28,25 +29,28 @@ buildapp: cp ${CWDIR}/build/libs/substrait-spark*.jar ${CWDIR}/_apps/app.jar cp ${CWDIR}/src/main/resources/*.csv ${CWDIR}/_data +# Runs a Spark dataset api query and produces a Substrait plan dataset: #!/bin/bash set -e -o pipefail docker exec -it ${SPARK_MASTER_CONTAINER} bash -c "/opt/bitnami/spark/bin/spark-submit --master spark://${SPARK_MASTER_CONTAINER}:7077 --driver-memory 1G --executor-memory 1G /opt/spark-apps/app.jar SparkDataset" +# Runs a Spark SQL api query and produces a Substrait plan sql: #!/bin/bash set -e -o pipefail docker exec -it ${SPARK_MASTER_CONTAINER} bash -c "/opt/bitnami/spark/bin/spark-submit --master spark://${SPARK_MASTER_CONTAINER}:7077 --driver-memory 1G --executor-memory 1G /opt/spark-apps/app.jar SparkSQL" +# Consumes the Substrait plan file passed as the argument consume arg: #!/bin/bash set -e -o pipefail docker exec -it ${SPARK_MASTER_CONTAINER} bash -c "/opt/bitnami/spark/bin/spark-submit --master spark://${SPARK_MASTER_CONTAINER}:7077 --driver-memory 1G --executor-memory 1G /opt/spark-apps/app.jar SparkConsumeSubstrait {{arg}}" - +# Starts a simple Spark cluster locally in docker spark: #!/bin/bash set -e -o pipefail diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkConsumeSubstrait.java b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkConsumeSubstrait.java index 5f5436b0..26c15274 100644 --- a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkConsumeSubstrait.java +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkConsumeSubstrait.java @@ -2,6 +2,7 @@ import static io.substrait.examples.SparkHelper.ROOT_DIR; +import io.substrait.examples.util.SubstraitStringify; import io.substrait.plan.Plan; import io.substrait.plan.ProtoPlanConverter; import io.substrait.spark.logical.ToLogicalPlan; @@ -28,6 +29,8 @@ public void run(String arg) { ProtoPlanConverter protoToPlan = new ProtoPlanConverter(); Plan plan = protoToPlan.from(proto); + SubstraitStringify.explain(plan).forEach(System.out::println); + ToLogicalPlan substraitConverter = new ToLogicalPlan(spark); LogicalPlan sparkPlan = substraitConverter.convert(plan); diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkDataset.java b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkDataset.java index d5d4a488..81de54b0 100644 --- a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkDataset.java +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkDataset.java @@ -4,6 +4,7 @@ import static io.substrait.examples.SparkHelper.TESTS_CSV; import static io.substrait.examples.SparkHelper.VEHICLES_CSV; +import io.substrait.examples.util.SubstraitStringify; import io.substrait.plan.PlanProtoConverter; import io.substrait.spark.logical.ToSubstraitRel; import java.io.IOException; @@ -70,7 +71,7 @@ public void createSubstrait(LogicalPlan enginePlan) { ToSubstraitRel toSubstrait = new ToSubstraitRel(); io.substrait.plan.Plan plan = toSubstrait.convert(enginePlan); - System.out.println(plan); + SubstraitStringify.explain(plan).forEach(System.out::println); PlanProtoConverter planToProto = new PlanProtoConverter(); byte[] buffer = planToProto.toProto(plan).toByteArray(); diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkHelper.java b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkHelper.java index f7514055..4f77a9c1 100644 --- a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkHelper.java +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkHelper.java @@ -7,21 +7,12 @@ public final class SparkHelper { private SparkHelper() {} - /** Namespace to use for the data */ - public static final String NAMESPACE = "demo_db"; - /** Vehicles table */ public static final String VEHICLE_TABLE = "vehicles"; /** Tests table (the vehicle safety tests) */ public static final String TESTS_TABLE = "tests"; - /** Source data - parquet */ - public static final String VEHICLES_PQ = "vehicles_subset_2023.parquet"; - - /** Source data - parquet */ - public static final String TESTS_PQ = "tests_subset_2023.parquet"; - /** Source data - csv */ public static final String VEHICLES_CSV = "vehicles_subset_2023.csv"; @@ -31,26 +22,6 @@ private SparkHelper() {} /** In-container data location */ public static final String ROOT_DIR = "/opt/spark-data"; - /** - * Connect to local spark for demo purposes - * - * @param sparkMaster address of the Spark Master to connect to - * @return SparkSession - */ - public static SparkSession connectSpark(String sparkMaster) { - - SparkSession spark = - SparkSession.builder() - // .config("spark.sql.warehouse.dir", "spark-warehouse") - .config("spark.master", sparkMaster) - .enableHiveSupport() - .getOrCreate(); - - spark.sparkContext().setLogLevel("ERROR"); - - return spark; - } - /** * Connects to the local spark cluister * diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkSQL.java b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkSQL.java index a927d079..fc4c8fec 100644 --- a/examples/substrait-spark/src/main/java/io/substrait/examples/SparkSQL.java +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/SparkSQL.java @@ -6,6 +6,7 @@ import static io.substrait.examples.SparkHelper.VEHICLES_CSV; import static io.substrait.examples.SparkHelper.VEHICLE_TABLE; +import io.substrait.examples.util.SubstraitStringify; import io.substrait.plan.PlanProtoConverter; import io.substrait.spark.logical.ToSubstraitRel; import java.io.IOException; @@ -76,7 +77,8 @@ public void run(String arg) { public void createSubstrait(LogicalPlan enginePlan) { ToSubstraitRel toSubstrait = new ToSubstraitRel(); io.substrait.plan.Plan plan = toSubstrait.convert(enginePlan); - System.out.println(plan); + + SubstraitStringify.explain(plan).forEach(System.out::println); PlanProtoConverter planToProto = new PlanProtoConverter(); byte[] buffer = planToProto.toProto(plan).toByteArray(); diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/util/ExpressionStringify.java b/examples/substrait-spark/src/main/java/io/substrait/examples/util/ExpressionStringify.java new file mode 100644 index 00000000..6d0ddf69 --- /dev/null +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/util/ExpressionStringify.java @@ -0,0 +1,276 @@ +package io.substrait.examples.util; + +import io.substrait.expression.Expression.BinaryLiteral; +import io.substrait.expression.Expression.BoolLiteral; +import io.substrait.expression.Expression.Cast; +import io.substrait.expression.Expression.DateLiteral; +import io.substrait.expression.Expression.DecimalLiteral; +import io.substrait.expression.Expression.EmptyListLiteral; +import io.substrait.expression.Expression.FP32Literal; +import io.substrait.expression.Expression.FP64Literal; +import io.substrait.expression.Expression.FixedBinaryLiteral; +import io.substrait.expression.Expression.FixedCharLiteral; +import io.substrait.expression.Expression.I16Literal; +import io.substrait.expression.Expression.I32Literal; +import io.substrait.expression.Expression.I64Literal; +import io.substrait.expression.Expression.I8Literal; +import io.substrait.expression.Expression.IfThen; +import io.substrait.expression.Expression.InPredicate; +import io.substrait.expression.Expression.IntervalDayLiteral; +import io.substrait.expression.Expression.IntervalYearLiteral; +import io.substrait.expression.Expression.ListLiteral; +import io.substrait.expression.Expression.MapLiteral; +import io.substrait.expression.Expression.MultiOrList; +import io.substrait.expression.Expression.NullLiteral; +import io.substrait.expression.Expression.ScalarFunctionInvocation; +import io.substrait.expression.Expression.ScalarSubquery; +import io.substrait.expression.Expression.SetPredicate; +import io.substrait.expression.Expression.SingleOrList; +import io.substrait.expression.Expression.StrLiteral; +import io.substrait.expression.Expression.StructLiteral; +import io.substrait.expression.Expression.Switch; +import io.substrait.expression.Expression.TimeLiteral; +import io.substrait.expression.Expression.TimestampLiteral; +import io.substrait.expression.Expression.TimestampTZLiteral; +import io.substrait.expression.Expression.UUIDLiteral; +import io.substrait.expression.Expression.UserDefinedLiteral; +import io.substrait.expression.Expression.VarCharLiteral; +import io.substrait.expression.Expression.WindowFunctionInvocation; +import io.substrait.expression.ExpressionVisitor; +import io.substrait.expression.FieldReference; + +/** ExpressionStringify gives a simple debug text output for Expressions */ +public class ExpressionStringify extends ParentStringify + implements ExpressionVisitor { + + public ExpressionStringify(int indent) { + super(indent); + } + + @Override + public String visit(NullLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(BoolLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(I8Literal expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(I16Literal expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(I32Literal expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(I64Literal expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(FP32Literal expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(FP64Literal expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(StrLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(BinaryLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(TimeLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(DateLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(TimestampLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(TimestampTZLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(IntervalYearLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(IntervalDayLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(UUIDLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(FixedCharLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(VarCharLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(FixedBinaryLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(DecimalLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(MapLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(ListLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(EmptyListLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(StructLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(UserDefinedLiteral expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(Switch expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(IfThen expr) throws RuntimeException { + return ""; + } + + @Override + public String visit(ScalarFunctionInvocation expr) throws RuntimeException { + var sb = new StringBuilder(""); + + sb.append(expr.declaration()); + // sb.append(" ("); + var args = expr.arguments(); + for (var i = 0; i < args.size(); i++) { + var arg = args.get(i); + sb.append(getContinuationIndentString()); + sb.append("arg" + i + " = "); + var funcArgVisitor = new FunctionArgStringify(indent); + + sb.append(arg.accept(expr.declaration(), i, funcArgVisitor)); + sb.append(" "); + } + return sb.toString(); + } + + @Override + public String visit(WindowFunctionInvocation expr) throws RuntimeException { + var sb = new StringBuilder("WindowFunctionInvocation#"); + + return sb.toString(); + } + + @Override + public String visit(Cast expr) throws RuntimeException { + var sb = new StringBuilder(""); + return sb.toString(); + } + + @Override + public String visit(SingleOrList expr) throws RuntimeException { + var sb = new StringBuilder("SingleOrList#"); + + return sb.toString(); + } + + @Override + public String visit(MultiOrList expr) throws RuntimeException { + var sb = new StringBuilder("Cast#"); + + return sb.toString(); + } + + @Override + public String visit(FieldReference expr) throws RuntimeException { + StringBuilder sb = new StringBuilder("FieldRef#"); + var type = expr.getType(); + // sb.append(expr.inputExpression()); + sb.append("/").append(type.accept(new TypeStringify(indent))).append("/"); + expr.segments() + .forEach( + s -> { + sb.append(s).append(" "); + }); + + return sb.toString(); + } + + @Override + public String visit(SetPredicate expr) throws RuntimeException { + var sb = new StringBuilder("SetPredicate#"); + + return sb.toString(); + } + + @Override + public String visit(ScalarSubquery expr) throws RuntimeException { + var sb = new StringBuilder("ScalarSubquery#"); + + return sb.toString(); + } + + @Override + public String visit(InPredicate expr) throws RuntimeException { + var sb = new StringBuilder("InPredicate#"); + + return sb.toString(); + } +} diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/util/FunctionArgStringify.java b/examples/substrait-spark/src/main/java/io/substrait/examples/util/FunctionArgStringify.java new file mode 100644 index 00000000..d910a17a --- /dev/null +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/util/FunctionArgStringify.java @@ -0,0 +1,31 @@ +package io.substrait.examples.util; + +import io.substrait.expression.EnumArg; +import io.substrait.expression.Expression; +import io.substrait.expression.FunctionArg.FuncArgVisitor; +import io.substrait.extension.SimpleExtension.Function; +import io.substrait.type.Type; + +/** FunctionArgStrngify produces a simple debug string for Funcation Arguments */ +public class FunctionArgStringify extends ParentStringify + implements FuncArgVisitor { + + public FunctionArgStringify(int indent) { + super(indent); + } + + @Override + public String visitExpr(Function fnDef, int argIdx, Expression e) throws RuntimeException { + return e.accept(new ExpressionStringify(indent + 1)); + } + + @Override + public String visitType(Function fnDef, int argIdx, Type t) throws RuntimeException { + return t.accept(new TypeStringify(indent)); + } + + @Override + public String visitEnumArg(Function fnDef, int argIdx, EnumArg e) throws RuntimeException { + return e.toString(); + } +} diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/util/ParentStringify.java b/examples/substrait-spark/src/main/java/io/substrait/examples/util/ParentStringify.java new file mode 100644 index 00000000..ace9fb9a --- /dev/null +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/util/ParentStringify.java @@ -0,0 +1,57 @@ +package io.substrait.examples.util; + +/** + * Parent class of all stringifiers Created as it seemed there could be a an optimization to share + * formatting fns between the various stringifiers + */ +public class ParentStringify { + + protected String indentChar = " "; + protected int indent = 0; + protected int indentSize = 3; + + /** + * Build with a specific indent at the start - note 'an indent' is set by default to be 3 spaces. + * + * @param indent number of indentes + */ + public ParentStringify(int indent) { + this.indent = indent; + } + + StringBuilder getIndent() { + + var sb = new StringBuilder(); + if (indent != 0) { + sb.append("\n"); + } + sb.append(getIndentString()); + + indent++; + return sb; + } + + StringBuilder getIndentString() { + + var sb = new StringBuilder(); + sb.append(indentChar.repeat(this.indent * this.indentSize)); + sb.append("+- "); + return sb; + } + + StringBuilder getContinuationIndentString() { + + var sb = new StringBuilder(); + if (indent != 0) { + sb.append("\n"); + } + sb.append(indentChar.repeat(this.indent * this.indentSize)); + sb.append(" : "); + return sb; + } + + protected String getOutdent(StringBuilder sb) { + indent--; + return (sb).toString(); + } +} diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/util/SubstraitStringify.java b/examples/substrait-spark/src/main/java/io/substrait/examples/util/SubstraitStringify.java new file mode 100644 index 00000000..070d64bb --- /dev/null +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/util/SubstraitStringify.java @@ -0,0 +1,336 @@ +package io.substrait.examples.util; + +import io.substrait.relation.Aggregate; +import io.substrait.relation.ConsistentPartitionWindow; +import io.substrait.relation.Cross; +import io.substrait.relation.EmptyScan; +import io.substrait.relation.ExtensionLeaf; +import io.substrait.relation.ExtensionMulti; +import io.substrait.relation.ExtensionSingle; +import io.substrait.relation.ExtensionTable; +import io.substrait.relation.Fetch; +import io.substrait.relation.Filter; +import io.substrait.relation.Join; +import io.substrait.relation.LocalFiles; +import io.substrait.relation.NamedScan; +import io.substrait.relation.Project; +import io.substrait.relation.Rel; +import io.substrait.relation.RelVisitor; +import io.substrait.relation.Set; +import io.substrait.relation.Sort; +import io.substrait.relation.VirtualTableScan; +import io.substrait.relation.physical.HashJoin; +import io.substrait.relation.physical.MergeJoin; +import io.substrait.relation.physical.NestedLoopJoin; +import io.substrait.type.NamedStruct; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * SubstraitStringify produces a string format output of the Substrait plan or relation + * + *

This is intended for debug and development purposes only, and follows a similar style to the + * `explain` API in libraries suck as Spark Calcite etc. + * + *

Usage: + * + *

+ * io.substrait.plan.Plan plan = toSubstrait.convert(enginePlan);
+ * SubstraitStringify.explain(plan).forEach(System.out::println);
+ * 
+ * + * There is scope for improving this output; there are some gaps in the lesser used relations This + * is not a replacement for any canoncial form and is only for ease of debugging + */ +public class SubstraitStringify extends ParentStringify + implements RelVisitor { + + public SubstraitStringify() { + super(0); + } + + /** + * Explains the Sustrait plan + * + * @param plan Subsrait plan + * @return List of strings; typically these would then be logged or sent to stdout + */ + public static List explain(io.substrait.plan.Plan plan) { + var explanations = new ArrayList(); + explanations.add(""); + + plan.getRoots() + .forEach( + root -> { + var rel = root.getInput(); + + explanations.add("Root:: " + rel.getClass().getSimpleName() + " " + root.getNames()); + explanations.addAll(explain(rel)); + }); + + return explanations; + } + + /** + * Explains the Sustrait relation + * + * @param plan Subsrait relation + * @return List of strings; typically these would then be logged or sent to stdout + */ + public static List explain(io.substrait.relation.Rel rel) { + var s = new SubstraitStringify(); + + List explanation = new ArrayList(); + explanation.add(""); + explanation.addAll(Arrays.asList(rel.accept(s).split("\n"))); + return explanation; + } + + private boolean showRemap = false; + + private List fieldList(List fields) { + return fields.stream().map(t -> t.accept(new TypeStringify(0))).collect(Collectors.toList()); + } + + private String getRemap(Rel rel) { + if (!showRemap) { + return ""; + } + var fieldCount = rel.getRecordType().fields().size(); + var remap = rel.getRemap(); + var recordType = fieldList(rel.getRecordType().fields()); + + if (remap.isPresent()) { + return "/Remapping fields (" + + fieldCount + + ") " + + remap.get().indices() + + " as " + + recordType + + "/ "; + } else { + return "/No Remap (" + fieldCount + ") " + recordType + "/ "; + } + } + + @Override + public String visit(Aggregate aggregate) throws RuntimeException { + StringBuilder sb = getIndent().append("Aggregate:: ").append(getRemap(aggregate)); + aggregate + .getGroupings() + .forEach( + g -> { + g.getExpressions() + .forEach( + expr -> { + sb.append(expr.accept(new ExpressionStringify(this.indent))); + }); + }); + aggregate + .getInputs() + .forEach( + s -> { + sb.append(s.accept(this)); + }); + aggregate.getRemap().ifPresent(s -> sb.append(s.toString())); + + return getOutdent(sb); + } + + @Override + public String visit(EmptyScan emptyScan) throws RuntimeException { + var sb = new StringBuilder("EmptyScan:: ").append(getRemap(emptyScan)); + // sb.append(emptyScan.accept(this)); + return getOutdent(sb); + } + + @Override + public String visit(Fetch fetch) throws RuntimeException { + var sb = new StringBuilder("Fetch:: "); + // sb.append(fetch.accept(this)); + return getOutdent(sb); + } + + @Override + public String visit(Filter filter) throws RuntimeException { + var sb = getIndent().append("Filter:: ").append(getRemap(filter)); + // .append("{ "); + sb.append(filter.getCondition().accept(new ExpressionStringify(indent))) /* .append(")") */; + filter + .getInputs() + .forEach( + i -> { + sb.append(i.accept(this)); + }); + + return getOutdent(sb); + } + + @Override + public String visit(Join join) throws RuntimeException { + + var sb = + getIndent().append("Join:: ").append(join.getJoinType()).append(" ").append(getRemap(join)); + + if (join.getCondition().isPresent()) { + sb.append(join.getCondition().get().accept(new ExpressionStringify(indent))); + } + + sb.append(join.getLeft().accept(this)); + sb.append(join.getRight().accept(this)); + + return getOutdent(sb); + } + + @Override + public String visit(Set set) throws RuntimeException { + StringBuilder sb = getIndent().append("Set:: "); + return getOutdent(sb); + } + + @Override + public String visit(NamedScan namedScan) throws RuntimeException { + + StringBuilder sb = getIndent().append("NamedScan:: ").append(getRemap(namedScan)); + namedScan + .getInputs() + .forEach( + i -> { + sb.append(i.accept(this)); + }); + sb.append(" Tables="); + sb.append(namedScan.getNames()); + sb.append(" Fields="); + sb.append(namedStruct(namedScan.getInitialSchema())); + return getOutdent(sb); + } + + private String namedStruct(NamedStruct struct) { + var sb = new StringBuilder(); + + var names = struct.names(); + var types = fieldList(struct.struct().fields()); + + for (var x = 0; x < names.size(); x++) { + if (x != 0) { + sb.append(","); + } + sb.append(names.get(x)).append("[").append(types.get(x)).append("]"); + } + + return sb.toString(); + } + + @Override + public String visit(LocalFiles localFiles) throws RuntimeException { + StringBuilder sb = getIndent().append("LocalFiles:: "); + + for (var i : localFiles.getItems()) { + sb.append(getContinuationIndentString()); + var fileFormat = ""; + if (i.getFileFormat().isPresent()) { + fileFormat = i.getFileFormat().get().toString(); + } + + sb.append( + String.format( + "%s %s len=%d partition=%d start=%d", + fileFormat, i.getPath().get(), i.getLength(), i.getPartitionIndex(), i.getStart())); + } + + return getOutdent(sb); + } + + @Override + public String visit(Project project) throws RuntimeException { + StringBuilder sb = getIndent().append("Project:: ").append(getRemap(project)); + + sb.append(fieldList(project.deriveRecordType().fields())); + + var inputs = project.getInputs(); + inputs.forEach( + i -> { + sb.append(i.accept(this)); + }); + return getOutdent(sb); + } + + @Override + public String visit(Sort sort) throws RuntimeException { + StringBuilder sb = getIndent().append("Sort:: ").append(getRemap(sort)); + sort.getSortFields() + .forEach( + sf -> { + var expr = new ExpressionStringify(indent); + sb.append(sf.expr().accept(expr)).append(" ").append(sf.direction()); + }); + var inputs = sort.getInputs(); + inputs.forEach( + i -> { + sb.append(i.accept(this)); + }); + return getOutdent(sb); + } + + @Override + public String visit(Cross cross) throws RuntimeException { + StringBuilder sb = getIndent().append("Cross:: "); + return getOutdent(sb); + } + + @Override + public String visit(VirtualTableScan virtualTableScan) throws RuntimeException { + StringBuilder sb = getIndent().append("VirtualTableScan:: "); + return getOutdent(sb); + } + + @Override + public String visit(ExtensionLeaf extensionLeaf) throws RuntimeException { + StringBuilder sb = getIndent().append("extensionLeaf:: "); + return getOutdent(sb); + } + + @Override + public String visit(ExtensionSingle extensionSingle) throws RuntimeException { + StringBuilder sb = getIndent().append("extensionSingle:: "); + return getOutdent(sb); + } + + @Override + public String visit(ExtensionMulti extensionMulti) throws RuntimeException { + StringBuilder sb = getIndent().append("extensionMulti:: "); + return getOutdent(sb); + } + + @Override + public String visit(ExtensionTable extensionTable) throws RuntimeException { + StringBuilder sb = getIndent().append("extensionTable:: "); + return getOutdent(sb); + } + + @Override + public String visit(HashJoin hashJoin) throws RuntimeException { + StringBuilder sb = getIndent().append("hashJoin:: "); + return getOutdent(sb); + } + + @Override + public String visit(MergeJoin mergeJoin) throws RuntimeException { + StringBuilder sb = getIndent().append("mergeJoin:: "); + return getOutdent(sb); + } + + @Override + public String visit(NestedLoopJoin nestedLoopJoin) throws RuntimeException { + StringBuilder sb = getIndent().append("nestedLoopJoin:: "); + return getOutdent(sb); + } + + @Override + public String visit(ConsistentPartitionWindow consistentPartitionWindow) throws RuntimeException { + StringBuilder sb = getIndent().append("consistentPartitionWindow:: "); + return getOutdent(sb); + } +} diff --git a/examples/substrait-spark/src/main/java/io/substrait/examples/util/TypeStringify.java b/examples/substrait-spark/src/main/java/io/substrait/examples/util/TypeStringify.java new file mode 100644 index 00000000..d17a7070 --- /dev/null +++ b/examples/substrait-spark/src/main/java/io/substrait/examples/util/TypeStringify.java @@ -0,0 +1,175 @@ +package io.substrait.examples.util; + +import io.substrait.type.Type; +import io.substrait.type.Type.Binary; +import io.substrait.type.Type.Bool; +import io.substrait.type.Type.Date; +import io.substrait.type.Type.Decimal; +import io.substrait.type.Type.FP32; +import io.substrait.type.Type.FP64; +import io.substrait.type.Type.FixedBinary; +import io.substrait.type.Type.FixedChar; +import io.substrait.type.Type.I16; +import io.substrait.type.Type.I32; +import io.substrait.type.Type.I64; +import io.substrait.type.Type.I8; +import io.substrait.type.Type.IntervalDay; +import io.substrait.type.Type.IntervalYear; +import io.substrait.type.Type.ListType; +import io.substrait.type.Type.Map; +import io.substrait.type.Type.Str; +import io.substrait.type.Type.Struct; +import io.substrait.type.Type.Time; +import io.substrait.type.Type.Timestamp; +import io.substrait.type.Type.TimestampTZ; +import io.substrait.type.Type.UUID; +import io.substrait.type.Type.UserDefined; +import io.substrait.type.Type.VarChar; +import io.substrait.type.TypeVisitor; + +/** TypeStrinify produces a simple debug string of Substrait types */ +public class TypeStringify extends ParentStringify + implements TypeVisitor { + + protected TypeStringify(int indent) { + super(indent); + } + + @Override + public String visit(I64 type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Bool type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(I8 type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(I16 type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(I32 type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(FP32 type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(FP64 type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Str type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Binary type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Date type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Time type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + @Deprecated + public String visit(TimestampTZ type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + @Deprecated + public String visit(Timestamp type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Type.PrecisionTimestamp type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Type.PrecisionTimestampTZ type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(IntervalYear type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(IntervalDay type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(UUID type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(FixedChar type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(VarChar type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(FixedBinary type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Decimal type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Struct type) throws RuntimeException { + var sb = new StringBuffer(type.getClass().getSimpleName()); + type.fields() + .forEach( + f -> { + sb.append(f.accept(this)); + }); + return sb.toString(); + } + + @Override + public String visit(ListType type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(Map type) throws RuntimeException { + return type.getClass().getSimpleName(); + } + + @Override + public String visit(UserDefined type) throws RuntimeException { + return type.getClass().getSimpleName(); + } +}