diff --git a/cinn/auto_schedule/search_space/auto_gen_rule/CMakeLists.txt b/cinn/auto_schedule/search_space/auto_gen_rule/CMakeLists.txt
index 521b23a1b7..5105ed66dc 100644
--- a/cinn/auto_schedule/search_space/auto_gen_rule/CMakeLists.txt
+++ b/cinn/auto_schedule/search_space/auto_gen_rule/CMakeLists.txt
@@ -19,6 +19,6 @@ if (WITH_CUDA)
   nv_test(test_multi_level_tiling SRCS multi_level_tiling_test.cc DEPS cinncore auto_gen_rule_test_helper test_program_builder)
 endif()
 
-cc_test(test_auto_inline SRCS auto_inline_test.cc DEPS cinncore)
+cc_test(test_auto_inline SRCS auto_inline_test.cc DEPS cinncore auto_gen_rule_test_helper)
 cc_test(test_skip_rule SRCS skip_rule_test.cc DEPS cinncore)
 cc_test(test_auto_unroll SRCS auto_unroll_test.cc DEPS cinncore)
diff --git a/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc b/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc
index 0334162520..061c3cd667 100644
--- a/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc
+++ b/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc
@@ -22,6 +22,7 @@
 #include 
 
 #include "cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.h"
+#include "cinn/auto_schedule/search_space/auto_gen_rule/test_helper.h"
 #include "cinn/cinn.h"
 #include "cinn/frontend/net_builder.h"
 #include "cinn/hlir/framework/op_lowering.h"
@@ -37,6 +38,7 @@
 #include "cinn/poly/stage.h"
 #include "cinn/runtime/flags.h"
 #include "cinn/utils/string.h"
+#include "tests/concrete_program_builder.h"
 
 DECLARE_bool(cinn_ir_schedule);
 
@@ -239,5 +241,253 @@ TEST(AutoInline, AddReluInline) {
   EXPECT_EQ(auto_inline.AnalyseApplyType(new_states[0], "var_2"), RuleApplyType::kCannotApply);
 }
 
+#ifdef CINN_WITH_CUDA
+class TestAutoInline : public TestAutoGenRuleBase {};
+
+/* A single-chain graph composed of multiple blocks can be inlined into one block.
+ *
+ * Before AutoInline: The output of each block is the input of the next block.
+ * Loop1:
+ *   x1 = Add()
+ * Loop2:
+ *   x2 = Multiply(x1)
+ * Loop3:
+ *   x3 = Add(x2)
+ * Loop4:
+ *   x4 = Relu(x3)
+ *
+ * After AutoInline: All loops are inlined into a single loop.
+ * Loop:
+ *   Relu(Add(Multiply(Add())))
+ */
+TEST_F(TestAutoInline, SingleChain) {
+  Target target = common::DefaultNVGPUTarget();
+  Initialize(target);
+  std::vector<std::string> input_names  = {"bias", "conv_output", "bn_scale", "bn_offset"};
+  std::vector<std::string> output_names = {"var_6", "var_5", "var_1", "var", "var_0", "var_4", "var_3"};
+  std::vector<int32_t> conv_output_shape = {1, 512, 56, 56};
+  int32_t channel = conv_output_shape[1];
+  std::vector<tests::VariableInfo> inputs_varinfo({{"conv_output", conv_output_shape},
+                                                   {"bias", {channel, 1, 1}},
+                                                   {"bn_scale", {channel, 1, 1}},
+                                                   {"bn_offset", {channel, 1, 1}}});
+
+  // Construct the computation graph and convert it to ir::Expr
+  Context::Global().ResetNameId();
+  ir::IRSchedule ir_schedule = MakeIRSchedule(tests::BiasBnReLUBuilder().Build(inputs_varinfo));
+  SearchState state(ir_schedule, 0, {});
+  std::vector<ir::Expr> func_bodys = ir_schedule.GetModule().GetExprs();
+  ASSERT_EQ(func_bodys.size(), 1UL);
+  VLOG(6) << "Original Expr:\n" << func_bodys[0];
+
+  // Apply AutoInline to every block that can be inlined
+  AutoInline auto_inline(target_, {output_names.front()});
+  EXPECT_EQ(auto_inline.AnalyseApplyType(state, "var_3"), RuleApplyType::kApplyAndPruneOtherRules);
+  auto new_states = auto_inline.ApplyOnBlock(state, "var_3");
+  std::vector<std::string> inline_block_names({"var_4", "var_5", "var_6", "var", "var_0", "var_1"});
+  for (const auto& inline_block_name : inline_block_names) {
+    new_states = auto_inline.ApplyOnBlock(new_states[0], inline_block_name);
+  }
+  std::vector<ir::Expr> exprs = new_states[0]->ir_schedule.GetModule().GetExprs();
+  EXPECT_EQ(exprs.size(), 1UL);
+  VLOG(6) << "Expr after AutoInline applied on block: " << exprs[0];
+
+  // Build ir::Module and debug source code
+  auto build_module_auto = BuildIRModule(new_states[0]->ir_schedule);
+  auto build_module_manually =
+      BuildIRModule(MakeIRSchedule(tests::BiasBnReLUBuilder().Build(inputs_varinfo), -1, true));
+  auto source_code_auto = GenSourceCode(build_module_auto);
+  VLOG(6) << " auto-schedule source code:\n" << source_code_auto;
+  auto source_code_manually = GenSourceCode(build_module_manually);
+  VLOG(6) << " manually-schedule source code:\n" << source_code_manually;
+
+  CheckResult(GenExecutableKernel(build_module_auto),
+              GenExecutableKernel(build_module_manually),
+              input_names,
+              output_names,
+              {{conv_output_shape[1], 1, 1}, conv_output_shape, conv_output_shape, conv_output_shape},
+              {conv_output_shape, {1}, {1}, {1}, {1}, {1}, {1}},
+              target);
+}
+
+/* An op can be inlined into multiple consumers at the same time.
+ *
+ * Before AutoInline: The output of Exp is used by Add and Multiply.
+ * Loop1:
+ *   x = Exp()
+ * Loop2:
+ *   y = Add(x)
+ * Loop3:
+ *   z = Multiply(x)
+ *
+ * After AutoInline: Exp is inlined into Add and Multiply.
+ * Loop:
+ *   y = Add(Exp())
+ *   z = Multiply(Exp())
+ */
+TEST_F(TestAutoInline, InlineToMultiConsumers) {
+  Target target = common::DefaultNVGPUTarget();
+  Initialize(target);
+  std::vector<std::string> input_names  = {"x"};
+  std::vector<std::string> output_names = {"var_2", "var_1", "var_0"};
+  std::vector<int32_t> input_shape{256, 256};
+  std::vector<tests::VariableInfo> inputs_varinfo({{"x", input_shape}});
+
+  // Construct the computation graph and convert it to ir::Expr
+  Context::Global().ResetNameId();
+  ir::IRSchedule ir_schedule = MakeIRSchedule(tests::ExpTwoConsumersOpBuilder().Build(inputs_varinfo));
+  SearchState state(ir_schedule, 0, {});
+  std::vector<ir::Expr> func_bodys = ir_schedule.GetModule().GetExprs();
+  ASSERT_EQ(func_bodys.size(), 1UL);
+  VLOG(6) << "Original Expr:\n" << func_bodys[0];
+
+  // Apply AutoInline to every block that can be inlined
+  AutoInline auto_inline(target_, {output_names.front()});
+  EXPECT_EQ(auto_inline.AnalyseApplyType(state, "var_0"), RuleApplyType::kApplyAndPruneOtherRules);
+  auto new_states = auto_inline.ApplyOnBlock(state, "var_1");
+  new_states      = auto_inline.ApplyOnBlock(new_states[0], "var_0");
+  std::vector<ir::Expr> exprs = new_states[0]->ir_schedule.GetModule().GetExprs();
+  EXPECT_EQ(exprs.size(), 1UL);
+  VLOG(6) << "Expr after AutoInline applied on block: " << exprs[0];
+
+  // Build ir::Module and debug source code
+  auto build_module_auto = BuildIRModule(new_states[0]->ir_schedule);
+  auto build_module_manually =
+      BuildIRModule(MakeIRSchedule(tests::ExpTwoConsumersOpBuilder().Build(inputs_varinfo), -1, true));
+  auto source_code_auto = GenSourceCode(build_module_auto);
+  VLOG(6) << " auto-schedule source code:\n" << source_code_auto;
+  auto source_code_manually = GenSourceCode(build_module_manually);
+  VLOG(6) << " manually-schedule source code:\n" << source_code_manually;
+
+  CheckResult(GenExecutableKernel(build_module_auto),
+              GenExecutableKernel(build_module_manually),
+              input_names,
+              output_names,
+              {input_shape},
+              {input_shape, {1}, {1}},
+              target);
+}
+
+/* Operators of elementwise or injective type can all be inlined.
+ *
+ * Before AutoInline: A graph of Gather, Add and Subtract.
+ * Loop1:
+ *   x1 = Gather()
+ * Loop2:
+ *   x2 = Add(x1)
+ * Loop3:
+ *   y1 = Gather()
+ * Loop4:
+ *   z1 = Subtract(y1, x2)
+ *
+ * After AutoInline: All loops are inlined into one.
+ *   z1 = Subtract(Gather(), Add(Gather()))
+ */
+TEST_F(TestAutoInline, OnlySpatialOp) {
+  Target target = common::DefaultNVGPUTarget();
+  Initialize(target);
+  std::vector<std::string> input_names  = {"x", "y"};
+  std::vector<std::string> output_names = {
+      "var_6", "var_4", "constant_idx_last", "constant_idx_first", "var_2", "var_5"};
+  std::vector<int32_t> input_shape{256, 256};
+  std::vector<tests::VariableInfo> inputs_varinfo({{"x", input_shape}, {"y", input_shape}});
+
+  // Construct the computation graph and convert it to ir::Expr
+  Context::Global().ResetNameId();
+  ir::IRSchedule ir_schedule = MakeIRSchedule(tests::GatherAddSubBuilder().Build(inputs_varinfo));
+  SearchState state(ir_schedule, 0, {});
+  std::vector<ir::Expr> func_bodys = ir_schedule.GetModule().GetExprs();
+  ASSERT_EQ(func_bodys.size(), 1UL);
+  VLOG(6) << "Original Expr:\n" << func_bodys[0];
+
+  // Apply AutoInline to every block that can be inlined
+  AutoInline auto_inline(target_, {output_names.front()});
+  EXPECT_EQ(auto_inline.AnalyseApplyType(state, "constant_idx_first"), RuleApplyType::kApplyAndPruneOtherRules);
+  auto new_states = auto_inline.ApplyOnBlock(state, "constant_idx_first");
+  std::vector<std::string> inline_block_names({"constant_idx_last", "var_2", "var_5", "var_4"});
+  for (const auto& inline_block_name : inline_block_names) {
+    new_states = auto_inline.ApplyOnBlock(new_states[0], inline_block_name);
+  }
+  std::vector<ir::Expr> exprs = new_states[0]->ir_schedule.GetModule().GetExprs();
+  EXPECT_EQ(exprs.size(), 1UL);
+  VLOG(6) << "Expr after AutoInline applied on block: " << exprs[0];
+
+  // Build ir::Module and debug source code
+  auto build_module_auto = BuildIRModule(new_states[0]->ir_schedule);
+  auto build_module_manually =
+      BuildIRModule(MakeIRSchedule(tests::GatherAddSubBuilder().Build(inputs_varinfo), -1, true));
+  auto source_code_auto = GenSourceCode(build_module_auto);
+  VLOG(6) << " auto-schedule source code:\n" << source_code_auto;
+  auto source_code_manually = GenSourceCode(build_module_manually);
+  VLOG(6) << " manually-schedule source code:\n" << source_code_manually;
+
+  CheckResult(GenExecutableKernel(build_module_auto),
+              GenExecutableKernel(build_module_manually),
+              input_names,
+              output_names,
+              {input_shape, input_shape},
+              {input_shape, {1}, {1}, {1}, {1}, {1}},
+              target);
+}
+
+/* An op that does not read data can be directly inlined.
+ *
+ * Before AutoInline: The fill_constant op is in a separate loop.
+ * Loop1:
+ *   x = fill_constant()
+ * Loop2:
+ *   y = Add(x)
+ *
+ * After AutoInline: The fill_constant op is inlined into the other loop.
+ * Loop:
+ *   y = Add(fill_constant())
+ */
+TEST_F(TestAutoInline, NoReadBufferOp) {
+  Target target = common::DefaultNVGPUTarget();
+  Initialize(target);
+  std::vector<std::string> input_names  = {"x"};
+  std::vector<std::string> output_names = {"var_0", "fill_constant"};
+  std::vector<int32_t> input_shape{256, 256};
+  std::vector<tests::VariableInfo> inputs_varinfo({{"x", input_shape}});
+
+  // Construct the computation graph and convert it to ir::Expr
+  ir::IRSchedule ir_schedule = MakeIRSchedule(tests::FillConstantAddBuilder().Build(inputs_varinfo));
+  SearchState state(ir_schedule, 0, {});
+  std::vector<ir::Expr> func_bodys = ir_schedule.GetModule().GetExprs();
+  ASSERT_EQ(func_bodys.size(), 1UL);
+  VLOG(6) << "Original Expr:\n" << func_bodys[0];
+
+  // Apply AutoInline to every block that can be inlined
+  AutoInline auto_inline(target_, {output_names.front()});
+  EXPECT_EQ(auto_inline.AnalyseApplyType(state, "fill_constant"), RuleApplyType::kApplyAndPruneOtherRules);
+  auto new_states = auto_inline.ApplyOnBlock(state, "fill_constant");
+  std::vector<ir::Expr> exprs = new_states[0]->ir_schedule.GetModule().GetExprs();
+  EXPECT_EQ(exprs.size(), 1UL);
+  VLOG(6) << "Expr after AutoInline applied on block: " << exprs[0];
+
+  // Build ir::Module and debug source code
+  auto build_module_auto = BuildIRModule(new_states[0]->ir_schedule);
+  auto build_module_manually =
+      BuildIRModule(MakeIRSchedule(tests::FillConstantAddBuilder().Build(inputs_varinfo), -1, true));
+  auto source_code_auto = GenSourceCode(build_module_auto);
+  VLOG(6) << " auto-schedule source code:\n" << source_code_auto;
+  auto source_code_manually = GenSourceCode(build_module_manually);
+  VLOG(6) << " manually-schedule source code:\n" << source_code_manually;
+
+  CheckResult(GenExecutableKernel(build_module_auto),
+              GenExecutableKernel(build_module_manually),
+              input_names,
+              output_names,
+              {input_shape},
+              {input_shape, {1}},
+              target);
+}
+
+/* An op can be inlined into multiple producers at the same time.
+ */
+// TEST_F(TestAutoInline, InlineToMultiProducers) {
+// TODO(6clc): Complete the unit test once ReverseComputeInline is ready.
+// }
+#endif
 }  // namespace auto_schedule
 }  // namespace cinn
diff --git a/tests/concrete_program_builder.h b/tests/concrete_program_builder.h
new file mode 100644
index 0000000000..5b70f76a97
--- /dev/null
+++ b/tests/concrete_program_builder.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2023 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "tests/program_builder.h"
+
+namespace cinn {
+namespace tests {
+
+/*
+ * Add --* Multiply --* Add --* Relu
+ */
+class BiasBnReLUBuilder : public ProgramBuilder {
+ public:
+  BiasBnReLUBuilder() : ProgramBuilder("bias_bn_relu_builder") {}
+  frontend::Program Build(const std::vector<VariableInfo>& inputs_varinfo, const utils::AttributeMap& attrs = {}) {
+    CHECK(inputs_varinfo.size() == 4);
+    auto conv_output = builder_.CreateInput(inputs_varinfo[0].type, inputs_varinfo[0].shape, inputs_varinfo[0].id);
+    auto bias        = builder_.CreateInput(inputs_varinfo[1].type, inputs_varinfo[1].shape, inputs_varinfo[1].id);
+    auto bn_scale    = builder_.CreateInput(inputs_varinfo[2].type, inputs_varinfo[2].shape, inputs_varinfo[2].id);
+    auto bn_offset   = builder_.CreateInput(inputs_varinfo[3].type, inputs_varinfo[3].shape, inputs_varinfo[3].id);
+
+    auto bias_add = builder_.Add(conv_output, bias);
+    auto bn_mul   = builder_.Multiply(bias_add, bn_scale);
+    auto bn_add   = builder_.Add(bn_mul, bn_offset);
+    builder_.Relu(bn_add);
+    return builder_.Build();
+  }
+};
+
+/*
+ * Exp --* Add
+ *    \
+ *     --* Multiply
+ */
+class ExpTwoConsumersOpBuilder : public ProgramBuilder {
+ public:
+  ExpTwoConsumersOpBuilder() : ProgramBuilder("exp_two_consumers_builder") {}
+  frontend::Program Build(const std::vector<VariableInfo>& inputs_varinfo, const utils::AttributeMap& attrs = {}) {
+    CHECK(inputs_varinfo.size() == 1);
+    auto x     = builder_.CreateInput(inputs_varinfo[0].type, inputs_varinfo[0].shape, inputs_varinfo[0].id);
+    auto exp_x = builder_.Exp(x);
+    auto add_x = builder_.Add(exp_x, x);
+    auto mul_1 = builder_.Multiply(exp_x, add_x);
+    return builder_.Build();
+  }
+};
+
+/*
+ * Gather --* Add --* Subtract
+ *                    *
+ *                   /
+ * Gather
+ */
+class GatherAddSubBuilder : public ProgramBuilder {
+ public:
+  GatherAddSubBuilder() : ProgramBuilder("gather_add_sub_builder") {}
+  frontend::Program Build(const std::vector<VariableInfo>& inputs_varinfo, const utils::AttributeMap& attrs = {}) {
+    CHECK(inputs_varinfo.size() == 2);
+    auto x = builder_.CreateInput(inputs_varinfo[0].type, inputs_varinfo[0].shape, inputs_varinfo[0].id);
+    auto y = builder_.CreateInput(inputs_varinfo[1].type, inputs_varinfo[1].shape, inputs_varinfo[1].id);
+    auto input_x_shape = inputs_varinfo[0].shape;
+    auto where_x_0 = builder_.Gather(x, builder_.FillConstant({input_x_shape[0]}, 0, "constant_idx_first"));
+    auto where_x_last =
+        builder_.Gather(x, builder_.FillConstant({input_x_shape[0]}, input_x_shape[0] - 1, "constant_idx_last"));
+    auto add_1 = builder_.Add(where_x_0, y);
+    builder_.Subtract(where_x_last, add_1);
+    return builder_.Build();
+  }
+};
+
+/*
+ * FillConstant --* Add
+ */
+class FillConstantAddBuilder : public ProgramBuilder {
+ public:
+  FillConstantAddBuilder() : ProgramBuilder("fill_constant_add_builder") {}
+  frontend::Program Build(const std::vector<VariableInfo>& inputs_varinfo, const utils::AttributeMap& attrs = {}) {
+    CHECK(inputs_varinfo.size() == 1);
+    auto x             = builder_.CreateInput(inputs_varinfo[0].type, inputs_varinfo[0].shape, inputs_varinfo[0].id);
+    auto fill_constant = builder_.FillConstant(inputs_varinfo[0].shape, 1.0f, "fill_constant");
+    builder_.Add(x, fill_constant);
+    return builder_.Build();
+  }
+};
+
+}  // namespace tests
+}  // namespace cinn
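Reviewer note: the new builders in tests/concrete_program_builder.h are intended to be reusable outside the AutoInline tests. Below is a minimal, hypothetical usage sketch (not part of the diff) showing the call pattern; it relies only on helpers already exercised above (`Initialize`, `MakeIRSchedule`, `BuildIRModule`, `GenSourceCode`) and assumes it lives next to the tests above, inside namespace cinn::auto_schedule of a CUDA-enabled test file, so those names resolve unqualified. The fixture and test names are illustrative.

```cpp
// Hypothetical sketch: building a program with one of the new builders and
// lowering it through the TestAutoGenRuleBase helpers used in this PR.
class TestBuilderUsage : public TestAutoGenRuleBase {};

TEST_F(TestBuilderUsage, FillConstantAddLowering) {
  Target target = common::DefaultNVGPUTarget();
  Initialize(target);

  // Describe the graph inputs, then let the builder assemble the frontend program.
  std::vector<tests::VariableInfo> inputs_varinfo({{"x", {256, 256}}});
  frontend::Program program = tests::FillConstantAddBuilder().Build(inputs_varinfo);

  // Lower to ir::IRSchedule and generate source code, mirroring the tests above.
  ir::IRSchedule ir_schedule = MakeIRSchedule(program);
  auto ir_module             = BuildIRModule(ir_schedule);
  VLOG(6) << "generated source:\n" << GenSourceCode(ir_module);
}
```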