From 591e0ebba8b745e6665d5161c50ba09bc5b91394 Mon Sep 17 00:00:00 2001
From: Liu Liu
Date: Sat, 16 Mar 2024 00:24:14 -0400
Subject: [PATCH] Add chunk support for Model.

---
 lib/nnc/ccv_cnnp_model_addons.c | 92 +++++++++++++++++++++++++++++++++
 lib/nnc/ccv_nnc.h               |  8 +++
 test/unit/nnc/cnnp.core.tests.c | 38 ++++++++++++++
 3 files changed, 138 insertions(+)

diff --git a/lib/nnc/ccv_cnnp_model_addons.c b/lib/nnc/ccv_cnnp_model_addons.c
index 321f71c1a..961467165 100644
--- a/lib/nnc/ccv_cnnp_model_addons.c
+++ b/lib/nnc/ccv_cnnp_model_addons.c
@@ -439,6 +439,98 @@ static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const sup
 	return ccv_cnnp_concat(self->axis, self->super.name);
 }
 
+// Model that splits a single input into n equally-sized aliases along one axis.
+typedef struct {
+	ccv_cnnp_model_t super;
+	int axis;
+	ccv_nnc_tensor_symbol_t outputs[1];
+} ccv_cnnp_model_chunk_t;
+
+// Build the outputs as aliases of the input, each covering 1 / n of the extent along self->axis.
+// If the input is an alias whose strides are not in descending order, it is materialized first.
+static void _ccv_cnnp_chunk_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
+{
+	const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
+	assert(input_size == 1);
+	const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
+	ccv_nnc_tensor_param_t output_params = input_params;
+	int i;
+	const int nd = ccv_nnc_tensor_nd(output_params.dim);
+	const int axis = self->axis;
+	assert(axis >= 0 && axis < nd);
+	const int n = self->super.output_size;
+	assert(n == output_size);
+	assert(output_params.dim[axis] % n == 0);
+	output_params.dim[axis] = output_params.dim[axis] / n;
+	int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
+	int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
+	ccv_nnc_tensor_get_stride(input_params.dim, stride);
+	ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
+	if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not chunking an alias, it is straightforward.
+	{
+		for (i = 0; i < output_size; i++)
+		{
+			outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, stride, output_params, 0);
+			ofs[axis] += output_params.dim[axis];
+		}
+	} else {
+		// Otherwise, we need to check if it is permute. For permute, we cannot do alias directly.
+		// We need to first materialize the permute and then chunk on top of it, otherwise it will be wrong.
+		int old_stride[CCV_NNC_MAX_DIM_ALLOC];
+		ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
+		// We identify permute by checking if the stride is not in descending order.
+		// This also covers "permute" through reshape, rather than using ccv_cnnp_permute directly.
+		int no_permute = 1;
+		for (i = 1; no_permute && i < nd; i++)
+			if (old_stride[i - 1] < old_stride[i])
+				no_permute = 0;
+		if (no_permute)
+		{ // Just straightforward chunking (keeping the alias stride) if there is no permute.
+			for (i = 0; i < output_size; i++)
+			{
+				outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, old_stride, output_params, 0);
+				ofs[axis] += output_params.dim[axis];
+			}
+		} else {
+			// Otherwise, we first do format transform to plain tensor and then chunk it.
+			ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
+			ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
+			for (i = 0; i < output_size; i++)
+			{
+				outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, ofs, stride, output_params, 0);
+				ofs[axis] += output_params.dim[axis];
+			}
+		}
+	}
+}
+
+static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const self, void* const context);
+
+static const ccv_cnnp_model_vtab_t ccv_cnnp_chunk_isa = {
+	.build = _ccv_cnnp_chunk_build,
+	.copy = _ccv_cnnp_chunk_copy,
+};
+
+ccv_cnnp_model_t* ccv_cnnp_chunk(const int n, const int axis, const char* const name)
+{
+	assert(n >= 1);
+	// outputs[] is declared with one element; over-allocate to hold the remaining n - 1 symbols.
+	ccv_cnnp_model_chunk_t* const model_chunk = (ccv_cnnp_model_chunk_t*)cccalloc(1, sizeof(ccv_cnnp_model_chunk_t) + sizeof(ccv_nnc_tensor_symbol_t) * (n - 1));
+	model_chunk->super.isa = &ccv_cnnp_chunk_isa;
+	model_chunk->super.input_size = 1;
+	model_chunk->super.outputs = model_chunk->outputs;
+	model_chunk->super.output_size = n;
+	model_chunk->axis = axis;
+	ccv_cnnp_model_copy_name(&model_chunk->super, name);
+	return (ccv_cnnp_model_t*)model_chunk;
+}
+
+static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const super, void* const context)
+{
+	const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
+	return ccv_cnnp_chunk(self->super.output_size, self->axis, self->super.name);
+}
+
 typedef struct {
 	ccv_cnnp_model_t super;
 	ccv_nnc_tensor_symbol_t output;
diff --git a/lib/nnc/ccv_nnc.h b/lib/nnc/ccv_nnc.h
index 591e5a004..9dc0a7fe2 100644
--- a/lib/nnc/ccv_nnc.h
+++ b/lib/nnc/ccv_nnc.h
@@ -4176,6 +4176,14 @@ CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sum(const char* const name);
  * @return A model that can be applied with multiple inputs, and generate output that is a concatenation of the inputs.
  */
 CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_concat(const int axis, const char* const name);
+/**
+ * Chunk the input tensor into n pieces.
+ * @param n How many pieces we chunk the tensor into.
+ * @param axis Along this axis, we chunk the tensor. The dimension along this axis must be divisible by n.
+ * @param name The unique name of the model.
+ * @return A model that can be applied with one input, and generate outputs that are chunks of the input.
+ */
+CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_chunk(const int n, const int axis, const char* const name);
 /**
  * A convolution model.
  * @param groups The number of kernel groups in the model.
diff --git a/test/unit/nnc/cnnp.core.tests.c b/test/unit/nnc/cnnp.core.tests.c
index 850036f80..bd7a309f1 100644
--- a/test/unit/nnc/cnnp.core.tests.c
+++ b/test/unit/nnc/cnnp.core.tests.c
@@ -1947,4 +1947,42 @@ TEST_CASE("use contiguous to make certain tensor contiguous during model inferen
 	ccv_cnnp_model_free(final);
 }
 
+TEST_CASE("chunk a tensor into several smaller ones")
+{
+	const ccv_cnnp_model_io_t x = ccv_cnnp_input();
+	ccv_cnnp_model_t* const chunk = ccv_cnnp_chunk(2, 1, "chunk");
+	ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(chunk, MODEL_IO_LIST(x));
+	ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(y));
+	ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0));
+	ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(y));
+	ccv_cnnp_model_io_t o1 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y1));
+	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(o0, o1), 0, "tiny");
+	ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
+	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP());
+	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
+	x_tensor->data.f32[0] = 1;
+	x_tensor->data.f32[1] = -1;
+	x_tensor->data.f32[2] = 2;
+	x_tensor->data.f32[3] = 3;
+	x_tensor->data.f32[4] = 4;
+	x_tensor->data.f32[5] = 5;
+	x_tensor->data.f32[6] = 6;
+	x_tensor->data.f32[7] = 7;
+	ccv_nnc_tensor_t* const y0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
+	ccv_nnc_tensor_t* const y1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
+	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(y0_tensor, y1_tensor), 0, 0);
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[0], 1, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[1], -1, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[2], 4, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[3], 5, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[0], 2, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[1], 3, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[2], 6, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[3], 7, 1e-5, "should be equal to expected value");
+	ccv_nnc_tensor_free(x_tensor);
+	ccv_nnc_tensor_free(y0_tensor);
+	ccv_nnc_tensor_free(y1_tensor);
+	ccv_cnnp_model_free(final);
+}
+
 #include "case_main.h"