From 591e0ebba8b745e6665d5161c50ba09bc5b91394 Mon Sep 17 00:00:00 2001
From: Liu Liu <i@liuliu.me>
Date: Sat, 16 Mar 2024 00:24:14 -0400
Subject: [PATCH] Add chunk support for Model.

---
 lib/nnc/ccv_cnnp_model_addons.c | 88 +++++++++++++++++++++++++++++++++
 lib/nnc/ccv_nnc.h               |  8 +++
 test/unit/nnc/cnnp.core.tests.c | 38 ++++++++++++++
 3 files changed, 134 insertions(+)

diff --git a/lib/nnc/ccv_cnnp_model_addons.c b/lib/nnc/ccv_cnnp_model_addons.c
index 321f71c1a..961467165 100644
--- a/lib/nnc/ccv_cnnp_model_addons.c
+++ b/lib/nnc/ccv_cnnp_model_addons.c
@@ -439,6 +439,94 @@ static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const sup
 	return ccv_cnnp_concat(self->axis, self->super.name);
 }
 
+typedef struct {
+	ccv_cnnp_model_t super;
+	int axis; // The axis along which the input is split.
+	ccv_nnc_tensor_symbol_t outputs[1]; // First output slot; ccv_cnnp_chunk over-allocates the struct so n slots follow contiguously.
+} ccv_cnnp_model_chunk_t;
+
+static void _ccv_cnnp_chunk_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
+{ // Fill outputs[] with output_size equally sized aliases that tile inputs[0] along self->axis.
+	const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
+	assert(input_size == 1);
+	const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
+	ccv_nnc_tensor_param_t output_params = input_params;
+	int i;
+	const int nd = ccv_nnc_tensor_nd(output_params.dim);
+	const int axis = self->axis;
+	assert(axis >= 0 && axis < nd); // axis indexes dim[] and ofs[] below, so it has to be in range.
+	const int n = self->super.output_size;
+	assert(n == output_size);
+	assert(output_params.dim[axis] % n == 0); // Only even splits are supported.
+	output_params.dim[axis] = output_params.dim[axis] / n;
+	int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
+	int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
+	ccv_nnc_tensor_get_stride(input_params.dim, stride);
+	ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
+	if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not chunking an alias, it is straightforward.
+	{
+		for (i = 0; i < output_size; i++)
+		{
+			outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, stride, output_params, 0);
+			ofs[axis] += output_params.dim[axis]; // Advance to where the next chunk starts.
+		}
+	} else {
+		// Otherwise, we need to check if it is a permute. For a permute, we cannot alias directly.
+		// We need to first materialize the permute and then chunk on top of it, otherwise it will be wrong.
+		int old_stride[CCV_NNC_MAX_DIM_ALLOC];
+		ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
+		// We identify a permute by checking if the stride is not in descending order.
+		// This also covers "permute" through reshape, rather than using ccv_cnnp_permute directly.
+		int no_permute = 1;
+		for (i = 1; no_permute && i < nd; i++)
+			if (old_stride[i - 1] < old_stride[i])
+				no_permute = 0;
+		if (no_permute)
+		{ // Alias directly, keeping the input alias' own stride, if there is no permute.
+			for (i = 0; i < output_size; i++)
+			{
+				outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, old_stride, output_params, 0);
+				ofs[axis] += output_params.dim[axis];
+			}
+		} else {
+			// Otherwise, we first do format transform to a plain tensor and then chunk that.
+			ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
+			ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
+			for (i = 0; i < output_size; i++)
+			{
+				outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, ofs, stride, output_params, 0);
+				ofs[axis] += output_params.dim[axis];
+			}
+		}
+	}
+}
+
+static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const self, void* const context); // Defined after ccv_cnnp_chunk, which it calls.
+
+static const ccv_cnnp_model_vtab_t ccv_cnnp_chunk_isa = { // Installed on every model created by ccv_cnnp_chunk.
+	.build = _ccv_cnnp_chunk_build,
+	.copy = _ccv_cnnp_chunk_copy,
+};
+
+ccv_cnnp_model_t* ccv_cnnp_chunk(const int n, const int axis, const char* const name)
+{ // Create a model with one input and n outputs, to be split along axis at build time.
+	assert(n >= 1);
+	ccv_cnnp_model_chunk_t* const self = (ccv_cnnp_model_chunk_t*)cccalloc(1, sizeof(ccv_cnnp_model_chunk_t) + (n - 1) * sizeof(ccv_nnc_tensor_symbol_t)); // outputs[1] already provides the first slot.
+	self->axis = axis;
+	self->super.isa = &ccv_cnnp_chunk_isa;
+	self->super.input_size = 1;
+	self->super.output_size = n;
+	self->super.outputs = self->outputs;
+	ccv_cnnp_model_copy_name(&self->super, name);
+	return &self->super;
+}
+
+static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const super, void* const context)
+{ // Copy by constructing a fresh chunk model with the same output count, axis and name.
+	const ccv_cnnp_model_chunk_t* const chunk = (const ccv_cnnp_model_chunk_t*)super;
+	return ccv_cnnp_chunk(super->output_size, chunk->axis, super->name);
+}
+
 typedef struct {
 	ccv_cnnp_model_t super;
 	ccv_nnc_tensor_symbol_t output;
diff --git a/lib/nnc/ccv_nnc.h b/lib/nnc/ccv_nnc.h
index 591e5a004..9dc0a7fe2 100644
--- a/lib/nnc/ccv_nnc.h
+++ b/lib/nnc/ccv_nnc.h
@@ -4176,6 +4176,14 @@ CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sum(const char* const name);
  * @return A model that can be applied with multiple inputs, and generate output that is a concatenation of the inputs.
  */
 CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_concat(const int axis, const char* const name);
+/**
+ * Chunk the input tensor into n equally sized pieces along one axis.
+ * @param n How many pieces we chunk the tensor into. Must be at least 1.
+ * @param axis Along this axis, we chunk the tensor. The dimension along this axis must be divisible by n.
+ * @param name The unique name of the model.
+ * @return A model that can be applied with one input, and generate n outputs that are chunks of the input.
+ */
+CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_chunk(const int n, const int axis, const char* const name);
 /**
  * A convolution model.
  * @param groups The number of kernel groups in the model.
diff --git a/test/unit/nnc/cnnp.core.tests.c b/test/unit/nnc/cnnp.core.tests.c
index 850036f80..bd7a309f1 100644
--- a/test/unit/nnc/cnnp.core.tests.c
+++ b/test/unit/nnc/cnnp.core.tests.c
@@ -1947,4 +1947,42 @@ TEST_CASE("use contiguous to make certain tensor contiguous during model inferen
 	ccv_cnnp_model_free(final);
 }
 
+TEST_CASE("chunk a tensor into several smaller ones")
+{
+	const ccv_cnnp_model_io_t x = ccv_cnnp_input();
+	ccv_cnnp_model_t* const chunk = ccv_cnnp_chunk(2, 1, "chunk"); // 2 chunks along axis 1.
+	ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(chunk, MODEL_IO_LIST(x));
+	ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(y)); // Presumably selects output 0 of the chunk model.
+	ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0));
+	ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(y)); // Presumably selects output 1 of the chunk model.
+	ccv_cnnp_model_io_t o1 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y1));
+	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(o0, o1), 0, "tiny");
+	ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0); // 2x4 input, so each chunk is 2x2.
+	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP());
+	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
+	x_tensor->data.f32[0] = 1; // Row 0: 1, -1, 2, 3.
+	x_tensor->data.f32[1] = -1;
+	x_tensor->data.f32[2] = 2;
+	x_tensor->data.f32[3] = 3;
+	x_tensor->data.f32[4] = 4; // Row 1: 4, 5, 6, 7.
+	x_tensor->data.f32[5] = 5;
+	x_tensor->data.f32[6] = 6;
+	x_tensor->data.f32[7] = 7;
+	ccv_nnc_tensor_t* const y0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
+	ccv_nnc_tensor_t* const y1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
+	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(y0_tensor, y1_tensor), 0, 0);
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[0], 1, 1e-5, "should be equal to expected value"); // First chunk: the first two columns of each row.
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[1], -1, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[2], 4, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[3], 5, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[0], 2, 1e-5, "should be equal to expected value"); // Second chunk: the last two columns of each row.
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[1], 3, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[2], 6, 1e-5, "should be equal to expected value");
+	REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[3], 7, 1e-5, "should be equal to expected value");
+	ccv_nnc_tensor_free(x_tensor);
+	ccv_nnc_tensor_free(y0_tensor);
+	ccv_nnc_tensor_free(y1_tensor);
+	ccv_cnnp_model_free(final);
+}
+
 #include "case_main.h"