Skip to content

Commit

Permalink
Add rmsnorm op.
Browse files Browse the repository at this point in the history
liuliu committed Dec 19, 2023

Verified

This commit was signed with the committer’s verified signature.
1 parent dab8e8a commit abd9796
Showing 13 changed files with 1,851 additions and 822 deletions.
5 changes: 5 additions & 0 deletions lib/nnc/ccv_nnc.h
Original file line number Diff line number Diff line change
@@ -143,6 +143,11 @@ typedef struct {
int groups; /**< [gnorm.group] The number of groups that separates channels. */
float epsilon; /**< [gnorm.epsilon] The epsilon for standard derivation. */
} gnorm;
struct {
int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [rmsnorm.axis[]] The axis selected to compute mean / variance. */
int count; /**< [rmsnorm.count] The number of axis selected. */
float epsilon; /**< [rmsnorm.epsilon] The epsilon for standard derivation. */
} rmsnorm;
struct {
int nesterov; /**< [sgd.nesterov] Nesterov accelerated gradient. */
float rate; /**< [sgd.rate] The learning rate. */
4 changes: 3 additions & 1 deletion lib/nnc/cmd/ccv_nnc_cmd.h
Original file line number Diff line number Diff line change
@@ -108,6 +108,8 @@ enum {
CCV_NNC_REDUCE_SUM_BACKWARD = 0x52970f07,
CCV_NNC_RELU_FORWARD = 0xc51eaa80,
CCV_NNC_RELU_BACKWARD = 0xc51eaa81,
CCV_NNC_RMSNORM_FORWARD = 0x6889e9d0,
CCV_NNC_RMSNORM_BACKWARD = 0x6889e9d1,
CCV_NNC_RMSPROP_FORWARD = 0x9c886b1c,
CCV_NNC_RMSPROP_BACKWARD = 0x9c886b1d,
CCV_NNC_ROI_ALIGN_FORWARD = 0xfef55168,
@@ -138,6 +140,6 @@ enum {
CCV_NNC_TRANSPOSE_BACKWARD = 0xb4d506e1,
CCV_NNC_UPSAMPLE_FORWARD = 0x73875556,
CCV_NNC_UPSAMPLE_BACKWARD = 0x73875557,
CCV_NNC_COUNT = 133,
CCV_NNC_COUNT = 135,
};
/** @} */
1,308 changes: 662 additions & 646 deletions lib/nnc/cmd/ccv_nnc_cmd.inc

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions lib/nnc/cmd/ccv_nnc_cmd_easy.h
Original file line number Diff line number Diff line change
@@ -166,6 +166,10 @@
#define CMD_GROUP_NORM_FORWARD(_group_axis, _groups, _epsilon, ...) ccv_nnc_cmd(CCV_NNC_GROUP_NORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.gnorm={.group_axis=_group_axis,.groups=_groups,.epsilon=_epsilon,.reduce_count=LIST_COUNT(__VA_ARGS__),.reduce_axis={__VA_ARGS__}}}), 0)
// CCV_NNC_GROUP_NORM_BACKWARD
#define CMD_GROUP_NORM_BACKWARD(_group_axis, _groups, _epsilon, ...) ccv_nnc_cmd(CCV_NNC_GROUP_NORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.gnorm={.group_axis=_group_axis,.groups=_groups,.epsilon=_epsilon,.reduce_count=LIST_COUNT(__VA_ARGS__),.reduce_axis={__VA_ARGS__}}}), 0)
// CCV_NNC_RMSNORM_FORWARD
#define CMD_RMSNORM_FORWARD(_epsilon, ...) ccv_nnc_cmd(CCV_NNC_RMSNORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.lnorm={.epsilon=_epsilon,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
// CCV_NNC_RMSNORM_BACKWARD
#define CMD_RMSNORM_BACKWARD(_epsilon, ...) ccv_nnc_cmd(CCV_NNC_RMSNORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.lnorm={.epsilon=_epsilon,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
// CCV_NNC_MAX_POOL_FORWARD
#define CMD_MAX_POOL_FORWARD(rows, cols) ccv_nnc_cmd(CCV_NNC_MAX_POOL_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={rows, cols,1}}}), 0)
// CCV_NNC_MAX_POOL_BACKWARD
4 changes: 2 additions & 2 deletions lib/nnc/cmd/config.mk
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CMD_SRCS := ./adam/ccv_nnc_adam_cpu_ref.c ./adam/ccv_nnc_adamw_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_opt.c ./blas/ccv_nnc_add_cpu_ref.c ./blas/ccv_nnc_mul_cpu_ref.c ./compare/ccv_nnc_min_cpu_ref.c ./compare/ccv_nnc_max_cpu_ref.c ./compression/ccv_nnc_lssc_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_opt.c ./dropout/ccv_nnc_dropout_cpu_ref.c ./ew/ccv_nnc_ew_cpu_ref.c ./gelu/ccv_nnc_gelu_cpu_ref.c ./histogram/ccv_nnc_histogram_cpu_ref.c ./index/ccv_nnc_index_select_cpu_ref.c ./isnan/ccv_nnc_reduce_isnan_cpu_ref.c ./lamb/ccv_nnc_lamb_cpu_ref.c ./leaky_relu/ccv_nnc_leaky_relu_cpu_ref.c ./loss/ccv_nnc_binary_crossentropy_cpu_ref.c ./loss/ccv_nnc_categorical_crossentropy_cpu_ref.c ./loss/ccv_nnc_mse_cpu_ref.c ./loss/ccv_nnc_smooth_l1_cpu_ref.c ./nms/ccv_nnc_nms_cpu_ref.c ./norm/ccv_nnc_batch_norm_cpu_ref.c ./norm/ccv_nnc_layer_norm_cpu_ref.c ./norm/ccv_nnc_group_norm_cpu_ref.c ./pool/ccv_nnc_max_pool_cpu_ref.c ./pool/ccv_nnc_avg_pool_cpu_ref.c ./rand/ccv_nnc_rand_uniform_cpu_ref.c ./rand/ccv_nnc_rand_normal_cpu_ref.c ./reduce/ccv_nnc_reduce_sum_cpu_ref.c ./reduce/ccv_nnc_reduce_mean_cpu_ref.c ./reduce/ccv_nnc_reduce_max_cpu_ref.c ./reduce/ccv_nnc_reduce_min_cpu_ref.c ./reduce/ccv_nnc_reduce_norm2_cpu_ref.c ./reduce/ccv_nnc_argmax_cpu_ref.c ./reduce/ccv_nnc_argmin_cpu_ref.c ./relu/ccv_nnc_relu_cpu_ref.c ./rmsprop/ccv_nnc_rmsprop_cpu_ref.c ./roi/ccv_nnc_roi_align_cpu_ref.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c ./sgd/ccv_nnc_sgd_cpu_ref.c ./sigmoid/ccv_nnc_sigmoid_cpu_ref.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c ./softmax/ccv_nnc_softmax_cpu_ref.c ./softmax_loss/ccv_nnc_softmax_crossentropy_cpu_ref.c ./swish/ccv_nnc_swish_cpu_ref.c ./tanh/ccv_nnc_tanh_cpu_ref.c ./upsample/ccv_nnc_upsample_cpu_ref.c ./util/ccv_nnc_util_cpu_ref.c ./adam/ccv_nnc_adam.c ./blas/ccv_nnc_blas.c ./blas/cpu_opt/_ccv_nnc_gemm_cpu_opt.c ./blas/cpu_sys/_ccv_nnc_gemm_cpu_sys.c ./comm/ccv_nnc_comm.c ./compare/ccv_nnc_cmp.c ./compression/ccv_nnc_compression.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_fft.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_gemm.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_convolution.c ./dropout/ccv_nnc_dropout.c ./ew/ccv_nnc_ew.c ./gelu/ccv_nnc_gelu.c ./histogram/ccv_nnc_histogram.c ./index/ccv_nnc_index_select.c ./isnan/ccv_nnc_reduce_isnan.c ./lamb/ccv_nnc_lamb.c ./leaky_relu/ccv_nnc_leaky_relu.c ./loss/ccv_nnc_binary_crossentropy.c ./loss/ccv_nnc_categorical_crossentropy.c ./loss/ccv_nnc_mse.c ./loss/ccv_nnc_smooth_l1.c ./nms/ccv_nnc_nms.c ./norm/ccv_nnc_norm.c ./pool/ccv_nnc_pool.c ./rand/ccv_nnc_rand.c ./reduce/ccv_nnc_reduce.c ./relu/ccv_nnc_relu.c ./rmsprop/ccv_nnc_rmsprop.c ./rnn/ccv_nnc_lstm.c ./roi/ccv_nnc_roi_align.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention.c ./sgd/ccv_nnc_sgd.c ./sigmoid/ccv_nnc_sigmoid.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy.c ./softmax/ccv_nnc_softmax.c ./softmax_loss/ccv_nnc_softmax_crossentropy.c ./swish/ccv_nnc_swish.c ./tanh/ccv_nnc_tanh.c ./upsample/ccv_nnc_upsample.c ./util/ccv_nnc_util.c
CUDA_CMD_SRCS := ./adam/gpu/ccv_nnc_adam_gpu_ref.cu ./adam/gpu/ccv_nnc_adamw_gpu_ref.cu ./blas/gpu/ccv_nnc_gemm_gpu_cublas.cu ./blas/gpu/ccv_nnc_add_gpu_cudnn.cu ./blas/gpu/ccv_nnc_mul_gpu_cudnn.cu ./comm/gpu/ccv_nnc_comm_gpu_nccl.cu ./compare/gpu/ccv_nnc_min_gpu_ref.cu ./compare/gpu/ccv_nnc_max_gpu_ref.cu ./compression/gpu/ccv_nnc_lssc_gpu_ref.cu ./convolution/gpu/ccv_nnc_conv_gpu_cudnn.cu ./dropout/gpu/ccv_nnc_dropout_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_ref.cu ./gelu/gpu/ccv_nnc_gelu_gpu_ref.cu ./index/gpu/ccv_nnc_index_select_gpu_ref.cu ./isnan/gpu/ccv_nnc_reduce_isnan_gpu_cudnn.cu ./lamb/gpu/ccv_nnc_lamb_gpu_ref.cu ./leaky_relu/gpu/ccv_nnc_leaky_relu_gpu_ref.cu ./loss/gpu/ccv_nnc_binary_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_mse_gpu_ref.cu ./loss/gpu/ccv_nnc_smooth_l1_gpu_ref.cu ./nms/gpu/ccv_nnc_nms_gpu_ref.cu ./norm/gpu/ccv_nnc_batch_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_layer_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_group_norm_gpu_cudnn.cu ./pool/gpu/ccv_nnc_max_pool_gpu_cudnn.cu ./pool/gpu/ccv_nnc_avg_pool_gpu_cudnn.cu ./rand/gpu/ccv_nnc_rand_uniform_gpu_ref.cu ./rand/gpu/ccv_nnc_rand_normal_gpu_ref.cu ./reduce/gpu/ccv_nnc_reduce_sum_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_mean_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_norm2_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_argmax_gpu_ref.cu ./reduce/gpu/ccv_nnc_argmin_gpu_ref.cu ./relu/gpu/ccv_nnc_relu_gpu_cudnn.cu ./rmsprop/gpu/ccv_nnc_rmsprop_gpu_ref.cu ./rnn/gpu/ccv_nnc_lstm_gpu_cudnn.cu ./roi/gpu/ccv_nnc_roi_align_gpu_ref.cu ./scaled_dot_product_attention/gpu/ccv_nnc_scaled_dot_product_attention_flash_attn.cu ./sgd/gpu/ccv_nnc_sgd_gpu_ref.cu ./sigmoid/gpu/ccv_nnc_sigmoid_gpu_cudnn.cu ./sigmoid_loss/gpu/ccv_nnc_sigmoid_binary_crossentropy_gpu_ref.cu ./softmax/gpu/ccv_nnc_softmax_gpu_cudnn.cu ./softmax_loss/gpu/ccv_nnc_softmax_crossentropy_gpu_cudnn.cu ./swish/gpu/ccv_nnc_swish_gpu_ref.cu ./tanh/gpu/ccv_nnc_tanh_gpu_cudnn.cu ./upsample/gpu/ccv_nnc_upsample_gpu_ref.cu ./util/gpu/ccv_nnc_util_gpu_cudnn.cu ./util/gpu/ccv_nnc_util_gpu_ref.cu
CMD_SRCS := ./adam/ccv_nnc_adam_cpu_ref.c ./adam/ccv_nnc_adamw_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_opt.c ./blas/ccv_nnc_add_cpu_ref.c ./blas/ccv_nnc_mul_cpu_ref.c ./compare/ccv_nnc_min_cpu_ref.c ./compare/ccv_nnc_max_cpu_ref.c ./compression/ccv_nnc_lssc_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_opt.c ./dropout/ccv_nnc_dropout_cpu_ref.c ./ew/ccv_nnc_ew_cpu_ref.c ./gelu/ccv_nnc_gelu_cpu_ref.c ./histogram/ccv_nnc_histogram_cpu_ref.c ./index/ccv_nnc_index_select_cpu_ref.c ./isnan/ccv_nnc_reduce_isnan_cpu_ref.c ./lamb/ccv_nnc_lamb_cpu_ref.c ./leaky_relu/ccv_nnc_leaky_relu_cpu_ref.c ./loss/ccv_nnc_binary_crossentropy_cpu_ref.c ./loss/ccv_nnc_categorical_crossentropy_cpu_ref.c ./loss/ccv_nnc_mse_cpu_ref.c ./loss/ccv_nnc_smooth_l1_cpu_ref.c ./nms/ccv_nnc_nms_cpu_ref.c ./norm/ccv_nnc_batch_norm_cpu_ref.c ./norm/ccv_nnc_layer_norm_cpu_ref.c ./norm/ccv_nnc_group_norm_cpu_ref.c ./norm/ccv_nnc_rmsnorm_cpu_ref.c ./pool/ccv_nnc_max_pool_cpu_ref.c ./pool/ccv_nnc_avg_pool_cpu_ref.c ./rand/ccv_nnc_rand_uniform_cpu_ref.c ./rand/ccv_nnc_rand_normal_cpu_ref.c ./reduce/ccv_nnc_reduce_sum_cpu_ref.c ./reduce/ccv_nnc_reduce_mean_cpu_ref.c ./reduce/ccv_nnc_reduce_max_cpu_ref.c ./reduce/ccv_nnc_reduce_min_cpu_ref.c ./reduce/ccv_nnc_reduce_norm2_cpu_ref.c ./reduce/ccv_nnc_argmax_cpu_ref.c ./reduce/ccv_nnc_argmin_cpu_ref.c ./relu/ccv_nnc_relu_cpu_ref.c ./rmsprop/ccv_nnc_rmsprop_cpu_ref.c ./roi/ccv_nnc_roi_align_cpu_ref.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c ./sgd/ccv_nnc_sgd_cpu_ref.c ./sigmoid/ccv_nnc_sigmoid_cpu_ref.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c ./softmax/ccv_nnc_softmax_cpu_ref.c ./softmax_loss/ccv_nnc_softmax_crossentropy_cpu_ref.c ./swish/ccv_nnc_swish_cpu_ref.c ./tanh/ccv_nnc_tanh_cpu_ref.c ./upsample/ccv_nnc_upsample_cpu_ref.c ./util/ccv_nnc_util_cpu_ref.c ./adam/ccv_nnc_adam.c ./blas/ccv_nnc_blas.c ./blas/cpu_opt/_ccv_nnc_gemm_cpu_opt.c ./blas/cpu_sys/_ccv_nnc_gemm_cpu_sys.c ./comm/ccv_nnc_comm.c ./compare/ccv_nnc_cmp.c ./compression/ccv_nnc_compression.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_fft.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_gemm.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_convolution.c ./dropout/ccv_nnc_dropout.c ./ew/ccv_nnc_ew.c ./gelu/ccv_nnc_gelu.c ./histogram/ccv_nnc_histogram.c ./index/ccv_nnc_index_select.c ./isnan/ccv_nnc_reduce_isnan.c ./lamb/ccv_nnc_lamb.c ./leaky_relu/ccv_nnc_leaky_relu.c ./loss/ccv_nnc_binary_crossentropy.c ./loss/ccv_nnc_categorical_crossentropy.c ./loss/ccv_nnc_mse.c ./loss/ccv_nnc_smooth_l1.c ./nms/ccv_nnc_nms.c ./norm/ccv_nnc_norm.c ./pool/ccv_nnc_pool.c ./rand/ccv_nnc_rand.c ./reduce/ccv_nnc_reduce.c ./relu/ccv_nnc_relu.c ./rmsprop/ccv_nnc_rmsprop.c ./rnn/ccv_nnc_lstm.c ./roi/ccv_nnc_roi_align.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention.c ./sgd/ccv_nnc_sgd.c ./sigmoid/ccv_nnc_sigmoid.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy.c ./softmax/ccv_nnc_softmax.c ./softmax_loss/ccv_nnc_softmax_crossentropy.c ./swish/ccv_nnc_swish.c ./tanh/ccv_nnc_tanh.c ./upsample/ccv_nnc_upsample.c ./util/ccv_nnc_util.c
CUDA_CMD_SRCS := ./adam/gpu/ccv_nnc_adam_gpu_ref.cu ./adam/gpu/ccv_nnc_adamw_gpu_ref.cu ./blas/gpu/ccv_nnc_gemm_gpu_cublas.cu ./blas/gpu/ccv_nnc_add_gpu_cudnn.cu ./blas/gpu/ccv_nnc_mul_gpu_cudnn.cu ./comm/gpu/ccv_nnc_comm_gpu_nccl.cu ./compare/gpu/ccv_nnc_min_gpu_ref.cu ./compare/gpu/ccv_nnc_max_gpu_ref.cu ./compression/gpu/ccv_nnc_lssc_gpu_ref.cu ./convolution/gpu/ccv_nnc_conv_gpu_cudnn.cu ./dropout/gpu/ccv_nnc_dropout_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_ref.cu ./gelu/gpu/ccv_nnc_gelu_gpu_ref.cu ./index/gpu/ccv_nnc_index_select_gpu_ref.cu ./isnan/gpu/ccv_nnc_reduce_isnan_gpu_cudnn.cu ./lamb/gpu/ccv_nnc_lamb_gpu_ref.cu ./leaky_relu/gpu/ccv_nnc_leaky_relu_gpu_ref.cu ./loss/gpu/ccv_nnc_binary_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_mse_gpu_ref.cu ./loss/gpu/ccv_nnc_smooth_l1_gpu_ref.cu ./nms/gpu/ccv_nnc_nms_gpu_ref.cu ./norm/gpu/ccv_nnc_batch_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_layer_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_group_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu ./pool/gpu/ccv_nnc_max_pool_gpu_cudnn.cu ./pool/gpu/ccv_nnc_avg_pool_gpu_cudnn.cu ./rand/gpu/ccv_nnc_rand_uniform_gpu_ref.cu ./rand/gpu/ccv_nnc_rand_normal_gpu_ref.cu ./reduce/gpu/ccv_nnc_reduce_sum_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_mean_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_norm2_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_argmax_gpu_ref.cu ./reduce/gpu/ccv_nnc_argmin_gpu_ref.cu ./relu/gpu/ccv_nnc_relu_gpu_cudnn.cu ./rmsprop/gpu/ccv_nnc_rmsprop_gpu_ref.cu ./rnn/gpu/ccv_nnc_lstm_gpu_cudnn.cu ./roi/gpu/ccv_nnc_roi_align_gpu_ref.cu ./scaled_dot_product_attention/gpu/ccv_nnc_scaled_dot_product_attention_flash_attn.cu ./sgd/gpu/ccv_nnc_sgd_gpu_ref.cu ./sigmoid/gpu/ccv_nnc_sigmoid_gpu_cudnn.cu ./sigmoid_loss/gpu/ccv_nnc_sigmoid_binary_crossentropy_gpu_ref.cu ./softmax/gpu/ccv_nnc_softmax_gpu_cudnn.cu ./softmax_loss/gpu/ccv_nnc_softmax_crossentropy_gpu_cudnn.cu ./swish/gpu/ccv_nnc_swish_gpu_ref.cu ./tanh/gpu/ccv_nnc_tanh_gpu_cudnn.cu ./upsample/gpu/ccv_nnc_upsample_gpu_ref.cu ./util/gpu/ccv_nnc_util_gpu_cudnn.cu ./util/gpu/ccv_nnc_util_gpu_ref.cu
MPS_CMD_SRCS := ./adam/mps/ccv_nnc_adam_mps.m ./adam/mps/ccv_nnc_adamw_mps.m ./blas/mps/ccv_nnc_gemm_mps.m ./blas/mps/ccv_nnc_add_mps.m ./blas/mps/ccv_nnc_mul_mps.m ./convolution/mps/ccv_nnc_conv_mps.m ./ew/mps/ccv_nnc_ew_mps.m ./gelu/mps/ccv_nnc_gelu_mps.m ./index/mps/ccv_nnc_index_select_mps.m ./isnan/mps/ccv_nnc_reduce_isnan_mps.m ./leaky_relu/mps/ccv_nnc_leaky_relu_mps.m ./loss/mps/ccv_nnc_mse_mps.m ./norm/mps/ccv_nnc_layer_norm_mps.m ./norm/mps/ccv_nnc_group_norm_mps.m ./pool/mps/ccv_nnc_max_pool_mps.m ./pool/mps/ccv_nnc_avg_pool_mps.m ./rand/mps/ccv_nnc_rand_uniform_mps.m ./rand/mps/ccv_nnc_rand_normal_mps.m ./reduce/mps/ccv_nnc_reduce_sum_mps.m ./reduce/mps/ccv_nnc_reduce_mean_mps.m ./reduce/mps/ccv_nnc_reduce_max_mps.m ./reduce/mps/ccv_nnc_reduce_min_mps.m ./reduce/mps/ccv_nnc_argmax_mps.m ./reduce/mps/ccv_nnc_argmin_mps.m ./relu/mps/ccv_nnc_relu_mps.m ./scaled_dot_product_attention/mps/ccv_nnc_scaled_dot_product_attention_mps.m ./sigmoid/mps/ccv_nnc_sigmoid_mps.m ./softmax/mps/ccv_nnc_softmax_mps.m ./swish/mps/ccv_nnc_swish_mps.m ./upsample/mps/ccv_nnc_upsample_mps.m ./util/mps/ccv_nnc_util_mps.m
2 changes: 0 additions & 2 deletions lib/nnc/cmd/norm/ccv_nnc_layer_norm_cpu_ref.c
Original file line number Diff line number Diff line change
@@ -284,7 +284,6 @@ static int _ccv_nnc_layer_norm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_
}
}
} else {
ahp = ah;
float* gssp = gss;
const float* const gp = g->data.f32;
const float* const scalep = scale->data.f32;
@@ -309,7 +308,6 @@ static int _ccv_nnc_layer_norm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_
for (x = 0; x < gdim[3]; x++)
gssp[x] = gp1[x] * scalep2[x] * inv_stdp2[rdim[3] == 1 ? 0 : x];
gp1 += gstride[2];
ahp += gdim[3];
gssp += gdim[3];
}
}
74 changes: 74 additions & 0 deletions lib/nnc/cmd/norm/ccv_nnc_norm.c
Original file line number Diff line number Diff line change
@@ -242,3 +242,77 @@ REGISTER_COMMAND(CCV_NNC_GROUP_NORM_BACKWARD)(ccv_nnc_cmd_registry_t* const regi
#define CMD_GROUP_NORM_FORWARD(_group_axis, _groups, _epsilon, ...) ccv_nnc_cmd(CCV_NNC_GROUP_NORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.gnorm={.group_axis=_group_axis,.groups=_groups,.epsilon=_epsilon,.reduce_count=LIST_COUNT(__VA_ARGS__),.reduce_axis={__VA_ARGS__}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GROUP_NORM_BACKWARD)
#define CMD_GROUP_NORM_BACKWARD(_group_axis, _groups, _epsilon, ...) ccv_nnc_cmd(CCV_NNC_GROUP_NORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.gnorm={.group_axis=_group_axis,.groups=_groups,.epsilon=_epsilon,.reduce_count=LIST_COUNT(__VA_ARGS__),.reduce_axis={__VA_ARGS__}}}), 0)

static int _ccv_nnc_rmsnorm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
// 2 inputs (x, gamma)
// 2 outputs (y, saved_inv_std)
if (input_bitmasks[0] == 3u && output_bitmasks[0] == 3u)
return 1;
// 2 inputs (x, gamma)
// 1 output (y)
if (input_bitmasks[0] == 3u && output_bitmasks[0] == 1u)
return 1;
return 0;
}

static int _ccv_nnc_rmsnorm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
// 1 + 4 + 8 + 32
// Inputs (gradient, 0, x, gamma, 0, saved_inv_std)
// Output the propagated error, dgamma
if ((input_bitmasks[0] & 45u) == 45u && (output_bitmasks[0] & 3u) == 3u)
return 1;
if ((input_bitmasks[0] & 45u) == 45u && (output_bitmasks[0] & 1u) == 1u)
return 1;
return 0;
}

static void _ccv_nnc_rmsnorm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
assert(input_size == 2);
assert(output_size == 1 || output_size == 2);
outputs[0] = inputs[0];
if (output_size == 1)
return;
int i, j;
for (i = 1; i < output_size; i++)
{
outputs[i] = inputs[0];
for (j = 0; j < cmd.lnorm.count; j++)
outputs[i].dim[cmd.lnorm.axis[j]] = 1; // Reduce the dimension to 1.
}
}

static void _ccv_nnc_rmsnorm_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
assert(input_size == 6);
assert(output_size == 1 || output_size == 2);
outputs[0] = inputs[0];
int i, j;
for (i = 1; i < output_size; i++)
{
outputs[i] = inputs[0];
for (j = 0; j < cmd.lnorm.count; j++)
outputs[i].dim[cmd.lnorm.axis[j]] = 1; // Reduce the dimension to 1.
}
}

REGISTER_COMMAND(CCV_NNC_RMSNORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
FIND_BACKEND(ccv_nnc_rmsnorm_cpu_ref.c, gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu)
{
registry->bitmask = _ccv_nnc_rmsnorm_forw_bitmask;
registry->tensor_auto = _ccv_nnc_rmsnorm_tensor_auto_forw;
}

REGISTER_COMMAND(CCV_NNC_RMSNORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
FIND_BACKEND(ccv_nnc_rmsnorm_cpu_ref.c, gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu)
{
registry->bitmask = _ccv_nnc_rmsnorm_back_bitmask;
registry->tensor_auto = _ccv_nnc_rmsnorm_tensor_auto_back;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_RMSNORM_FORWARD)
#define CMD_RMSNORM_FORWARD(_epsilon, ...) ccv_nnc_cmd(CCV_NNC_RMSNORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.lnorm={.epsilon=_epsilon,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_RMSNORM_BACKWARD)
#define CMD_RMSNORM_BACKWARD(_epsilon, ...) ccv_nnc_cmd(CCV_NNC_RMSNORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.lnorm={.epsilon=_epsilon,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
Loading

0 comments on commit abd9796

Please sign in to comment.