diff --git a/src/models/minicpm.cpp b/src/models/minicpm.cpp
index 484ecf17..ff5a9aef 100644
--- a/src/models/minicpm.cpp
+++ b/src/models/minicpm.cpp
@@ -180,8 +180,8 @@ namespace fastllm {
             attenOutput.Reshape({bsz, seqlen, -1});
             Linear(attenOutput, weight[oWeightName], Data(), attenLastOutput);
-            Mul(attenLastOutput, this->attention_scale, attenLastOutput);
-            AddTo(hiddenStates, attenLastOutput);
+            // Mul(attenLastOutput, this->attention_scale, attenLastOutput);
+            AddTo(hiddenStates, attenLastOutput, this->attention_scale);
             // 2. mlp
             RMSNorm(hiddenStates, this->weight["model.layers." + std::to_string(i) + ".post_attention_layernorm.weight"], 1e-5, attenInput);
             Linear(attenInput, weight["model.layers." + std::to_string(i) + ".mlp.gate_proj.weight"], Data(), w1);
@@ -189,8 +189,8 @@ namespace fastllm {
             Silu(w1, w1);
             MulTo(w1, w3);
             Linear(w1, weight["model.layers." + std::to_string(i) + ".mlp.down_proj.weight"], Data(), w2);
-            Mul(w2, this->attention_scale, w2);
-            AddTo(hiddenStates, w2);
+            // Mul(w2, this->attention_scale, w2);
+            AddTo(hiddenStates, w2, this->attention_scale);
         }
         Data logits, topk;
         Data tempHiddenStates;
@@ -338,8 +338,8 @@ namespace fastllm {
             PermuteSelf(attenOutput, {1, 0, 2});
             Linear(attenOutput, weight[oWeightName], Data(), attenLastOutput);
-            Mul(attenLastOutput, this->attention_scale, attenLastOutput);
-            AddTo(hiddenStates, attenLastOutput);
+            // Mul(attenLastOutput, this->attention_scale, attenLastOutput);
+            AddTo(hiddenStates, attenLastOutput, this->attention_scale);
             // 2. mlp
             RMSNorm(hiddenStates, this->weight["model.layers." + std::to_string(i) + ".post_attention_layernorm.weight"], 1e-5, attenInput);
             Linear(attenInput, weight["model.layers." + std::to_string(i) + ".mlp.gate_proj.weight"], Data(), w1);
@@ -347,8 +347,8 @@ namespace fastllm {
             Silu(w1, w1);
             MulTo(w1, w3);
             Linear(w1, weight["model.layers." + std::to_string(i) + ".mlp.down_proj.weight"], Data(), w2);
-            Mul(w2, this->attention_scale, w2);
-            AddTo(hiddenStates, w2);
+            // Mul(w2, this->attention_scale, w2);
+            AddTo(hiddenStates, w2, this->attention_scale);
         }
         Data logits,
@@ -525,8 +525,8 @@ namespace fastllm {
             }
             Linear(attenOutput, weight[oWeightName], Data(), attenLastOutput);
-            Mul(attenLastOutput, this->attention_scale, attenLastOutput);
-            AddTo(hiddenStates, attenLastOutput);
+            // Mul(attenLastOutput, this->attention_scale, attenLastOutput);
+            AddTo(hiddenStates, attenLastOutput, this->attention_scale);
             // 2. mlp
             RMSNorm(hiddenStates, this->weight["model.layers." + std::to_string(i) + ".post_attention_layernorm.weight"], 1e-5, attenInput);
             Linear(attenInput, weight["model.layers." + std::to_string(i) + ".mlp.gate_proj.weight"], Data(), w1);
@@ -534,8 +534,8 @@ namespace fastllm {
             Silu(w1, w1);
             MulTo(w1, w3);
             Linear(w1, weight["model.layers." + std::to_string(i) + ".mlp.down_proj.weight"], Data(), w2);
-            Mul(w2, this->attention_scale, w2);
-            AddTo(hiddenStates, w2);
+            // Mul(w2, this->attention_scale, w2);
+            AddTo(hiddenStates, w2, this->attention_scale);
         }
         Data logits, curLogit;
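For reference, a minimal sketch of the residual update these hunks switch to, assuming the three-argument `AddTo(Data&, Data&, float alpha)` overload behaves as `input0 += input1 * alpha`. The `AddToScaled` and `MulThenAddTo` helpers and the sample scale value below are hypothetical stand-ins for illustration, not fastllm code; they only show that the fused call matches the old scale-then-add pair while skipping the extra in-place pass over the branch tensor.

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// Fused form: hidden += branch * alpha
// (semantics assumed for AddTo(Data&, Data&, float) in the hunks above).
void AddToScaled(std::vector<float> &hidden, const std::vector<float> &branch, float alpha) {
    assert(hidden.size() == branch.size());
    for (size_t i = 0; i < hidden.size(); i++) {
        hidden[i] += branch[i] * alpha;
    }
}

// Older two-step form: scale the branch in place, then accumulate it.
void MulThenAddTo(std::vector<float> &hidden, std::vector<float> &branch, float scale) {
    for (float &v : branch) {
        v *= scale;               // Mul(branch, scale, branch)
    }
    for (size_t i = 0; i < hidden.size(); i++) {
        hidden[i] += branch[i];   // AddTo(hidden, branch)
    }
}

int main() {
    const float attention_scale = 0.25f;  // hypothetical value, for the demo only
    std::vector<float> hiddenA = {1.0f, 2.0f}, branchA = {0.5f, -0.5f};
    std::vector<float> hiddenB = hiddenA, branchB = branchA;

    MulThenAddTo(hiddenA, branchA, attention_scale);  // old path
    AddToScaled(hiddenB, branchB, attention_scale);   // fused path

    // Both paths produce the same residual sum.
    printf("old: %f %f  fused: %f %f\n", hiddenA[0], hiddenA[1], hiddenB[0], hiddenB[1]);
    return 0;
}
```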