From 28c6fdcf13e297c74f8538cd5e180b09d2095706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E5=AE=87=E6=89=AC?= Date: Thu, 8 Aug 2024 19:06:53 +0800 Subject: [PATCH] Initialize idData in FastllmLayerNormKernelTop1/TopK --- src/devices/cuda/fastllm-cuda.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/devices/cuda/fastllm-cuda.cu b/src/devices/cuda/fastllm-cuda.cu index 69c8e7e..a3af28e 100644 --- a/src/devices/cuda/fastllm-cuda.cu +++ b/src/devices/cuda/fastllm-cuda.cu @@ -1102,6 +1102,7 @@ __global__ void FastllmLayerNormKernelTop1(float *input, float *output, int chan float *inputData = input + blockIdx.x * channels; float *outputData = output + blockIdx.x * 2; int tid = threadIdx.x; + idData[tid] = tid; maxData[tid] = -1e100; for (int j = tid; j < channels; j += THREAD_PER_BLOCK) { if (inputData[j] > maxData[tid]) { @@ -1134,6 +1135,7 @@ __global__ void FastllmLayerNormKernelTopK(float *input, float *output, int K, i float *inputData = input + blockIdx.x * channels; float *outputData = output + blockIdx.x * 2 * K; int tid = threadIdx.x; + idData[tid][0] = tid; for (int i = 0; i < K; i++) { maxData[tid][i] = -1e100; }