Commit 2165601
one dim dequant
nihui committed Jan 22, 2025
1 parent 21a71d3 commit 2165601
Showing 3 changed files with 56 additions and 212 deletions.
194 changes: 52 additions & 142 deletions src/layer/dequantize.cpp
@@ -46,170 +46,80 @@ int Dequantize::load_model(const ModelBin& mb)
     return 0;
 }

+static void dequantize(const int* intptr, float* ptr, float scale, float bias, int size)
+{
+    if (bias == 0.f)
+    {
+        for (int i = 0; i < size; i++)
+        {
+            *ptr = *intptr * scale;
+            intptr++;
+            ptr++;
+        }
+    }
+    else
+    {
+        for (int i = 0; i < size; i++)
+        {
+            *ptr = *intptr * scale + bias;
+            intptr++;
+            ptr++;
+        }
+    }
+}
+
 int Dequantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
-    int dims = bottom_blob.dims;
+    const int dims = bottom_blob.dims;
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+    const int channels = bottom_blob.c;

+    top_blob.create_like(bottom_blob, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
     if (dims == 1)
     {
-        int w = bottom_blob.w;
-
-        top_blob.create(w, (size_t)4u, opt.blob_allocator);
-        if (top_blob.empty())
-            return -100;
+        // assert scale_data_size == 1
+        // assert bias_data_size == 0 || bias_data_size == 1

         const int* intptr = bottom_blob;
         float* ptr = top_blob;

-        if (scale_data_size == 1)
-        {
-            const float scale = scale_data[0];
-
-            if (bias_data_size == 0)
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale;
-                }
-            }
-            else if (bias_data_size == 1)
-            {
-                const float bias = bias_data[0];
-
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale + bias;
-                }
-            }
-            else
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale + bias_data[i];
-                }
-            }
-        }
-        else
-        {
-            if (bias_data_size == 0)
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale_data[i];
-                }
-            }
-            else if (bias_data_size == 1)
-            {
-                const float bias = bias_data[0];
-
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale_data[i] + bias;
-                }
-            }
-            else
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale_data[i] + bias_data[i];
-                }
-            }
-        }
+        const float scale = scale_data[0];
+        const float bias = bias_data_size == 0 ? 0.f : bias_data[0];
+
+        dequantize(intptr, ptr, scale, bias, w);
     }

     if (dims == 2)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int i = 0; i < h; i++)
+        {
+            const int* intptr = bottom_blob.row<const int>(i);
+            float* ptr = top_blob.row(i);

-        top_blob.create(w, h, (size_t)4u, opt.blob_allocator);
-        if (top_blob.empty())
-            return -100;
+            const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
+            const float bias = bias_data_size == 0 ? 0.f : bias_data_size == 1 ? bias_data[0] : bias_data[i];

-        if (bias_data_size == 0)
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < h; i++)
-            {
-                const int* intptr = bottom_blob.row<const int>(i);
-                float* ptr = top_blob.row(i);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
-
-                for (int j = 0; j < w; j++)
-                {
-                    ptr[j] = intptr[j] * scale;
-                }
-            }
-        }
-        else
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < h; i++)
-            {
-                const int* intptr = bottom_blob.row<const int>(i);
-                float* ptr = top_blob.row(i);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
-                const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[i];
-
-                for (int j = 0; j < w; j++)
-                {
-                    ptr[j] = intptr[j] * scale + bias;
-                }
-            }
+            dequantize(intptr, ptr, scale, bias, w);
         }
     }

     if (dims == 3)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
-        int channels = bottom_blob.c;
-        int size = w * h;
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int q = 0; q < channels; q++)
+        {
+            const int* intptr = bottom_blob.channel(q);
+            float* ptr = top_blob.channel(q);

-        top_blob.create(w, h, channels, (size_t)4u, opt.blob_allocator);
-        if (top_blob.empty())
-            return -100;
+            const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
+            const float bias = bias_data_size == 0 ? 0.f : bias_data_size == 1 ? bias_data[0] : bias_data[q];

-        if (bias_data_size == 0)
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int q = 0; q < channels; q++)
-            {
-                const int* intptr = bottom_blob.channel(q);
-                float* ptr = top_blob.channel(q);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
-
-                for (int i = 0; i < size; i++)
-                {
-                    ptr[i] = intptr[i] * scale;
-                }
-            }
-        }
-        else
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int q = 0; q < channels; q++)
-            {
-                const int* intptr = bottom_blob.channel(q);
-                float* ptr = top_blob.channel(q);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
-                const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[q];
-
-                for (int i = 0; i < size; i++)
-                {
-                    ptr[i] = intptr[i] * scale + bias;
-                }
-            }
+            dequantize(intptr, ptr, scale, bias, w * h);
         }
     }

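Note: all three forward paths above now funnel into the new scalar helper, which computes out[i] = in[i] * scale (+ bias) over one contiguous slice. Below is a minimal standalone sketch of that computation (plain C++ outside ncnn; the quantized values, scale and bias are made up for illustration, and the bias == 0.f fast path of the real helper is folded into a single loop):

#include <cstdio>

// Same arithmetic as the new helper above, without the bias == 0.f special case.
static void dequantize_demo(const int* intptr, float* ptr, float scale, float bias, int size)
{
    for (int i = 0; i < size; i++)
    {
        ptr[i] = intptr[i] * scale + bias;
    }
}

int main()
{
    // Hypothetical int8-range activations with a per-tensor scale and bias.
    const int qdata[4] = {-127, -1, 0, 127};
    float out[4];

    dequantize_demo(qdata, out, 0.05f, 0.5f, 4);

    for (int i = 0; i < 4; i++)
        printf("%d -> %.3f\n", qdata[i], out[i]);

    return 0;
}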
56 changes: 3 additions & 53 deletions src/layer/x86/dequantize_x86.cpp
@@ -261,62 +261,12 @@ int Dequantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
             const int* intptr = (const int*)bottom_blob + i * elempack;
             float* ptr = (float*)top_blob + i * elempack;

-            const float* scale_ptr = scale_data_size > 1 ? (const float*)scale_data + i * elempack : scale_data;
-            const float* bias_ptr = bias_data_size > 1 ? (const float*)bias_data + i * elempack : bias_data;
+            // assert scale_data_size == 1
+            // assert bias_data_size == 0 || bias_data_size == 1

             const int size = std::min(w - i, wp) * elempack;

-            if (scale_data_size == 1)
-            {
-                const float scale = scale_ptr[0];
-                if (bias_data_size == 0)
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale;
-                    }
-                }
-                else if (bias_data_size == 1)
-                {
-                    const float bias = bias_ptr[0];
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale + bias;
-                    }
-                }
-                else
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale + bias_ptr[j];
-                    }
-                }
-            }
-            else
-            {
-                if (bias_data_size == 0)
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale_ptr[j];
-                    }
-                }
-                else if (bias_data_size == 1)
-                {
-                    const float bias = bias_ptr[0];
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale_ptr[j] + bias;
-                    }
-                }
-                else
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale_ptr[j] + bias_ptr[j];
-                    }
-                }
-            }
+            dequantize(intptr, ptr, scale_data, bias_data, size, 1);
         }
     }

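Note: the x86 path keeps its per-thread tiling and only replaces the branchy inner loops with a call to a dequantize() overload that takes the scale/bias Mats and an elempack argument; that overload is not part of this hunk. The sketch below only illustrates the tile-size clamping visible in the kept context line (const int size = std::min(w - i, wp) * elempack;); the loop shape and the w, wp and elempack values are assumptions for illustration, not the actual ncnn loop:

#include <algorithm>
#include <cstdio>

int main()
{
    // Hypothetical 1D layout: w packed groups of elempack values,
    // processed in tiles of wp groups (e.g. one tile per thread).
    const int w = 127;
    const int wp = 32;
    const int elempack = 4;

    for (int i = 0; i < w; i += wp)
    {
        // Same clamping as the kept context line: the last tile may be short.
        const int size = std::min(w - i, wp) * elempack;
        printf("tile starting at group %d covers %d values\n", i, size);
    }
    return 0;
}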
18 changes: 1 addition & 17 deletions tests/test_dequantize.cpp
@@ -96,30 +96,14 @@ static int test_dequantize_1()
 static int test_dequantize_2()
 {
     return 0
-           || test_dequantize(RandomIntMat(128), 1, 128)
            || test_dequantize(RandomIntMat(128), 1, 1)
            || test_dequantize(RandomIntMat(128), 1, 0)
-           || test_dequantize(RandomIntMat(128), 128, 128)
-           || test_dequantize(RandomIntMat(128), 128, 1)
-           || test_dequantize(RandomIntMat(128), 128, 0)
-           || test_dequantize(RandomIntMat(120), 1, 120)
            || test_dequantize(RandomIntMat(120), 1, 1)
            || test_dequantize(RandomIntMat(120), 1, 0)
-           || test_dequantize(RandomIntMat(120), 120, 120)
-           || test_dequantize(RandomIntMat(120), 120, 1)
-           || test_dequantize(RandomIntMat(120), 120, 0)
-           || test_dequantize(RandomIntMat(124), 1, 124)
            || test_dequantize(RandomIntMat(124), 1, 1)
            || test_dequantize(RandomIntMat(124), 1, 0)
-           || test_dequantize(RandomIntMat(124), 124, 124)
-           || test_dequantize(RandomIntMat(124), 124, 1)
-           || test_dequantize(RandomIntMat(124), 124, 0)
-           || test_dequantize(RandomIntMat(127), 1, 127)
            || test_dequantize(RandomIntMat(127), 1, 1)
-           || test_dequantize(RandomIntMat(127), 1, 0)
-           || test_dequantize(RandomIntMat(127), 127, 127)
-           || test_dequantize(RandomIntMat(127), 127, 1)
-           || test_dequantize(RandomIntMat(127), 127, 0);
+           || test_dequantize(RandomIntMat(127), 1, 0);
 }

 int main()
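Note: the dropped cases are the 1D combinations with a per-element scale or per-element bias, which the simplified forward path no longer supports (see the asserts added in dequantize.cpp). test_dequantize() itself is defined earlier in tests/test_dequantize.cpp and is not shown in this hunk; the sketch below is a hypothetical reconstruction of how its three arguments presumably map onto the Dequantize layer parameters, following the usual ncnn testutil pattern — an assumption, not the file's actual helper:

#include <cstdio>
#include <vector>
#include "testutil.h"

// Hypothetical reconstruction: (input mat, scale_data_size, bias_data_size).
static int test_dequantize(const ncnn::Mat& a, int scale_data_size, int bias_data_size)
{
    ncnn::ParamDict pd;
    pd.set(0, scale_data_size); // Dequantize param 0: scale_data_size
    pd.set(1, bias_data_size);  // Dequantize param 1: bias_data_size

    std::vector<ncnn::Mat> weights(bias_data_size ? 2 : 1);
    weights[0] = RandomMat(scale_data_size);
    if (bias_data_size)
        weights[1] = RandomMat(bias_data_size);

    int ret = test_layer("Dequantize", pd, weights, a);
    if (ret != 0)
        fprintf(stderr, "test_dequantize failed a.dims=%d scale_data_size=%d bias_data_size=%d\n", a.dims, scale_data_size, bias_data_size);
    return ret;
}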
