Commit 2165601
one dim dequant
nihui committed Jan 22, 2025
1 parent 21a71d3 commit 2165601
Showing 3 changed files with 56 additions and 212 deletions.
194 changes: 52 additions & 142 deletions src/layer/dequantize.cpp
@@ -46,170 +46,80 @@ int Dequantize::load_model(const ModelBin& mb)
     return 0;
 }

+static void dequantize(const int* intptr, float* ptr, float scale, float bias, int size)
+{
+    if (bias == 0.f)
+    {
+        for (int i = 0; i < size; i++)
+        {
+            *ptr = *intptr * scale;
+            intptr++;
+            ptr++;
+        }
+    }
+    else
+    {
+        for (int i = 0; i < size; i++)
+        {
+            *ptr = *intptr * scale + bias;
+            intptr++;
+            ptr++;
+        }
+    }
+}
+
 int Dequantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
-    int dims = bottom_blob.dims;
+    const int dims = bottom_blob.dims;
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+    const int channels = bottom_blob.c;

+    top_blob.create_like(bottom_blob, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
     if (dims == 1)
     {
-        int w = bottom_blob.w;
-
-        top_blob.create(w, (size_t)4u, opt.blob_allocator);
-        if (top_blob.empty())
-            return -100;
+        // assert scale_data_size == 1
+        // assert bias_data_size == 0 || bias_data_size == 1

         const int* intptr = bottom_blob;
         float* ptr = top_blob;

-        if (scale_data_size == 1)
-        {
-            const float scale = scale_data[0];
-
-            if (bias_data_size == 0)
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale;
-                }
-            }
-            else if (bias_data_size == 1)
-            {
-                const float bias = bias_data[0];
-
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale + bias;
-                }
-            }
-            else
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale + bias_data[i];
-                }
-            }
-        }
-        else
-        {
-            if (bias_data_size == 0)
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale_data[i];
-                }
-            }
-            else if (bias_data_size == 1)
-            {
-                const float bias = bias_data[0];
-
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale_data[i] + bias;
-                }
-            }
-            else
-            {
-                #pragma omp parallel for num_threads(opt.num_threads)
-                for (int i = 0; i < w; i++)
-                {
-                    ptr[i] = intptr[i] * scale_data[i] + bias_data[i];
-                }
-            }
-        }
+        const float scale = scale_data[0];
+        const float bias = bias_data_size == 0 ? 0.f : bias_data[0];
+
+        dequantize(intptr, ptr, scale, bias, w);
     }

     if (dims == 2)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int i = 0; i < h; i++)
+        {
+            const int* intptr = bottom_blob.row<const int>(i);
+            float* ptr = top_blob.row(i);

-        top_blob.create(w, h, (size_t)4u, opt.blob_allocator);
-        if (top_blob.empty())
-            return -100;
+            const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
+            const float bias = bias_data_size == 0 ? 0.f : bias_data_size == 1 ? bias_data[0] : bias_data[i];

-        if (bias_data_size == 0)
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < h; i++)
-            {
-                const int* intptr = bottom_blob.row<const int>(i);
-                float* ptr = top_blob.row(i);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
-
-                for (int j = 0; j < w; j++)
-                {
-                    ptr[j] = intptr[j] * scale;
-                }
-            }
-        }
-        else
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < h; i++)
-            {
-                const int* intptr = bottom_blob.row<const int>(i);
-                float* ptr = top_blob.row(i);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
-                const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[i];
-
-                for (int j = 0; j < w; j++)
-                {
-                    ptr[j] = intptr[j] * scale + bias;
-                }
-            }
+            dequantize(intptr, ptr, scale, bias, w);
         }
     }

     if (dims == 3)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
-        int channels = bottom_blob.c;
-        int size = w * h;
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int q = 0; q < channels; q++)
+        {
+            const int* intptr = bottom_blob.channel(q);
+            float* ptr = top_blob.channel(q);

-        top_blob.create(w, h, channels, (size_t)4u, opt.blob_allocator);
-        if (top_blob.empty())
-            return -100;
+            const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
+            const float bias = bias_data_size == 0 ? 0.f : bias_data_size == 1 ? bias_data[0] : bias_data[q];

-        if (bias_data_size == 0)
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int q = 0; q < channels; q++)
-            {
-                const int* intptr = bottom_blob.channel(q);
-                float* ptr = top_blob.channel(q);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
-
-                for (int i = 0; i < size; i++)
-                {
-                    ptr[i] = intptr[i] * scale;
-                }
-            }
-        }
-        else
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int q = 0; q < channels; q++)
-            {
-                const int* intptr = bottom_blob.channel(q);
-                float* ptr = top_blob.channel(q);
-
-                const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
-                const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[q];
-
-                for (int i = 0; i < size; i++)
-                {
-                    ptr[i] = intptr[i] * scale + bias;
-                }
-            }
+            dequantize(intptr, ptr, scale, bias, w * h);
         }
     }

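Note: all three forward paths above now funnel into the new scalar helper, which computes out[i] = in[i] * scale (+ bias) over one contiguous slice. Below is a minimal standalone sketch of that computation (plain C++ outside ncnn; the quantized values, scale and bias are made up for illustration, and the bias == 0.f fast path of the real helper is folded into a single loop):

#include <cstdio>

// Same arithmetic as the new helper above, without the bias == 0.f special case.
static void dequantize_demo(const int* intptr, float* ptr, float scale, float bias, int size)
{
    for (int i = 0; i < size; i++)
    {
        ptr[i] = intptr[i] * scale + bias;
    }
}

int main()
{
    // Hypothetical int8-range activations with a per-tensor scale and bias.
    const int qdata[4] = {-127, -1, 0, 127};
    float out[4];

    dequantize_demo(qdata, out, 0.05f, 0.5f, 4);

    for (int i = 0; i < 4; i++)
        printf("%d -> %.3f\n", qdata[i], out[i]);

    return 0;
}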
56 changes: 3 additions & 53 deletions src/layer/x86/dequantize_x86.cpp
@@ -261,62 +261,12 @@ int Dequantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
             const int* intptr = (const int*)bottom_blob + i * elempack;
             float* ptr = (float*)top_blob + i * elempack;

-            const float* scale_ptr = scale_data_size > 1 ? (const float*)scale_data + i * elempack : scale_data;
-            const float* bias_ptr = bias_data_size > 1 ? (const float*)bias_data + i * elempack : bias_data;
+            // assert scale_data_size == 1
+            // assert bias_data_size == 0 || bias_data_size == 1

             const int size = std::min(w - i, wp) * elempack;

-            if (scale_data_size == 1)
-            {
-                const float scale = scale_ptr[0];
-                if (bias_data_size == 0)
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale;
-                    }
-                }
-                else if (bias_data_size == 1)
-                {
-                    const float bias = bias_ptr[0];
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale + bias;
-                    }
-                }
-                else
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale + bias_ptr[j];
-                    }
-                }
-            }
-            else
-            {
-                if (bias_data_size == 0)
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale_ptr[j];
-                    }
-                }
-                else if (bias_data_size == 1)
-                {
-                    const float bias = bias_ptr[0];
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale_ptr[j] + bias;
-                    }
-                }
-                else
-                {
-                    for (int j = 0; j < size; j++)
-                    {
-                        ptr[j] = intptr[j] * scale_ptr[j] + bias_ptr[j];
-                    }
-                }
-            }
+            dequantize(intptr, ptr, scale_data, bias_data, size, 1);
         }
     }

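Note: the x86 path keeps its per-thread tiling and only replaces the branchy inner loops with a call to a dequantize() overload that takes the scale/bias Mats and an elempack argument; that overload is not part of this hunk. The sketch below only illustrates the tile-size clamping visible in the kept context line (const int size = std::min(w - i, wp) * elempack;); the loop shape and the w, wp and elempack values are assumptions for illustration, not the actual ncnn loop:

#include <algorithm>
#include <cstdio>

int main()
{
    // Hypothetical 1D layout: w packed groups of elempack values,
    // processed in tiles of wp groups (e.g. one tile per thread).
    const int w = 127;
    const int wp = 32;
    const int elempack = 4;

    for (int i = 0; i < w; i += wp)
    {
        // Same clamping as the kept context line: the last tile may be short.
        const int size = std::min(w - i, wp) * elempack;
        printf("tile starting at group %d covers %d values\n", i, size);
    }
    return 0;
}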
18 changes: 1 addition & 17 deletions tests/test_dequantize.cpp
@@ -96,30 +96,14 @@ static int test_dequantize_1()
 static int test_dequantize_2()
 {
     return 0
-           || test_dequantize(RandomIntMat(128), 1, 128)
            || test_dequantize(RandomIntMat(128), 1, 1)
            || test_dequantize(RandomIntMat(128), 1, 0)
-           || test_dequantize(RandomIntMat(128), 128, 128)
-           || test_dequantize(RandomIntMat(128), 128, 1)
-           || test_dequantize(RandomIntMat(128), 128, 0)
-           || test_dequantize(RandomIntMat(120), 1, 120)
            || test_dequantize(RandomIntMat(120), 1, 1)
            || test_dequantize(RandomIntMat(120), 1, 0)
-           || test_dequantize(RandomIntMat(120), 120, 120)
-           || test_dequantize(RandomIntMat(120), 120, 1)
-           || test_dequantize(RandomIntMat(120), 120, 0)
-           || test_dequantize(RandomIntMat(124), 1, 124)
            || test_dequantize(RandomIntMat(124), 1, 1)
            || test_dequantize(RandomIntMat(124), 1, 0)
-           || test_dequantize(RandomIntMat(124), 124, 124)
-           || test_dequantize(RandomIntMat(124), 124, 1)
-           || test_dequantize(RandomIntMat(124), 124, 0)
-           || test_dequantize(RandomIntMat(127), 1, 127)
            || test_dequantize(RandomIntMat(127), 1, 1)
-           || test_dequantize(RandomIntMat(127), 1, 0)
-           || test_dequantize(RandomIntMat(127), 127, 127)
-           || test_dequantize(RandomIntMat(127), 127, 1)
-           || test_dequantize(RandomIntMat(127), 127, 0);
+           || test_dequantize(RandomIntMat(127), 1, 0);
 }

 int main()
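Note: the dropped cases are the 1D combinations with a per-element scale or per-element bias, which the simplified forward path no longer supports (see the asserts added in dequantize.cpp). test_dequantize() itself is defined earlier in tests/test_dequantize.cpp and is not shown in this hunk; the sketch below is a hypothetical reconstruction of how its three arguments presumably map onto the Dequantize layer parameters, following the usual ncnn testutil pattern — an assumption, not the file's actual helper:

#include <cstdio>
#include <vector>
#include "testutil.h"

// Hypothetical reconstruction: (input mat, scale_data_size, bias_data_size).
static int test_dequantize(const ncnn::Mat& a, int scale_data_size, int bias_data_size)
{
    ncnn::ParamDict pd;
    pd.set(0, scale_data_size); // Dequantize param 0: scale_data_size
    pd.set(1, bias_data_size);  // Dequantize param 1: bias_data_size

    std::vector<ncnn::Mat> weights(bias_data_size ? 2 : 1);
    weights[0] = RandomMat(scale_data_size);
    if (bias_data_size)
        weights[1] = RandomMat(bias_data_size);

    int ret = test_layer("Dequantize", pd, weights, a);
    if (ret != 0)
        fprintf(stderr, "test_dequantize failed a.dims=%d scale_data_size=%d bias_data_size=%d\n", a.dims, scale_data_size, bias_data_size);
    return ret;
}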
