Refactoring and clean-up
This patch includes minor refactoring to improve overall code quality: it introduces a HessianMap type alias to shorten the pass interface, regroups header includes, moves helpers out of anonymous-namespace indirection into the luci namespace, adds defensive asserts, and fixes typos in comments and error messages.

ONE-DCO-1.0-Signed-off-by: y01000.you <[email protected]>
y01000.you committed Oct 31, 2024
1 parent 313d3b8 commit 177a158
Showing 2 changed files with 27 additions and 27 deletions.
14 changes: 7 additions & 7 deletions compiler/luci/pass/include/luci/Pass/QuantizeWeightsWithGPTQPass.h
@@ -17,17 +17,19 @@
 #ifndef __LUCI_QUANTIZE_WEIGHTS_WITH_GPTQ_PASS_H__
 #define __LUCI_QUANTIZE_WEIGHTS_WITH_GPTQ_PASS_H__
 
-#include <loco.h>
-#include <luci/Pass/QuantizationParameters.h>
-#include <luci/IR/CircleNode.h>
-
 #include <logo/Pass.h>
+#include <loco.h>
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNode.h>
 #include <unordered_map>
 
 namespace luci
 {
 
+using HessianMap = std::unordered_map<const luci::CircleNode *, std::vector<float>>;
+
 /**
  * @brief Pass to quantize weights
  */
@@ -48,9 +50,7 @@ class QuantizeWeightsWithGPTQPass : public logo::Pass
     // DO NOTHING
   }
 
-  QuantizeWeightsWithGPTQPass(
-    std::unique_ptr<Context> &&ctx,
-    std::unordered_map<const luci::CircleNode *, std::vector<float>> *hessian_map)
+  QuantizeWeightsWithGPTQPass(std::unique_ptr<Context> &&ctx, HessianMap *hessian_map)
     : _ctx{std::move(ctx)}, _hessian_map{hessian_map}
   {
     // DO NOTHING
@@ -74,7 +74,7 @@ class QuantizeWeightsWithGPTQPass : public logo::Pass
 
 private:
   std::unique_ptr<Context> _ctx;
-  std::unordered_map<const luci::CircleNode *, std::vector<float>> *_hessian_map;
+  HessianMap *_hessian_map;
 };
 
 } // namespace luci
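The new HessianMap alias keeps the constructor signature readable at call sites. As a caller-side illustration (not part of the patch; the driver function, the Context population, and the nested Context type name are assumptions), usage after this change might look like:

#include <luci/Pass/QuantizeWeightsWithGPTQPass.h>

#include <memory>

// Hypothetical driver, for illustration only.
void quantize_with_gptq(loco::Graph *graph, luci::HessianMap &hessians)
{
  auto ctx = std::make_unique<luci::QuantizeWeightsWithGPTQPass::Context>();
  // ... populate ctx (types, granularity) as the pass expects ...

  // Before this patch, the full std::unordered_map<const luci::CircleNode *,
  // std::vector<float>> type had to be spelled out here as well.
  luci::QuantizeWeightsWithGPTQPass pass(std::move(ctx), &hessians);
  pass.run(graph);
}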
40 changes: 20 additions & 20 deletions compiler/luci/pass/src/QuantizeWeightsWithGPTQPass.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,22 +29,23 @@
 #include <functional>
 #include <limits>
 
-namespace
+namespace luci
 {
 
-using namespace luci;
 using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
 
 void iterate_per_channel_with_order(CircleConst *node, IterFunc func, bool reverse)
 {
+  assert(node != nullptr);
+
   loco::TensorShape dimension;
   dimension.rank(4);
   uint32_t indices[4] = {0};
   int32_t channel_dim_index{0};
   uint32_t num_dims[4];
   if (!get_channel_dim_index(node, dimension, channel_dim_index))
   {
-    throw std::runtime_error("Failed to get channel dim index.");
+    throw std::runtime_error("GPTQPass: Failed to get channel dim index.");
   }
 
   auto order = reverse ? std::vector<size_t>{3, 1, 2, 0} : std::vector<size_t>{0, 1, 2, 3};
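For readers following the traversal: iterate_per_channel_with_order walks the rank-4 weight tensor with the loop nesting picked by the order vector, so reverse = true visits dimension 3 outermost. A simplified, self-contained sketch of that shape (plain arrays stand in for loco::TensorShape; this is an assumed reading, not the verbatim implementation):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Simplified stand-in for the pass's IterFunc, taking raw dims instead of
// loco::TensorShape.
using IterFunc = std::function<void(uint32_t *, const uint32_t *, int32_t)>;

void visit_in_order(const uint32_t dims[4], const std::vector<size_t> &order, IterFunc func,
                    int32_t channel_dim_index)
{
  uint32_t indices[4] = {0};
  // The nesting order of the four loops is chosen by `order`, so
  // {3, 1, 2, 0} makes dimension 3 the outermost loop.
  for (indices[order[0]] = 0; indices[order[0]] < dims[order[0]]; ++indices[order[0]])
    for (indices[order[1]] = 0; indices[order[1]] < dims[order[1]]; ++indices[order[1]])
      for (indices[order[2]] = 0; indices[order[2]] < dims[order[2]]; ++indices[order[2]])
        for (indices[order[3]] = 0; indices[order[3]] < dims[order[3]]; ++indices[order[3]])
          func(indices, dims, channel_dim_index);
}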
@@ -73,19 +74,14 @@ void iterate_per_channel_with_order(CircleConst *node, IterFunc func, bool reverse)
   }
 }
 
-} // namespace
-
-namespace luci
-{
-
 namespace
 {
 
 size_t calculate_qauntized_value(CircleConst *node, uint32_t *indices, loco::TensorShape &dimension,
                                  int channel_dim_index, std::vector<float> &scaling_factor,
                                  std::vector<float> &max, std::vector<float> &min)
 {
-
   int channel_idx = indices[channel_dim_index];
 
   assert(scaling_factor[channel_idx] > 0);
   const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
   auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
   auto data_clipped = data < min[channel_idx] ? min[channel_idx] : data;
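A worked example of the per-element math above, with invented per-channel values (scaling_factor[c] = 0.02, nudged range [-2.0, 3.0]; numbers chosen for illustration only):

#include <cmath>
#include <cstdint>

int32_t quantize_one_weight()
{
  float data = 3.7f;                                         // raw FP32 weight
  float data_clipped = data < -2.0f ? -2.0f : data;          // clip to min
  data_clipped = data_clipped > 3.0f ? 3.0f : data_clipped;  // clip to max -> 3.0
  const float scaling_factor_inv = 1.0f / 0.02f;             // 50.0
  // round in the quantized domain: 3.0 * 50.0 = 150
  return static_cast<int32_t>(std::round(data_clipped * scaling_factor_inv));
}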
@@ -109,7 +105,7 @@ void cholesky_decomposition(std::vector<float> &src, uint32_t num_size)
 {
   if (src[i * num_size + i] - sum <= 0)
   {
-    std::cout << "Error: Matrix is not positive definite.\n" << std::endl;
+    std::cout << "Error: Matrix is not positive definite." << std::endl;
     return;
   }
   src[i * num_size + i] = sqrt(src[i * num_size + i] - sum);
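For reference, the checked factorization above follows the classic in-place Cholesky scheme. A standalone sketch with the same positive-definiteness guard (row-major n x n matrix; lower triangle overwritten with L such that A = L * L^T; a reference sketch, not the file's exact code):

#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

bool cholesky(std::vector<float> &a, uint32_t n)
{
  for (uint32_t i = 0; i < n; ++i)
  {
    for (uint32_t j = 0; j <= i; ++j)
    {
      float sum = 0.0f;
      for (uint32_t k = 0; k < j; ++k)
        sum += a[i * n + k] * a[j * n + k];
      if (i == j)
      {
        float diag = a[i * n + i] - sum;
        if (diag <= 0.0f)
        {
          // Same failure condition the pass guards against.
          std::cout << "Error: Matrix is not positive definite." << std::endl;
          return false;
        }
        a[i * n + i] = std::sqrt(diag);
      }
      else
      {
        a[i * n + j] = (a[i * n + j] - sum) / a[j * n + j];
      }
    }
  }
  return true;
}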
@@ -143,6 +139,7 @@ void forward_substitution(const std::vector<float> &L, const std::vector<float>
     {
       y[i] -= L[i * num_size + j] * y[j];
     }
+    assert(L[i * num_size + i] > 0);
     y[i] /= L[i * num_size + i];
   }
 }
@@ -157,6 +154,7 @@ void backward_substitution(const std::vector<float> &U, const std::vector<float>
     {
       x[i] -= U[i * num_size + j] * x[j];
     }
+    assert(U[i * num_size + i] > 0);
     x[i] /= U[i * num_size + i];
   }
 }
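Both substitutions solve triangular systems produced by the Cholesky factor, and the new asserts encode the invariant that a successful factorization leaves a strictly positive diagonal, making the divisions safe. A self-contained sketch matching the shapes of the two routines (reconstructed from the visible lines, not copied from the file):

#include <cassert>
#include <cstdint>
#include <vector>

// Solve L y = b for y, where L is lower triangular (row-major n x n).
void forward_substitution(const std::vector<float> &L, const std::vector<float> &b,
                          std::vector<float> &y, uint32_t n)
{
  for (uint32_t i = 0; i < n; ++i)
  {
    y[i] = b[i];
    for (uint32_t j = 0; j < i; ++j)
      y[i] -= L[i * n + j] * y[j];
    assert(L[i * n + i] > 0); // Cholesky guarantees a positive diagonal
    y[i] /= L[i * n + i];
  }
}

// Solve U x = y for x, where U is upper triangular (row-major n x n).
void backward_substitution(const std::vector<float> &U, const std::vector<float> &y,
                           std::vector<float> &x, uint32_t n)
{
  for (int32_t i = static_cast<int32_t>(n) - 1; i >= 0; --i)
  {
    x[i] = y[i];
    for (uint32_t j = i + 1; j < n; ++j)
      x[i] -= U[i * n + j] * x[j];
    assert(U[i * n + i] > 0);
    x[i] /= U[i * n + i];
  }
}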
@@ -262,10 +260,13 @@ void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t
   const double qmax_double = kMaxScale;
   const double rmin = std::fmin(0, min);
   const double rmax = std::fmax(0, max);
+  const double qrange = qmax_double - qmin_double;
+  assert(qrange > 0);
 
-  double scale = (rmax - rmin) / (qmax_double - qmin_double);
+  double scale = (rmax - rmin) / qrange;
   double zero_point_double = 0;
   uint8_t nudged_zero_point = 0;
+
   if (scale == 0)
   {
     WARN(l) << "The minimum and maximum values are the same." << std::endl;
@@ -280,15 +281,15 @@ void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t
   {
     assert(min >= 0 && max >= 0);
     nudged_zero_point = kMinScale;
-    scale = max / (qmax_double - qmin_double);
+    scale = max / qrange;
     if (min > 0 && max > 0)
       WARN(l) << "The minimum and maximum values are all positive." << std::endl;
   }
   else if (max < 0)
   {
     assert(min < 0 && max < 0);
     nudged_zero_point = kMaxScale;
-    scale = -min / (qmax_double - qmin_double);
+    scale = -min / qrange;
     WARN(l) << "The minimum and maximum values are all negative." << std::endl;
   }
   else
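To see the hoisted qrange at work, a small worked example for the u8 path, assuming kMinScale = 0 and kMaxScale = 255 and the usual TFLite-style zero-point formula (both assumptions; neither constant's value nor that formula appears in the diff):

#include <cassert>
#include <cmath>

void qrange_example()
{
  const double qmin_double = 0.0;   // kMinScale (assumed)
  const double qmax_double = 255.0; // kMaxScale (assumed)
  const double rmin = std::fmin(0, -1.0); // -1.0, from invented min = -1.0
  const double rmax = std::fmax(0, 3.0);  //  3.0, from invented max =  3.0
  const double qrange = qmax_double - qmin_double; // 255.0, checked once
  assert(qrange > 0);

  double scale = (rmax - rmin) / qrange;                 // 4.0 / 255 ~ 0.0157
  double zero_point_double = qmin_double - rmin / scale; // ~63.75, nudged to 64
  (void)zero_point_double;
}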
@@ -333,15 +334,15 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
                                      kMaxScale);
   }
 
-  if (hessian.empty()) // Cases where gptq is not applied
+  if (hessian.empty()) // Cases where GPTQ is not applied
   {
     quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
       quantized_values[cal_offset(dimension, indices)] = calculate_qauntized_value(
         node, indices, dimension, channel_dim_index, scaling_factor, nudged_max, nudged_min);
     };
     iterate_per_channel_with_order(node, quantize, false);
   }
-  else // Cases where gptq is applied
+  else // Cases where GPTQ is applied
   {
     uint32_t size_hessian = static_cast<uint32_t>(sqrt(hessian.size()));
     float percdamp = .01;
@@ -364,7 +365,7 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
     cholesky_inverse(hessian, size_hessian);
     cholesky_decomposition(hessian, size_hessian);
 
-    // transpose hessian to make upper trangular
+    // transpose hessian to make upper triangular
     for (uint32_t i = 0; i < size_hessian; i++)
     {
       for (uint32_t j = 0; j < i; j++)
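How these calls line up inside the GPTQ branch, as a sketch reusing the diff's names: the diagonal damping step is assumed from the standard GPTQ recipe (the diff only shows percdamp being declared), and mean_diag is a hypothetical helper that averages the Hessian diagonal. A fragment, not a standalone unit:

// Assumed context: inside the GPTQ branch, after percdamp = .01 is set.
// Damping the diagonal (standard GPTQ; assumed here) regularizes H before
// inversion so the Cholesky steps stay positive definite.
float damp = percdamp * mean_diag(hessian, size_hessian); // mean_diag is hypothetical
for (uint32_t i = 0; i < size_hessian; i++)
  hessian[i * size_hessian + i] += damp;

cholesky_inverse(hessian, size_hessian);       // H -> H^-1
cholesky_decomposition(hessian, size_hessian); // H^-1 -> lower-triangular factor

// transpose in place so the factor becomes upper triangular
for (uint32_t i = 0; i < size_hessian; i++)
  for (uint32_t j = 0; j < i; j++)
    std::swap(hessian[i * size_hessian + j], hessian[j * size_hessian + i]);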
@@ -492,7 +493,6 @@ struct QuantizeWeightsWithGPTQ final : public luci::CircleNodeMutableVisitor<void>
 private:
   void fake_quantize_cwq(luci::CircleConst *weights, std::vector<float> &hessian) const
   {
-    // assert(output_type == loco::DataType::U8); // FIX_CALLER_UNLESS
     if (output_type != loco::DataType::U8)
     {
       throw std::runtime_error("GPTQ quantization supports u8");
