Draft: blockwise quantization
Blockwise quantization: uint4 / int8 with block size 32 and an fp16 delta.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
hseok-oh committed Aug 19, 2024
1 parent f3d7383 commit 6287a5e
Showing 4 changed files with 28 additions and 1 deletion.
2 changes: 2 additions & 0 deletions runtime/onert/core/include/ir/DataType.h
@@ -39,6 +39,8 @@ enum class DataType
QUANT_INT16_ASYMM = 10,
QUANT_INT8_SYMM_PER_CHANNEL = 11,
QUANT_INT16_SYMM = 12,
QUANT_UINT4_SYMM_BLOCK = 13, // Blockwise quantization - for Gather input and FC weight
QUANT_INT8_SYMM_BLOCK = 14 // Blockwise quantization - for FC weight
};

size_t sizeOfDataType(DataType data_type);
4 changes: 4 additions & 0 deletions runtime/onert/core/src/ir/DataType.cc
@@ -52,6 +52,10 @@ size_t sizeOfDataType(DataType data_type)
return sizeof(int16_t);
case DataType::QUANT_INT16_SYMM:
return sizeof(int16_t);
case DataType::QUANT_UINT4_SYMM_BLOCK:
return sizeof(uint8_t) * 32 / 2 + sizeof(uint16_t);
case DataType::QUANT_INT8_SYMM_BLOCK:
return sizeof(uint8_t) * 32 + sizeof(uint16_t);
default:
throw std::runtime_error{"Unsupported type size"};
}
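Note that, unlike the other cases, these two sizes are per 32-element block rather than per element: the packed values of a block are followed by a 2-byte fp16 delta. A minimal sketch of that arithmetic, with illustrative constant names that are not taken from the runtime:

#include <cstddef>
#include <cstdint>

// Illustrative constants only: the commit hard-codes a block size of 32
// and an fp16 (2-byte) delta per block.
constexpr std::size_t kBlockSize = 32;
constexpr std::size_t kDeltaBytes = sizeof(uint16_t); // fp16 delta

// QUANT_UINT4_SYMM_BLOCK: 32 packed 4-bit values (16 bytes) + fp16 delta = 18 bytes.
constexpr std::size_t kUint4BlockBytes = kBlockSize / 2 + kDeltaBytes;

// QUANT_INT8_SYMM_BLOCK: 32 int8 values (32 bytes) + fp16 delta = 34 bytes.
constexpr std::size_t kInt8BlockBytes = kBlockSize * sizeof(int8_t) + kDeltaBytes;

static_assert(kUint4BlockBytes == 18, "uint4 block: 16 data bytes + 2 delta bytes");
static_assert(kInt8BlockBytes == 34, "int8 block: 32 data bytes + 2 delta bytes");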
9 changes: 8 additions & 1 deletion runtime/onert/core/src/loader/BaseLoader.h
@@ -94,6 +94,7 @@ template <typename LoaderDomain> class BaseLoader

// Helper functions
ir::Activation convertActivation(ActivationFunctionType type);
virtual ir::DataType getTensorDataType(const Tensor *tensor);
ir::DataType tensorTypeToDataType(TensorType type);
ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx);
flexbuffers::Map getCustomOpAttrMap(const Operator *op);
@@ -295,6 +296,12 @@ BaseLoader<LoaderDomain>::BaseLoader::convertActivation(const ActivationFunction
}
}

template <typename LoaderDomain>
ir::DataType BaseLoader<LoaderDomain>::BaseLoader::getTensorDataType(const Tensor *tensor)
{
return tensorTypeToDataType(tensor->type());
}

template <typename LoaderDomain>
ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const TensorType type)
{
@@ -381,7 +388,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
// be used.

// TypeInfo
ir::TypeInfo type_info(tensorTypeToDataType(tensor->type()));
ir::TypeInfo type_info(getTensorDataType(tensor));
loadQuantization(tensor, type_info);
loadSparsity(tensor, type_info);

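The point of the new virtual getTensorDataType is that loadOperand no longer calls tensorTypeToDataType directly, so a format-specific loader can substitute its own mapping without touching the shared loading path. A stripped-down sketch of that pattern, using illustrative stand-in types rather than the real onert classes:

#include <stdexcept>

// Stand-ins for the real ir::DataType / Tensor types (illustrative only).
enum class DataType { FLOAT32, QUANT_UINT4_SYMM_BLOCK };
struct Tensor { int type; bool quantized; };

class BaseLoaderSketch
{
public:
  virtual ~BaseLoaderSketch() = default;

  void loadOperand(const Tensor *tensor)
  {
    // The operand type now goes through the virtual hook.
    DataType dt = getTensorDataType(tensor);
    (void)dt; // build TypeInfo, load quantization, sparsity, ...
  }

protected:
  // Default mapping, shared by all loaders.
  virtual DataType getTensorDataType(const Tensor *tensor)
  {
    return tensorTypeToDataType(tensor->type);
  }

  DataType tensorTypeToDataType(int type)
  {
    if (type == 0)
      return DataType::FLOAT32;
    throw std::runtime_error{"Unsupported tensor type"};
  }
};

class CircleLoaderSketch : public BaseLoaderSketch
{
protected:
  // Format-specific override, e.g. mapping a parameter-less uint4 tensor
  // to the blockwise type, as the Circle loader does below.
  DataType getTensorDataType(const Tensor *tensor) override
  {
    if (tensor->type == 1 && !tensor->quantized)
      return DataType::QUANT_UINT4_SYMM_BLOCK;
    return BaseLoaderSketch::getTensorDataType(tensor);
  }
};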
14 changes: 14 additions & 0 deletions runtime/onert/core/src/loader/CircleLoader.cc
@@ -87,6 +87,20 @@ class CircleLoader final : public loader::BaseLoader<LoaderDomain>
}
}

protected:
ir::DataType getTensorDataType(const Tensor *tensor) override
{
auto type = tensor->type();
// Workaround: no quantization parameter is attached for blockwise quantization;
// the actual parameter (scale) for each block is stored in the tensor data.
// TODO Handle custom quantization parameter to represent blockwise quantization
if (type == TensorType::TensorType_UINT4 && !tensor->quantization())
return ir::DataType::QUANT_UINT4_SYMM_BLOCK;
if (type == TensorType::TensorType_INT8 && !tensor->quantization())
return ir::DataType::QUANT_INT8_SYMM_BLOCK;
return tensorTypeToDataType(type);
}

private:
std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg) override
{
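Since the per-block delta lives in the tensor data itself, a consumer has to unpack it when dequantizing. The following is only a sketch of how a single QUANT_UINT4_SYMM_BLOCK block could be decoded, assuming each block is stored as 16 packed uint4 bytes (low nibble first) followed by a little-endian fp16 delta, with values centered at 8; none of that layout is spelled out in this commit.

#include <cstdint>
#include <cstring>

// Minimal IEEE fp16 -> fp32 conversion for normal values; fp16 subnormals
// are flushed to zero, and infinities / NaN are not handled in this sketch.
static float halfToFloat(uint16_t h)
{
  const uint32_t sign = (static_cast<uint32_t>(h) & 0x8000u) << 16;
  const uint32_t exp = (h >> 10) & 0x1Fu;
  const uint32_t mant = h & 0x3FFu;
  if (exp == 0)
    return 0.0f; // zero and subnormals
  const uint32_t bits = sign | ((exp + 112u) << 23) | (mant << 13);
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

// Dequantize one assumed 18-byte block: block[0..15] hold 32 packed uint4
// values, block[16..17] hold the fp16 delta. Nibble order, endianness and
// the zero offset of 8 are assumptions, not taken from the commit.
static void dequantizeUint4Block(const uint8_t *block, float out[32])
{
  const uint16_t delta_bits =
    static_cast<uint16_t>(block[16]) | (static_cast<uint16_t>(block[17]) << 8);
  const float delta = halfToFloat(delta_bits);

  for (int i = 0; i < 16; ++i)
  {
    out[2 * i] = (static_cast<int>(block[i] & 0x0F) - 8) * delta;
    out[2 * i + 1] = (static_cast<int>(block[i] >> 4) - 8) * delta;
  }
}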
