From 6287a5e0bf8b28ec0d463a7b2f23b4e1e610f89f Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Mon, 19 Aug 2024 14:39:44 +0900 Subject: [PATCH] Draft: blockwise quantization Blockwise quantization: uint4 / int8 with a block size of 32 and one fp16 delta per block. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- runtime/onert/core/include/ir/DataType.h | 2 ++ runtime/onert/core/src/ir/DataType.cc | 4 ++++ runtime/onert/core/src/loader/BaseLoader.h | 9 ++++++++- runtime/onert/core/src/loader/CircleLoader.cc | 14 ++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h index 0ec0e07119e..b269af8effe 100644 --- a/runtime/onert/core/include/ir/DataType.h +++ b/runtime/onert/core/include/ir/DataType.h @@ -39,6 +39,8 @@ enum class DataType QUANT_INT16_ASYMM = 10, QUANT_INT8_SYMM_PER_CHANNEL = 11, QUANT_INT16_SYMM = 12, + QUANT_UINT4_SYMM_BLOCK = 13, // Blockwise quantization - for Gather input and FC weight + QUANT_INT8_SYMM_BLOCK = 14 // Blockwise quantization - for FC weight }; size_t sizeOfDataType(DataType data_type); diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 07670c72081..5a5ab705233 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -52,6 +52,10 @@ size_t sizeOfDataType(DataType data_type) return sizeof(int16_t); case DataType::QUANT_INT16_SYMM: return sizeof(int16_t); + case DataType::QUANT_UINT4_SYMM_BLOCK: + return sizeof(uint8_t) * 32 / 2 + sizeof(uint16_t); + case DataType::QUANT_INT8_SYMM_BLOCK: + return sizeof(uint8_t) * 32 + sizeof(uint16_t); default: throw std::runtime_error{"Unsupported type size"}; } diff --git a/runtime/onert/core/src/loader/BaseLoader.h b/runtime/onert/core/src/loader/BaseLoader.h index c3a50b0d8c9..e66f90ef4fc 100644 --- a/runtime/onert/core/src/loader/BaseLoader.h +++ b/runtime/onert/core/src/loader/BaseLoader.h @@ -94,6 +94,7 @@ template class BaseLoader 
// Helper functions ir::Activation convertActivation(ActivationFunctionType type); + virtual ir::DataType getTensorDataType(const Tensor *tensor); ir::DataType tensorTypeToDataType(TensorType type); ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx); flexbuffers::Map getCustomOpAttrMap(const Operator *op); @@ -295,6 +296,12 @@ BaseLoader::BaseLoader::convertActivation(const ActivationFunction } } +template +ir::DataType BaseLoader::BaseLoader::getTensorDataType(const Tensor *tensor) +{ + return tensorTypeToDataType(tensor->type()); +} + template ir::DataType BaseLoader::BaseLoader::tensorTypeToDataType(const TensorType type) { @@ -381,7 +388,7 @@ ir::OperandIndex BaseLoader::loadOperand(const Tensor *tensor, ir: // be used. // TypeInfo - ir::TypeInfo type_info(tensorTypeToDataType(tensor->type())); + ir::TypeInfo type_info(getTensorDataType(tensor)); loadQuantization(tensor, type_info); loadSparsity(tensor, type_info); diff --git a/runtime/onert/core/src/loader/CircleLoader.cc b/runtime/onert/core/src/loader/CircleLoader.cc index 1d502308bea..f0778ffe1bf 100644 --- a/runtime/onert/core/src/loader/CircleLoader.cc +++ b/runtime/onert/core/src/loader/CircleLoader.cc @@ -87,6 +87,20 @@ class CircleLoader final : public loader::BaseLoader } } +protected: + ir::DataType getTensorDataType(const Tensor *tensor) override + { + auto type = tensor->type(); + // Workaround: blockwise quantization carries no quantization parameter; + // the actual parameter (scale) for each block is stored in the data + // TODO Handle custom quantization parameter to represent blockwise quantization + if (type == TensorType::TensorType_UINT4 && !tensor->quantization()) + return ir::DataType::QUANT_UINT4_SYMM_BLOCK; + if (type == TensorType::TensorType_INT8 && !tensor->quantization()) + return ir::DataType::QUANT_INT8_SYMM_BLOCK; + return tensorTypeToDataType(type); + } + private: std::unique_ptr loadSubgraph(const circle::SubGraph *circle_subg) override {