Skip to content

Commit

Permalink
Implement inverse of a float by lookup tables (#612)
Browse files Browse the repository at this point in the history
  • Loading branch information
linay-xsj authored Aug 31, 2023
1 parent 1bab3df commit 732f8be
Show file tree
Hide file tree
Showing 17 changed files with 334 additions and 47 deletions.
Binary file removed aie_runtime_lib/AIE/libexp_lut.a
Binary file not shown.
Binary file added aie_runtime_lib/AIE/liblut_based_ops.a
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// This allows the user to use the "gather" read feature and read 4 values from
// 4 different banks at once. Table entries are in BF16.
//
alignas(aie::vector_decl_align) int16 softmax_ilut_ab[512] = {
alignas(aie::vector_decl_align) int16 exp_ilut_ab[512] = {
16256, 16430, 16620, 16801, 16986, 17172, 17354, 17545, 16256, 16430, 16620,
16801, 16986, 17172, 17354, 17545, 17722, 17917, 18092, 18282, 18463, 18648,
18835, 19016, 17722, 17917, 18092, 18282, 18463, 18648, 18835, 19016, 19208,
Expand Down Expand Up @@ -68,7 +68,7 @@ alignas(aie::vector_decl_align) int16 softmax_ilut_ab[512] = {
14593, 14768, 14959, 15138, 15325, 15510, 15692, 15883, 16060, 14768, 14959,
15138, 15325, 15510, 15692, 15883, 16060};

alignas(aie::vector_decl_align) int16 softmax_ilut_cd[512] = {
alignas(aie::vector_decl_align) int16 exp_ilut_cd[512] = {
16256, 16430, 16620, 16801, 16986, 17172, 17354, 17545, 16256, 16430, 16620,
16801, 16986, 17172, 17354, 17545, 17722, 17917, 18092, 18282, 18463, 18648,
18835, 19016, 17722, 17917, 18092, 18282, 18463, 18648, 18835, 19016, 19208,
Expand Down Expand Up @@ -117,7 +117,7 @@ alignas(aie::vector_decl_align) int16 softmax_ilut_cd[512] = {
14593, 14768, 14959, 15138, 15325, 15510, 15692, 15883, 16060, 14768, 14959,
15138, 15325, 15510, 15692, 15883, 16060};

alignas(aie::vector_decl_align) int16 softmax_flut_ab[512] = {
alignas(aie::vector_decl_align) int16 exp_flut_ab[512] = {
16256, 16257, 16257, 16258, 16258, 16259, 16259, 16260, 16256, 16257, 16257,
16258, 16258, 16259, 16259, 16260, 16260, 16261, 16261, 16262, 16262, 16263,
16263, 16264, 16260, 16261, 16261, 16262, 16262, 16263, 16263, 16264, 16264,
Expand Down Expand Up @@ -166,7 +166,7 @@ alignas(aie::vector_decl_align) int16 softmax_flut_ab[512] = {
16424, 16425, 16425, 16426, 16427, 16427, 16428, 16429, 16429, 16425, 16425,
16426, 16427, 16427, 16428, 16429, 16429};

alignas(aie::vector_decl_align) int16 softmax_flut_cd[512] = {
alignas(aie::vector_decl_align) int16 exp_flut_cd[512] = {
16256, 16257, 16257, 16258, 16258, 16259, 16259, 16260, 16256, 16257, 16257,
16258, 16258, 16259, 16259, 16260, 16260, 16261, 16261, 16262, 16262, 16263,
16263, 16264, 16260, 16261, 16261, 16262, 16262, 16263, 16263, 16264, 16264,
Expand Down Expand Up @@ -214,3 +214,14 @@ alignas(aie::vector_decl_align) int16 softmax_flut_cd[512] = {
16422, 16423, 16423, 16424, 16419, 16420, 16421, 16421, 16422, 16423, 16423,
16424, 16425, 16425, 16426, 16427, 16427, 16428, 16429, 16429, 16425, 16425,
16426, 16427, 16427, 16428, 16429, 16429};

// Mantissa lookup table used by getInvBf16 for the bfloat16 reciprocal.
// The table is indexed by the 7-bit mantissa m of the (rounded) input and each
// entry is the 7-bit mantissa of the reciprocal; the values match
// round(128 * (128 - m) / (128 + m)) for m >= 1. Entry 0 is 0 because the
// reciprocal of an exact power of two has an all-zero mantissa.
alignas(aie::vector_decl_align) unsigned char m_inv_lut[128] = {
0, 126, 124, 122, 120, 118, 117, 115, 113, 111, 109, 108, 106, 104, 103,
101, 100, 98, 96, 95, 93, 92, 90, 89, 88, 86, 85, 83, 82, 81,
79, 78, 77, 76, 74, 73, 72, 71, 69, 68, 67, 66, 65, 64, 63,
61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47,
46, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 37, 36, 35, 34,
33, 33, 32, 31, 30, 30, 29, 28, 27, 27, 26, 25, 24, 24, 23,
22, 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14, 13,
13, 12, 11, 11, 10, 10, 9, 9, 8, 7, 7, 6, 6, 5, 5,
4, 4, 3, 3, 2, 2, 1, 1};
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,26 @@
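// This is the implementation of lookup-table based operations: getting
// exponential values for a bfloat16 vector from exponential lookup tables, and
// getting the inverse (reciprocal) of a float from a mantissa lookup table.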
//===----------------------------------------------------------------------===//
#ifndef __EXP_LUT_H__
#define __EXP_LUT_H__
#ifndef __LUT_BASED_OPS_H__
#define __LUT_BASED_OPS_H__

#include "aie_api/aie.hpp"

alignas(aie::vector_decl_align) extern int16 softmax_ilut_ab[512];
alignas(aie::vector_decl_align) extern int16 softmax_ilut_cd[512];
alignas(aie::vector_decl_align) extern int16 softmax_flut_ab[512];
alignas(aie::vector_decl_align) extern int16 softmax_flut_cd[512];
alignas(aie::vector_decl_align) extern int16 exp_ilut_ab[512];
alignas(aie::vector_decl_align) extern int16 exp_ilut_cd[512];
alignas(aie::vector_decl_align) extern int16 exp_flut_ab[512];
alignas(aie::vector_decl_align) extern int16 exp_flut_cd[512];
alignas(aie::vector_decl_align) extern unsigned char m_inv_lut[128];

__attribute__((always_inline)) v16accfloat getExpBf16(v16bfloat16 x) {
bfloat16 __aie_dm_resource_a *ilut_ab =
(bfloat16 __aie_dm_resource_a *)softmax_ilut_ab;
(bfloat16 __aie_dm_resource_a *)exp_ilut_ab;
bfloat16 __aie_dm_resource_b *ilut_cd =
(bfloat16 __aie_dm_resource_b *)softmax_ilut_cd;
(bfloat16 __aie_dm_resource_b *)exp_ilut_cd;
bfloat16 __aie_dm_resource_a *flut_ab =
(bfloat16 __aie_dm_resource_a *)softmax_flut_ab;
(bfloat16 __aie_dm_resource_a *)exp_flut_ab;
bfloat16 __aie_dm_resource_b *flut_cd =
(bfloat16 __aie_dm_resource_b *)softmax_flut_cd;
(bfloat16 __aie_dm_resource_b *)exp_flut_cd;

using lut_type = aie::lut<4, bfloat16, bfloat16>;
const int LUT_elems = 256;
Expand Down Expand Up @@ -58,4 +59,24 @@ __attribute__((always_inline)) v16accfloat getExpBf16(v16bfloat16 x) {
exp_val = aie::mul(I_val_vec, F_val_vec);
return v16accfloat(exp_val);
}
#endif //__EXP_LUT_H__

// Approximates the reciprocal 1/x of a float and returns it as a bfloat16.
//
// The float is first rounded to bfloat16 precision (1 sign bit, 8 exponent
// bits, 7 mantissa bits). The exponent of the result is obtained by mirroring
// the biased exponent, and the mantissa of the result is read from the
// 128-entry table m_inv_lut, indexed by the 7-bit input mantissa.
//
// Parameters:
//   x - value to invert.
// Returns:
//   An approximation of 1/x carrying the same sign as x.
//
// NOTE(review): zero, subnormal, infinite and NaN inputs, and inputs whose
// rounded exponent field is 254 or more, are not special-cased here.
// NOTE(review): the bit patterns are accessed through pointer casts, as in the
// rest of this header; this relies on the toolchain tolerating such punning.
__attribute__((always_inline)) bfloat16 getInvBf16(float x) {
  const unsigned int sign_mask = 0x80000000;
  const unsigned int exp_mask = 0x7F800000;
  const unsigned int mantissa_mask = 0x007FFFFF;
  // Half of the least significant bfloat16 mantissa bit: adding it before
  // truncating the low 16 bits rounds the float to the nearest bfloat16.
  const unsigned int mantissa_Q = 0x00008000;

  // Raw IEEE-754 encoding of the input.
  unsigned int *B_x = (unsigned int *)&x;
  unsigned int B_Q = *B_x + mantissa_Q;

  // Biased exponent and 7-bit mantissa of the rounded input.
  unsigned char exponent = (B_Q & exp_mask) >> 23;
  unsigned char mantissa = (B_Q & mantissa_mask) >> 16;

  // For an input 2^(e-127) * 1.m the reciprocal is 2^(127-e) / 1.m. When m is
  // zero the biased result exponent is 254 - e; otherwise 1/1.m lies in
  // (0.5, 1), so one more power of two is borrowed and it becomes 253 - e.
  unsigned inv_exponent = (mantissa == 0) + (253 - exponent);

  // Assemble the bfloat16 encoding: exponent above the 7 mantissa bits.
  unsigned short inv_x_val = (inv_exponent << 7) + m_inv_lut[mantissa];

  // Carry the sign of the input over to the result; the exponent and mantissa
  // masks above drop it, which would otherwise make 1/x positive for x < 0.
  inv_x_val |= (unsigned short)((*B_x & sign_mask) >> 16);

  bfloat16 *inv_x = (bfloat16 *)&inv_x_val;
  return *inv_x;
}
#endif //__LUT_BASED_OPS_H__
Binary file removed aie_runtime_lib/AIE2/libexp_lut.a
Binary file not shown.
Binary file added aie_runtime_lib/AIE2/liblut_based_ops.a
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// This allows the user to use the "gather" read feature and read 4 values from
// 4 different banks at once. Table entries are in BF16.
//
alignas(aie::vector_decl_align) int16 softmax_ilut_ab[512] = {
alignas(aie::vector_decl_align) int16 exp_ilut_ab[512] = {
16256, 16430, 16620, 16801, 16986, 17172, 17354, 17545, 16256, 16430, 16620,
16801, 16986, 17172, 17354, 17545, 17722, 17917, 18092, 18282, 18463, 18648,
18835, 19016, 17722, 17917, 18092, 18282, 18463, 18648, 18835, 19016, 19208,
Expand Down Expand Up @@ -68,7 +68,7 @@ alignas(aie::vector_decl_align) int16 softmax_ilut_ab[512] = {
14593, 14768, 14959, 15138, 15325, 15510, 15692, 15883, 16060, 14768, 14959,
15138, 15325, 15510, 15692, 15883, 16060};

alignas(aie::vector_decl_align) int16 softmax_ilut_cd[512] = {
alignas(aie::vector_decl_align) int16 exp_ilut_cd[512] = {
16256, 16430, 16620, 16801, 16986, 17172, 17354, 17545, 16256, 16430, 16620,
16801, 16986, 17172, 17354, 17545, 17722, 17917, 18092, 18282, 18463, 18648,
18835, 19016, 17722, 17917, 18092, 18282, 18463, 18648, 18835, 19016, 19208,
Expand Down Expand Up @@ -117,7 +117,7 @@ alignas(aie::vector_decl_align) int16 softmax_ilut_cd[512] = {
14593, 14768, 14959, 15138, 15325, 15510, 15692, 15883, 16060, 14768, 14959,
15138, 15325, 15510, 15692, 15883, 16060};

alignas(aie::vector_decl_align) int16 softmax_flut_ab[512] = {
alignas(aie::vector_decl_align) int16 exp_flut_ab[512] = {
16256, 16257, 16257, 16258, 16258, 16259, 16259, 16260, 16256, 16257, 16257,
16258, 16258, 16259, 16259, 16260, 16260, 16261, 16261, 16262, 16262, 16263,
16263, 16264, 16260, 16261, 16261, 16262, 16262, 16263, 16263, 16264, 16264,
Expand Down Expand Up @@ -166,7 +166,7 @@ alignas(aie::vector_decl_align) int16 softmax_flut_ab[512] = {
16424, 16425, 16425, 16426, 16427, 16427, 16428, 16429, 16429, 16425, 16425,
16426, 16427, 16427, 16428, 16429, 16429};

alignas(aie::vector_decl_align) int16 softmax_flut_cd[512] = {
alignas(aie::vector_decl_align) int16 exp_flut_cd[512] = {
16256, 16257, 16257, 16258, 16258, 16259, 16259, 16260, 16256, 16257, 16257,
16258, 16258, 16259, 16259, 16260, 16260, 16261, 16261, 16262, 16262, 16263,
16263, 16264, 16260, 16261, 16261, 16262, 16262, 16263, 16263, 16264, 16264,
Expand Down Expand Up @@ -214,3 +214,14 @@ alignas(aie::vector_decl_align) int16 softmax_flut_cd[512] = {
16422, 16423, 16423, 16424, 16419, 16420, 16421, 16421, 16422, 16423, 16423,
16424, 16425, 16425, 16426, 16427, 16427, 16428, 16429, 16429, 16425, 16425,
16426, 16427, 16427, 16428, 16429, 16429};

// Mantissa lookup table used by getInvBf16 for the bfloat16 reciprocal.
// The table is indexed by the 7-bit mantissa m of the (rounded) input and each
// entry is the 7-bit mantissa of the reciprocal; the values match
// round(128 * (128 - m) / (128 + m)) for m >= 1. Entry 0 is 0 because the
// reciprocal of an exact power of two has an all-zero mantissa.
alignas(aie::vector_decl_align) unsigned char m_inv_lut[128] = {
0, 126, 124, 122, 120, 118, 117, 115, 113, 111, 109, 108, 106, 104, 103,
101, 100, 98, 96, 95, 93, 92, 90, 89, 88, 86, 85, 83, 82, 81,
79, 78, 77, 76, 74, 73, 72, 71, 69, 68, 67, 66, 65, 64, 63,
61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47,
46, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 37, 36, 35, 34,
33, 33, 32, 31, 30, 30, 29, 28, 27, 27, 26, 25, 24, 24, 23,
22, 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14, 13,
13, 12, 11, 11, 10, 10, 9, 9, 8, 7, 7, 6, 6, 5, 5,
4, 4, 3, 3, 2, 2, 1, 1};
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,26 @@
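// This is the implementation of lookup-table based operations: getting
// exponential values for a bfloat16 vector from exponential lookup tables, and
// getting the inverse (reciprocal) of a float from a mantissa lookup table.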
//===----------------------------------------------------------------------===//
#ifndef __EXP_LUT_H__
#define __EXP_LUT_H__
#ifndef __LUT_BASED_OPS_H__
#define __LUT_BASED_OPS_H__

#include "aie_api/aie.hpp"

alignas(aie::vector_decl_align) extern int16 softmax_ilut_ab[512];
alignas(aie::vector_decl_align) extern int16 softmax_ilut_cd[512];
alignas(aie::vector_decl_align) extern int16 softmax_flut_ab[512];
alignas(aie::vector_decl_align) extern int16 softmax_flut_cd[512];
alignas(aie::vector_decl_align) extern int16 exp_ilut_ab[512];
alignas(aie::vector_decl_align) extern int16 exp_ilut_cd[512];
alignas(aie::vector_decl_align) extern int16 exp_flut_ab[512];
alignas(aie::vector_decl_align) extern int16 exp_flut_cd[512];
alignas(aie::vector_decl_align) extern unsigned char m_inv_lut[128];

__attribute__((always_inline)) v16accfloat getExpBf16(v16bfloat16 x) {
bfloat16 __aie_dm_resource_a *ilut_ab =
(bfloat16 __aie_dm_resource_a *)softmax_ilut_ab;
(bfloat16 __aie_dm_resource_a *)exp_ilut_ab;
bfloat16 __aie_dm_resource_b *ilut_cd =
(bfloat16 __aie_dm_resource_b *)softmax_ilut_cd;
(bfloat16 __aie_dm_resource_b *)exp_ilut_cd;
bfloat16 __aie_dm_resource_a *flut_ab =
(bfloat16 __aie_dm_resource_a *)softmax_flut_ab;
(bfloat16 __aie_dm_resource_a *)exp_flut_ab;
bfloat16 __aie_dm_resource_b *flut_cd =
(bfloat16 __aie_dm_resource_b *)softmax_flut_cd;
(bfloat16 __aie_dm_resource_b *)exp_flut_cd;

using lut_type = aie::lut<4, bfloat16, bfloat16>;
const int LUT_elems = 256;
Expand Down Expand Up @@ -58,4 +59,24 @@ __attribute__((always_inline)) v16accfloat getExpBf16(v16bfloat16 x) {
exp_val = aie::mul(I_val_vec, F_val_vec);
return v16accfloat(exp_val);
}
#endif //__EXP_LUT_H__

// Approximates the reciprocal 1/x of a float and returns it as a bfloat16.
//
// The float is first rounded to bfloat16 precision (1 sign bit, 8 exponent
// bits, 7 mantissa bits). The exponent of the result is obtained by mirroring
// the biased exponent, and the mantissa of the result is read from the
// 128-entry table m_inv_lut, indexed by the 7-bit input mantissa.
//
// Parameters:
//   x - value to invert.
// Returns:
//   An approximation of 1/x carrying the same sign as x.
//
// NOTE(review): zero, subnormal, infinite and NaN inputs, and inputs whose
// rounded exponent field is 254 or more, are not special-cased here.
// NOTE(review): the bit patterns are accessed through pointer casts, as in the
// rest of this header; this relies on the toolchain tolerating such punning.
__attribute__((always_inline)) bfloat16 getInvBf16(float x) {
  const unsigned int sign_mask = 0x80000000;
  const unsigned int exp_mask = 0x7F800000;
  const unsigned int mantissa_mask = 0x007FFFFF;
  // Half of the least significant bfloat16 mantissa bit: adding it before
  // truncating the low 16 bits rounds the float to the nearest bfloat16.
  const unsigned int mantissa_Q = 0x00008000;

  // Raw IEEE-754 encoding of the input.
  unsigned int *B_x = (unsigned int *)&x;
  unsigned int B_Q = *B_x + mantissa_Q;

  // Biased exponent and 7-bit mantissa of the rounded input.
  unsigned char exponent = (B_Q & exp_mask) >> 23;
  unsigned char mantissa = (B_Q & mantissa_mask) >> 16;

  // For an input 2^(e-127) * 1.m the reciprocal is 2^(127-e) / 1.m. When m is
  // zero the biased result exponent is 254 - e; otherwise 1/1.m lies in
  // (0.5, 1), so one more power of two is borrowed and it becomes 253 - e.
  unsigned inv_exponent = (mantissa == 0) + (253 - exponent);

  // Assemble the bfloat16 encoding: exponent above the 7 mantissa bits.
  unsigned short inv_x_val = (inv_exponent << 7) + m_inv_lut[mantissa];

  // Carry the sign of the input over to the result; the exponent and mantissa
  // masks above drop it, which would otherwise make 1/x positive for x < 0.
  inv_x_val |= (unsigned short)((*B_x & sign_mask) >> 16);

  bfloat16 *inv_x = (bfloat16 *)&inv_x_val;
  return *inv_x;
}
#endif //__LUT_BASED_OPS_H__
6 changes: 3 additions & 3 deletions aie_runtime_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ function(add_aie_runtime_libs arch)

set(INSTALLS
chess_intrinsic_wrapper.cpp
exp_lut.cpp
exp_lut.h
libexp_lut.a)
lut_based_ops.cpp
lut_based_ops.h
liblut_based_ops.a)

foreach(file ${INSTALLS})
add_custom_target(aie-copy-${arch}-runtime-libs-${file} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${file})
Expand Down
Loading

0 comments on commit 732f8be

Please sign in to comment.