From f3a904fe5293f93cd4f0488af1a4ad21df281982 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 30 Aug 2023 12:34:43 +0200 Subject: [PATCH] CVFPU 0.8.1 vendorized for bug fixes - Fix Underflow flag for MUL and DIV/SQRT operations (#94 #726 #729) - Fix Float to Int conversion (#97 #83 #727) - Remove unnecessary trailing semicolon (#99) Signed-off-by: Pascal Gouedo --- rtl/vendor/pulp_platform_fpnew.lock.hjson | 2 +- rtl/vendor/pulp_platform_fpnew.vendor.hjson | 2 +- rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv | 6 +++++- rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv | 2 +- rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv | 4 +++- rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv | 6 ++++-- .../E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v | 2 +- 7 files changed, 16 insertions(+), 8 deletions(-) diff --git a/rtl/vendor/pulp_platform_fpnew.lock.hjson b/rtl/vendor/pulp_platform_fpnew.lock.hjson index e150bcb2f..de40549d5 100644 --- a/rtl/vendor/pulp_platform_fpnew.lock.hjson +++ b/rtl/vendor/pulp_platform_fpnew.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/pulp-platform/fpnew.git - rev: 11659d7ff3580ac3226c6d56a90ef717cdc530e3 + rev: 79e453139072df42c9ec8f697132ba485d74e23d } } diff --git a/rtl/vendor/pulp_platform_fpnew.vendor.hjson b/rtl/vendor/pulp_platform_fpnew.vendor.hjson index e76745d51..1fe09cca5 100644 --- a/rtl/vendor/pulp_platform_fpnew.vendor.hjson +++ b/rtl/vendor/pulp_platform_fpnew.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/pulp-platform/fpnew.git", - rev: "11659d7ff3580ac3226c6d56a90ef717cdc530e3", + rev: "79e453139072df42c9ec8f697132ba485d74e23d", }, exclude_from_upstream: [ diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv index 964ef7429..7abe33043 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv @@ -443,7 
+443,11 @@ module fpnew_cast_multi #( // By default right shift mantissa to be an integer denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); // overflow: when converting to unsigned the range is larger by one - if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + if ((input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) // Exponent larger than max int range, + && !(!op_mod_q2 // unless cast to signed int + && input_sign_q // and input value is largest negative int value + && (input_exp_q == signed'(fpnew_pkg::int_width(int_fmt_q2) - 1)) + && (input_mant_q == {1'b1, {INT_MAN_WIDTH-1{1'b0}}}))) begin denorm_shamt = '0; // prevent shifting of_before_round = 1'b1; // underflow diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv index a8b004952..56a2f5d62 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv @@ -207,7 +207,7 @@ module fpnew_divsqrt_multi #( // Valid synch with other lanes // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni) // Tell the other units that this unit has finished now or in the past assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv index 051e6a698..6fdd89056 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv @@ -613,7 +613,9 @@ module fpnew_fma #( ); // Classification after rounding - assign uf_after_round = 
rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign uf_after_round = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones // ----------------- diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv index e691f6777..471d966f0 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv @@ -745,8 +745,10 @@ module fpnew_fma_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : post_process - // detect of / uf - fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + // detect of / uf + fmt_uf_after_round[fmt] = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. // Assemble regular result, nan box short ones. 
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v index 87139a253..d22e85ba9 100644 --- a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v @@ -222,7 +222,7 @@ end assign ex4_rst_norm[31:0] = {fdsu_ex4_result_sign, ex4_expnt_rst[7:0], ex4_frac_23[22:0]}; -assign ex4_cor_uf = (fdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) +assign ex4_cor_uf = (fdsu_ex4_uf || ex4_denorm_potnt_norm || ex4_uf_plus) && fdsu_ex4_nx; assign ex4_cor_nx = fdsu_ex4_nx || fdsu_ex4_of