diff --git a/rtl/cv32e40p_controller.sv b/rtl/cv32e40p_controller.sv index 56cce6281..0c8379182 100644 --- a/rtl/cv32e40p_controller.sv +++ b/rtl/cv32e40p_controller.sv @@ -28,10 +28,14 @@ // // //////////////////////////////////////////////////////////////////////////////// -module cv32e40p_controller import cv32e40p_pkg::*; +module cv32e40p_controller + import cv32e40p_pkg::*; + import cv32e40p_apu_core_pkg::*; + import cv32e40p_fpu_pkg::*; #( parameter COREV_CLUSTER = 0, - parameter COREV_PULP = 1 + parameter COREV_PULP = 1, + parameter FPU = 0 ) ( input logic clk, // Gated clock @@ -191,6 +195,17 @@ module cv32e40p_controller import cv32e40p_pkg::*; output logic jr_stall_o, output logic load_stall_o, + // apu-interconnect + // handshake signals + // request channel + input logic apu_req_i, + input logic apu_gnt_i, + input logic [APU_WOP_CPU-1:0] apu_op_i, + // response channel + input logic apu_rvalid_i, + input mul_opcode_e mult_operator_i, + output logic mulh_stall_o, + input logic id_ready_i, // ID stage is ready input logic id_valid_i, // ID stage is valid @@ -238,6 +253,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; // qualify wfi vs nosleep locally logic wfi_active; + logic deassert_we_mulh; //////////////////////////////////////////////////////////////////////////////////////////// // ____ ___ ____ _____ ____ ___ _ _ _____ ____ ___ _ _ _____ ____ // @@ -1240,10 +1256,8 @@ module cv32e40p_controller import cv32e40p_pkg::*; endcase end - - generate - if(COREV_PULP) begin : gen_hwlp + if (COREV_PULP) begin : gen_hwlp ////////////////////////////////////////////////////////////////////////////// // Convert hwlp_jump_o to a pulse ////////////////////////////////////////////////////////////////////////////// @@ -1258,7 +1272,7 @@ generate assign hwlp_jump_o = (hwlp_end_4_id_d && !hwlp_end_4_id_q) ? 
1'b1 : 1'b0; always_ff @(posedge clk or negedge rst_n) begin - if(!rst_n) begin + if (rst_n == 1'b0) begin hwlp_end_4_id_q <= 1'b0; end else begin hwlp_end_4_id_q <= hwlp_end_4_id_d; @@ -1297,6 +1311,55 @@ generate endgenerate +generate + if (FPU) begin : gen_mulh_stall + logic [1:0] fp_ongoing; + logic [1:0] fp_multicycle_pos; + + always_ff @(posedge clk or negedge rst_n) begin + if (rst_n == 1'b0) begin + fp_ongoing <= 2'b0; + fp_multicycle_pos <= 2'b0; + end else begin + if (halt_id_o) begin + fp_ongoing <= 2'b0; + fp_multicycle_pos <= 2'b0; + end else begin + + if (apu_req_i && apu_gnt_i && !apu_rvalid_i) begin + fp_ongoing <= fp_ongoing + 1; + end else if (apu_rvalid_i && (!apu_req_i || !apu_gnt_i)) begin + fp_ongoing <= fp_ongoing - 1; + end + + if (apu_req_i && apu_gnt_i && (apu_op_i == cv32e40p_fpu_pkg::DIV || apu_op_i == cv32e40p_fpu_pkg::SQRT)) begin + fp_multicycle_pos <= fp_ongoing + 1; + end else if (apu_rvalid_i && fp_multicycle_pos == fp_ongoing && fp_multicycle_pos != 2'b0) begin + fp_multicycle_pos <= fp_multicycle_pos - 1; + end + end + end + end + + always_comb + begin + mulh_stall_o = 1'b0; + deassert_we_mulh = 1'b0; + + if (mult_operator_i == MUL_H && + ((apu_req_i && apu_gnt_i && (apu_op_i == cv32e40p_fpu_pkg::DIV || apu_op_i == cv32e40p_fpu_pkg::SQRT)) || fp_multicycle_pos != 2'b0) + ) + begin + mulh_stall_o = 1'b1; + deassert_we_mulh = 1'b1; + end + end + end else begin : gen_no_mulh_stall + assign mulh_stall_o = 1'b0; + assign deassert_we_mulh = 1'b0; + end +endgenerate + ///////////////////////////////////////////////////////////// // ____ _ _ _ ____ _ _ // // / ___|| |_ __ _| | | / ___|___ _ __ | |_ _ __ ___ | | // @@ -1307,6 +1370,7 @@ endgenerate ///////////////////////////////////////////////////////////// always_comb begin + jr_stall_o = 1'b0; load_stall_o = 1'b0; deassert_we_o = 1'b0; @@ -1314,6 +1378,10 @@ endgenerate if (~is_decoding_o) deassert_we_o = 1'b1; + // deassert WE in case of MULH in ID while long multicycle on-going 
FPU instruction + if (deassert_we_mulh) + deassert_we_o = 1'b1; + // deassert WE in case of illegal instruction if (illegal_insn_i) deassert_we_o = 1'b1; @@ -1327,8 +1395,8 @@ endgenerate (is_decoding_o && (regfile_we_id_i && !data_misaligned_i) && (regfile_waddr_ex_i == regfile_alu_waddr_id_i)) ) ) begin - deassert_we_o = 1'b1; load_stall_o = 1'b1; + deassert_we_o = 1'b1; end // Stall because of jr path @@ -1343,12 +1411,8 @@ endgenerate jr_stall_o = 1'b1; deassert_we_o = 1'b1; end - else - begin - jr_stall_o = 1'b0; - end - end + end // stall because of misaligned data access assign misaligned_stall_o = data_misaligned_i; @@ -1399,8 +1463,7 @@ endgenerate // update registers always_ff @(posedge clk , negedge rst_n) begin : UPDATE_REGS - if ( rst_n == 1'b0 ) - begin + if (rst_n == 1'b0) begin ctrl_fsm_cs <= RESET; jump_done_q <= 1'b0; data_err_q <= 1'b0; @@ -1410,9 +1473,7 @@ endgenerate debug_req_entry_q <= 1'b0; debug_force_wakeup_q <= 1'b0; - end - else - begin + end else begin ctrl_fsm_cs <= ctrl_fsm_ns; // clear when id is valid (no instruction incoming) @@ -1451,23 +1512,22 @@ endgenerate // sticky version of debug_req (must be on clk_ungated_i such that incoming pulse before core is enabled is not missed) always_ff @(posedge clk_ungated_i, negedge rst_n) - if ( !rst_n ) + if (rst_n == 1'b0) begin debug_req_q <= 1'b0; - else - if( debug_req_i ) + end else begin + if ( debug_req_i ) begin debug_req_q <= 1'b1; - else if( debug_mode_q ) + end else if ( debug_mode_q ) begin debug_req_q <= 1'b0; + end + end // Debug state FSM always_ff @(posedge clk , negedge rst_n) begin - if ( rst_n == 1'b0 ) - begin + if (rst_n == 1'b0) begin debug_fsm_cs <= HAVERESET; - end - else - begin + end else begin debug_fsm_cs <= debug_fsm_ns; end end diff --git a/rtl/cv32e40p_core.sv b/rtl/cv32e40p_core.sv index 528ac1c8f..717f5f141 100644 --- a/rtl/cv32e40p_core.sv +++ b/rtl/cv32e40p_core.sv @@ -630,6 +630,10 @@ module cv32e40p_core .apu_perf_dep_o (perf_apu_dep), .apu_busy_i 
(apu_busy), + .apu_req_i (apu_req_o), + .apu_gnt_i (apu_gnt_i), + .apu_rvalid_i(apu_rvalid_i), + // CSR ID/EX .csr_access_ex_o (csr_access_ex), .csr_op_ex_o (csr_op_ex), diff --git a/rtl/cv32e40p_id_stage.sv b/rtl/cv32e40p_id_stage.sv index 0f54e31eb..1f5b0a737 100644 --- a/rtl/cv32e40p_id_stage.sv +++ b/rtl/cv32e40p_id_stage.sv @@ -152,6 +152,10 @@ module cv32e40p_id_stage output logic apu_perf_dep_o, input logic apu_busy_i, + input logic apu_req_i, + input logic apu_gnt_i, + input logic apu_rvalid_i, + input logic fs_off_i, input logic [C_RM-1:0] frm_i, @@ -292,6 +296,7 @@ module cv32e40p_id_stage logic misaligned_stall; logic jr_stall; logic load_stall; + logic mulh_stall; logic csr_apu_stall; logic hwlp_mask; logic halt_id; @@ -1084,7 +1089,8 @@ module cv32e40p_id_stage cv32e40p_controller #( .COREV_CLUSTER(COREV_CLUSTER), - .COREV_PULP (COREV_PULP) + .COREV_PULP (COREV_PULP), + .FPU (FPU) ) controller_i ( .clk (clk), // Gated clock .clk_ungated_i(clk_ungated_i), // Ungated clock @@ -1241,6 +1247,13 @@ module cv32e40p_id_stage .jr_stall_o (jr_stall), .load_stall_o (load_stall), + .apu_req_i (apu_req_i), + .apu_gnt_i (apu_gnt_i), + .apu_op_i (apu_op_ex_o), + .apu_rvalid_i (apu_rvalid_i), + .mult_operator_i(mult_operator), + .mulh_stall_o (mulh_stall), + .id_ready_i(id_ready_o), .id_valid_i(id_valid_o), @@ -1642,7 +1655,7 @@ module cv32e40p_id_stage end // stall control - assign id_ready_o = ((~misaligned_stall) & (~jr_stall) & (~load_stall) & (~apu_stall) & (~csr_apu_stall) & ex_ready_i); + assign id_ready_o = ((~misaligned_stall) & (~jr_stall) & (~load_stall) & (~apu_stall) & (~csr_apu_stall) & (~mulh_stall) & ex_ready_i); assign id_valid_o = (~halt_id) & id_ready_o; assign halt_if_o = halt_if;