diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 7bdb5245d..cd1cf6f75 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -108,6 +108,7 @@ module cv32e40p_rvfi input logic apu_multicycle_i, input logic wb_contention_lsu_i, input logic wb_contention_i, + input logic regfile_we_lsu_i, input logic branch_in_ex_i, input logic branch_decision_ex_i, @@ -135,6 +136,7 @@ module cv32e40p_rvfi input logic rf_we_wb_i, input logic [ 5:0] rf_addr_wb_i, input logic [31:0] rf_wdata_wb_i, + input logic regfile_alu_we_ex_i, // LSU input logic [31:0] lsu_rdata_wb_i, @@ -610,7 +612,7 @@ module cv32e40p_rvfi `include "insn_trace.sv" - insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; +insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; insn_trace_t tmp_trace_wb; insn_trace_t rvfi_trace_q[$], wb_bypass_trace_q[$]; @@ -641,6 +643,7 @@ module cv32e40p_rvfi end end end + trace_q_size = wb_bypass_trace_q.size(); //Re-calculate here for accurate status endfunction /* * Function used to alocate a new insn and send it to the rvfi driver @@ -659,25 +662,6 @@ module cv32e40p_rvfi empty_fifo(); endfunction - /* - * - */ - function void f_bypass_wb(insn_trace_t m_ex_insn); - insn_trace_t new_rvfi_trace; - new_rvfi_trace = new(); - new_rvfi_trace.copy_full(m_ex_insn); - if (m_ex_insn.m_ex_fw) begin - new_rvfi_trace.m_rd_addr = m_ex_insn.m_rd_addr; - new_rvfi_trace.m_rd_wdata = m_ex_insn.m_rd_wdata; - end else begin - new_rvfi_trace.m_rd_addr[0] = '0; - new_rvfi_trace.m_rd_wdata[0] = '0; - new_rvfi_trace.m_rd_addr[1] = '0; - new_rvfi_trace.m_rd_wdata[1] = '0; - end - wb_bypass_trace_q.push_back(new_rvfi_trace); - endfunction - /* * Assing rvfi signals once the instruction is completed */ @@ -690,7 +674,7 @@ module cv32e40p_rvfi logic [31:0] s_fflags_mirror; logic [31:0] s_frm_mirror; logic [31:0] s_fcsr_mirror; - logic [31:0] r_previous_minstret; + logic [31:0] s_mstatus_sd_fs_mirror; function void set_rvfi(); insn_trace_t new_rvfi_trace; 
@@ -712,11 +696,21 @@ module cv32e40p_rvfi end else begin s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_rdata; end + if (new_rvfi_trace.m_csr.mstatus_we) begin + s_mstatus_sd_fs_mirror = new_rvfi_trace.m_csr.mstatus_wdata & 32'h8000_6000; + end else begin + s_mstatus_sd_fs_mirror = new_rvfi_trace.m_csr.mstatus_rdata & 32'h8000_6000; + end end else begin new_rvfi_trace.m_csr.fflags_rdata = s_fflags_mirror; new_rvfi_trace.m_csr.frm_rdata = s_frm_mirror; new_rvfi_trace.m_csr.fcsr_rdata = s_fcsr_mirror; + if (s_mstatus_sd_fs_mirror != 32'h0) begin + new_rvfi_trace.m_csr.mstatus_wdata = new_rvfi_trace.m_csr.mstatus_wdata | s_mstatus_sd_fs_mirror; + new_rvfi_trace.m_csr.mstatus_wmask = 32'hFFFF_FFFF; + s_mstatus_sd_fs_mirror = 32'h0; // Reset mirror + end if (new_rvfi_trace.m_fflags_we_non_apu) begin s_fflags_mirror = new_rvfi_trace.m_csr.fflags_wdata; s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_wdata; @@ -729,6 +723,14 @@ module cv32e40p_rvfi new_rvfi_trace.m_csr.frm_wmask = 32'hFFFF_FFFF; new_rvfi_trace.m_csr.fcsr_wmask = 32'hFFFF_FFFF; end + if (new_rvfi_trace.m_fcsr_we_non_apu) begin + s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_wdata; + s_fflags_mirror = new_rvfi_trace.m_csr.fflags_wdata; + s_frm_mirror = new_rvfi_trace.m_csr.frm_wdata; + new_rvfi_trace.m_csr.fcsr_wmask = 32'hFFFF_FFFF; + new_rvfi_trace.m_csr.fflags_wmask = 32'hFFFF_FFFF; + new_rvfi_trace.m_csr.frm_wmask = 32'hFFFF_FFFF; + end end @@ -742,20 +744,9 @@ module cv32e40p_rvfi // new_rvfi_trace.m_csr.mepc_wdata = 32'h0000_554E; // end - rvfi_order = new_rvfi_trace.m_order; - rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; - rvfi_insn = new_rvfi_trace.m_insn; - - //Trying something here - //Flag as trap everytime minstret is not incremented - - if (new_rvfi_trace.m_instret_cnt == r_previous_minstret) begin - // new_rvfi_trace.m_trap = 1'b0; - new_rvfi_trace.m_trap = 1'b1; - end else begin - r_previous_minstret = new_rvfi_trace.m_instret_cnt; - new_rvfi_trace.m_trap = 1'b0; - end + rvfi_order = new_rvfi_trace.m_order; 
+ rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; + rvfi_insn = new_rvfi_trace.m_insn; rvfi_rs1_addr = '0; rvfi_rs1_rdata = '0; @@ -854,17 +845,22 @@ module cv32e40p_rvfi `SET_RVFI_CSR_FROM_INSN(minstret) `SET_RVFI_CSR_FROM_INSN(mip) - rvfi_csr_tdata_rdata[0] = 'Z; - rvfi_csr_tdata_rmask[0] = '0; // Does not exist - rvfi_csr_tdata_wdata[0] = 'Z; // Does not exist - rvfi_csr_tdata_wmask[0] = '0; + rvfi_csr_tdata_rdata[0] = 'Z; + rvfi_csr_tdata_rmask[0] = '0; // Does not exist + rvfi_csr_tdata_wdata[0] = 'Z; // Does not exist + rvfi_csr_tdata_wmask[0] = '0; + + rvfi_csr_tdata_rdata[1] = new_rvfi_trace.m_csr.tdata1_rdata; + rvfi_csr_tdata_rmask[1] = new_rvfi_trace.m_csr.tdata1_rmask; //'1 + rvfi_csr_tdata_wdata[1] = new_rvfi_trace.m_csr.tdata1_wdata; + rvfi_csr_tdata_wmask[1] = new_rvfi_trace.m_csr.tdata1_wmask; - rvfi_csr_tdata_rdata[1] = new_rvfi_trace.m_csr.tdata1_rdata; - rvfi_csr_tdata_rmask[1] = new_rvfi_trace.m_csr.tdata1_rmask; //'1 - rvfi_csr_tdata_wdata[1] = new_rvfi_trace.m_csr.tdata1_wdata; - rvfi_csr_tdata_wmask[1] = new_rvfi_trace.m_csr.tdata1_wmask; + rvfi_csr_tdata_rdata[2] = new_rvfi_trace.m_csr.tdata2_rdata; + rvfi_csr_tdata_rmask[2] = new_rvfi_trace.m_csr.tdata2_rmask; //'1 + rvfi_csr_tdata_wdata[2] = new_rvfi_trace.m_csr.tdata2_wdata; + rvfi_csr_tdata_wmask[2] = new_rvfi_trace.m_csr.tdata2_wmask; - rvfi_csr_tdata_rmask[3:2] = '1; + rvfi_csr_tdata_rmask[3] = '1; `SET_RVFI_CSR_FROM_INSN(tinfo) `SET_RVFI_CSR_FROM_INSN(dcsr) @@ -892,16 +888,14 @@ module cv32e40p_rvfi `SET_RVFI_CSR_FROM_INSN(lpend1) `SET_RVFI_CSR_FROM_INSN(lpcount1) - endfunction // set_rvfi + endfunction // set_rvfi - int r_instret_cnt; function void minstret_to_id(); trace_id.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; trace_id.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; trace_id.m_csr.minstret_rmask = '1; trace_id.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; trace_id.m_csr.minstret_wmask = 
r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; - trace_id.m_instret_cnt = r_instret_cnt; endfunction function void minstret_to_ex(); @@ -910,7 +904,6 @@ module cv32e40p_rvfi trace_ex.m_csr.minstret_rmask = '1; trace_ex.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; trace_ex.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; - trace_ex.m_instret_cnt = r_instret_cnt; endfunction function void tinfo_to_id(); @@ -1052,7 +1045,6 @@ module cv32e40p_rvfi if (r_pipe_freeze_trace.exc_pc_mux == EXC_PC_IRQ) begin s_irq = 1'b1; trace_if.m_is_irq = 1'b1; - trace_if.m_trap = 1'b1; end end @@ -1060,30 +1052,6 @@ module cv32e40p_rvfi s_dbg_exception = 1'b0; end - if (r_pipe_freeze_trace.pc_id == trace_if.m_pc_rdata) begin - if (trace_if.m_valid && (s_dbg_exception || s_exception)) begin - trace_if.m_trap = 1'b1; - end - end - - if (r_pipe_freeze_trace.pc_id == trace_ex.m_pc_rdata) begin - if (trace_ex.m_valid && (s_dbg_exception || s_exception)) begin - trace_ex.m_trap = 1'b1; - end - end - - if (r_pipe_freeze_trace.pc_id == trace_wb.m_pc_rdata) begin - if (trace_wb.m_valid && (s_dbg_exception || s_exception)) begin - trace_wb.m_trap = 1'b1; - end - end - - if (r_pipe_freeze_trace.pc_id == trace_id.m_pc_rdata) begin - if (trace_id.m_valid && (s_dbg_exception || s_exception)) begin - trace_id.m_trap = 1'b1; - end - end - endfunction /* * This tracer works with three process, @@ -1094,24 +1062,36 @@ module cv32e40p_rvfi * * The third updates the rvfi interface */ - `define CSR_FROM_PIPE(TRACE_NAME, - CSR_NAME) \ - trace_``TRACE_NAME``.m_csr.``CSR_NAME``_we = r_pipe_freeze_trace.csr.``CSR_NAME``_we; \ + `define CSR_FROM_PIPE(TRACE_NAME, CSR_NAME) \ + if (r_pipe_freeze_trace.csr.``CSR_NAME``_we || r_pipe_freeze_trace.csr.we) begin \ + trace_``TRACE_NAME``.m_csr.``CSR_NAME``_we = r_pipe_freeze_trace.csr.``CSR_NAME``_we; \ + trace_``TRACE_NAME``.m_csr.``CSR_NAME``_wdata = r_pipe_freeze_trace.csr.``CSR_NAME``_n; \ + 
trace_``TRACE_NAME``.m_csr.``CSR_NAME``_wmask = '1; \ + end \ trace_``TRACE_NAME``.m_csr.``CSR_NAME``_rdata = r_pipe_freeze_trace.csr.``CSR_NAME``_q; \ - trace_``TRACE_NAME``.m_csr.``CSR_NAME``_rmask = '1; \ - trace_``TRACE_NAME``.m_csr.``CSR_NAME``_wdata = r_pipe_freeze_trace.csr.``CSR_NAME``_n; \ - trace_``TRACE_NAME``.m_csr.``CSR_NAME``_wmask = r_pipe_freeze_trace.csr.``CSR_NAME``_we ? '1 : '0; + trace_``TRACE_NAME``.m_csr.``CSR_NAME``_rmask = '1; //those event are for debug purpose event e_dev_send_wb_1, e_dev_send_wb_2; - event e_dev_commit_rf_to_ex_1, e_dev_commit_rf_to_ex_2, e_dev_commit_rf_to_ex_3; + event + e_dev_commit_rf_to_ex_1, + e_dev_commit_rf_to_ex_2, + e_dev_commit_rf_to_ex_3, + e_dev_commit_rf_to_ex_4, + e_dev_commit_rf_to_ex_5; event e_if_2_id_1, e_if_2_id_2; event e_ex_to_wb_1, e_ex_to_wb_2; event e_id_to_ex_1, e_id_to_ex_2; event e_commit_dpc; event e_send_rvfi_trace_apu_resp; - event e_send_rvfi_trace_ex_1, e_send_rvfi_trace_ex_2, e_send_rvfi_trace_ex_3, e_send_rvfi_trace_ex_4; + event + e_send_rvfi_trace_ex_1, + e_send_rvfi_trace_ex_2, + e_send_rvfi_trace_ex_3, + e_send_rvfi_trace_ex_4, + e_send_rvfi_trace_ex_5, + e_send_rvfi_trace_ex_6; event e_send_rvfi_trace_wb_1, e_send_rvfi_trace_wb_2, e_send_rvfi_trace_wb_3; event e_send_rvfi_trace_id_1; @@ -1136,12 +1116,30 @@ module cv32e40p_rvfi function void csr_to_apu_req(); `CSR_FROM_PIPE(apu_req, misa) `CSR_FROM_PIPE(apu_req, tdata1) - trace_apu_req.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; + `CSR_FROM_PIPE(apu_req, tdata2) + trace_apu_req.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; trace_apu_req.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; trace_apu_req.m_csr.tinfo_rmask = '1; trace_apu_req.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; trace_apu_req.m_csr.tinfo_wmask = '0; + trace_apu_req.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; + trace_apu_req.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; + 
trace_apu_req.m_csr.minstret_rmask = '1; + trace_apu_req.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; + trace_apu_req.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; + + trace_apu_req.m_csr.lpcount0_we = '0; + trace_apu_req.m_csr.lpcount0_rdata = r_pipe_freeze_trace.hwloop.counter_q[0]; + trace_apu_req.m_csr.lpcount0_rmask = '1; + trace_apu_req.m_csr.lpcount0_wdata = '0; + trace_apu_req.m_csr.lpcount0_wmask = '0; + + trace_apu_req.m_csr.lpcount1_we = '0; + trace_apu_req.m_csr.lpcount1_rdata = r_pipe_freeze_trace.hwloop.counter_q[1]; + trace_apu_req.m_csr.lpcount1_rmask = '1; + trace_apu_req.m_csr.lpcount1_wdata = '0; + trace_apu_req.m_csr.lpcount1_wmask = '0; `CSR_FROM_PIPE(apu_req, frm) @@ -1182,75 +1180,76 @@ module cv32e40p_rvfi end endfunction + /* + * Decoding is complete and instruction enters execute stage + * If at that time, minstret is not asserted it means we have a trap + */ + bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes + bit s_is_irq_start; + bit s_id_done; + function void if_to_id(); + trace_id.init(trace_if); + trace_id.m_trap = ~r_pipe_freeze_trace.minstret; + trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; + trace_if.m_valid = 1'b0; + s_id_done = 1'b0; + `CSR_FROM_PIPE(id, dpc) + endfunction + task compute_pipeline(); bit s_new_valid_insn; bit s_ex_valid_adjusted; bit s_wb_valid_adjusted; - bit s_id_done; - bit s_apu_wb_ok; bit s_apu_0_cycle_reps; bit s_fflags_we_non_apu; bit s_frm_we_non_apu; - bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes - bit s_is_irq_start; + bit s_fcsr_we_non_apu; bit s_skip_wb; // used to skip wb monitoring when apu resp and not lsu - bit s_increase_instret_1; - bit s_increase_instret_2; - - bit s_test_for_dret; - - trace_if = new(); - trace_id = new(); - trace_ex = new(); - trace_wb = new(); - s_new_valid_insn = 1'b0; - s_ex_valid_adjusted = 1'b0; - 
s_id_done = 1'b0; - s_apu_wb_ok = 1'b0; - s_apu_0_cycle_reps = 1'b0; + bit s_core_is_decoding; // For readability, ctrl_fsm is DECODE or DECODE_HWLOOP - next_send = 1; - cnt_data_req = 0; - cnt_data_resp = 0; - cnt_apu_req = 0; - cnt_apu_resp = 0; - csr_is_irq = '0; - is_dbg_taken = '0; - s_was_flush = 1'b0; + trace_if = new(); + trace_id = new(); + trace_ex = new(); + trace_wb = new(); + s_new_valid_insn = 1'b0; + s_ex_valid_adjusted = 1'b0; - r_previous_minstret = -1; + s_id_done = 1'b0; + s_apu_wb_ok = 1'b0; + s_apu_0_cycle_reps = 1'b0; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; + next_send = 1; + cnt_data_req = 0; + cnt_data_resp = 0; + cnt_apu_req = 0; + cnt_apu_resp = 0; + csr_is_irq = '0; + is_dbg_taken = '0; + s_was_flush = 1'b0; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; - s_skip_wb = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; - r_instret_cnt = 0; - s_increase_instret_1 = 1'b0; - s_increase_instret_2 = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; + s_skip_wb = 1'b0; - s_test_for_dret = 1'b0; + s_core_is_decoding = 1'b0; - $display("*****Starting pipeline computing*****\n"); forever begin - wait(e_pipe_monitor_ok.triggered); // event triggered + wait(e_pipe_monitor_ok.triggered); // event triggered #1; + s_core_is_decoding = (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) || (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE_HWLOOP); check_trap(); - if (s_increase_instret_2) begin - r_instret_cnt = r_instret_cnt + 1; - end - s_increase_instret_2 = s_increase_instret_1; - s_increase_instret_1 = r_pipe_freeze_trace.minstret; - pc_mux_interrupt = 1'b0; if (r_pipe_freeze_trace.pc_mux == 4'b0100) begin if (r_pipe_freeze_trace.exc_pc_mux == 3'b001) begin @@ -1275,6 +1274,7 @@ module cv32e40p_rvfi minstret_to_id(); `CSR_FROM_PIPE(id, misa) `CSR_FROM_PIPE(id, tdata1) + `CSR_FROM_PIPE(id, tdata2) tinfo_to_id(); `CSR_FROM_PIPE(id, mip) send_rvfi(trace_id); @@ -1324,7 +1324,7 @@ module cv32e40p_rvfi s_new_valid_insn = r_pipe_freeze_trace.id_valid && 
r_pipe_freeze_trace.is_decoding;// && !r_pipe_freeze_trace.apu_rvalid; - s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && ((r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX) || (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE_HWLOOP));// && !r_pipe_freeze_trace.apu_rvalid;; + s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX));// && !r_pipe_freeze_trace.apu_rvalid;; s_fflags_we_non_apu = 1'b0; if (r_pipe_freeze_trace.csr.fflags_we) begin @@ -1340,6 +1340,13 @@ module cv32e40p_rvfi end end + s_fcsr_we_non_apu = 1'b0; + if (r_pipe_freeze_trace.csr.fcsr_we) begin + if (cnt_apu_resp == cnt_apu_req) begin //No ongoing apu instruction + s_fcsr_we_non_apu = 1'b1; + end + end + //WB_STAGE s_skip_wb = 1'b0; if (r_pipe_freeze_trace.apu_rvalid && (apu_trace_q.size() > 0)) begin @@ -1353,7 +1360,7 @@ module cv32e40p_rvfi if((trace_wb.m_rd_addr[0] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - end else if (trace_wb.m_2_rd_insn && (trace_wb.m_rd_addr[1] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin + end else if (trace_wb.m_2_rd_insn && (trace_wb.m_rd_addr[1] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[1])) begin trace_wb.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; trace_wb.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; end @@ -1361,8 +1368,7 @@ module cv32e40p_rvfi if (!trace_wb.m_data_missaligned) begin send_rvfi(trace_wb); - ->e_dev_send_wb_1; - ->e_send_rvfi_trace_wb_2; + ->e_dev_send_wb_1; ->e_send_rvfi_trace_wb_2; trace_wb.m_valid = 1'b0; end else begin if (s_wb_valid_adjusted) begin @@ -1375,27 +1381,32 @@ module cv32e40p_rvfi trace_wb.m_got_first_data = 1'b1; end else begin send_rvfi(trace_wb); - ->e_dev_send_wb_2; - 
->e_send_rvfi_trace_wb_3; + ->e_dev_send_wb_2; ->e_send_rvfi_trace_wb_3; trace_wb.m_valid = 1'b0; end - end // rf_we_wb + end // rf_we_wb end end end if (trace_ex.m_valid) begin + if (!trace_ex.m_csr.got_minstret) begin + minstret_to_ex(); + end `CSR_FROM_PIPE(ex, misa) `CSR_FROM_PIPE(ex, tdata1) + `CSR_FROM_PIPE(ex, tdata2) tinfo_to_ex(); - if(r_pipe_freeze_trace.rf_we_wb) begin - if(cnt_data_resp == trace_ex.m_mem_req_id[0]) begin - trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + if (r_pipe_freeze_trace.regfile_we_lsu) begin + ->e_dev_commit_rf_to_ex_4; + if ((cnt_data_resp == trace_ex.m_mem_req_id[0]) && !(trace_ex.m_got_ex_reg)) begin + trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + trace_ex.m_got_first_data = 1'b1; end else if (cnt_data_resp == trace_ex.m_mem_req_id[1]) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; trace_ex.m_got_first_data = 1'b1; end @@ -1407,12 +1418,13 @@ module cv32e40p_rvfi trace_ex.m_valid = 1'b0; ->e_send_rvfi_trace_ex_2; end else begin - if (r_pipe_freeze_trace.rf_we_wb) begin + if (r_pipe_freeze_trace.rf_we_wb && !s_apu_to_lsu_port) begin ->e_dev_commit_rf_to_ex_1; if (trace_ex.m_got_ex_reg) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_2_rd_insn = 1'b1; + trace_ex.m_2_rd_insn = 1'b1; + trace_ex.m_got_first_data = 1'b1; end else begin trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; @@ -1423,16 +1435,25 @@ module cv32e40p_rvfi if (!s_ex_valid_adjusted & !trace_ex.m_csr.got_minstret) begin minstret_to_ex(); end - ->e_ex_to_wb_1; - trace_wb.move_down_pipe(trace_ex); + if (trace_ex.m_is_load) begin // only move relevant 
instr in wb stage + ->e_ex_to_wb_1; + trace_wb.move_down_pipe(trace_ex); + end else begin + if (!trace_ex.m_csr.got_minstret) begin + minstret_to_ex(); + end + send_rvfi(trace_ex); + ->e_send_rvfi_trace_ex_6; + end trace_ex.m_valid = 1'b0; end - end else if (r_pipe_freeze_trace.rf_we_wb) begin + end else if (r_pipe_freeze_trace.rf_we_wb && !s_apu_to_lsu_port && !s_was_flush) begin ->e_dev_commit_rf_to_ex_2; if (trace_ex.m_got_ex_reg) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_2_rd_insn = 1'b1; + trace_ex.m_2_rd_insn = 1'b1; + trace_ex.m_got_first_data = 1'b1; end else begin trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; @@ -1441,13 +1462,13 @@ module cv32e40p_rvfi end end - s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid || s_test_for_dret) && ((r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) || (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE_HWLOOP)) && (!r_pipe_freeze_trace.apu_rvalid || r_pipe_freeze_trace.data_req_ex); - s_test_for_dret = r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF; + s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ex_ready) && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF)) && (!r_pipe_freeze_trace.apu_rvalid || r_pipe_freeze_trace.data_req_ex); //EX_STAGE if (trace_id.m_valid) begin mtvec_to_id(); `CSR_FROM_PIPE(id, mip) + `CSR_FROM_PIPE(id, misa) if (!csr_is_irq && !s_is_irq_start) begin mstatus_to_id(); @@ -1466,7 +1487,7 @@ module cv32e40p_rvfi `CSR_FROM_PIPE(id, fcsr) if (r_pipe_freeze_trace.csr.we) begin - `CSR_FROM_PIPE(id, dpc) + `CSR_FROM_PIPE(id, dpc) end if (s_fflags_we_non_apu) begin trace_id.m_fflags_we_non_apu = 1'b1; @@ -1475,21 +1496,21 @@ module cv32e40p_rvfi if (s_frm_we_non_apu) begin trace_id.m_frm_we_non_apu = 1'b1; end + + if 
(s_fcsr_we_non_apu) begin + trace_id.m_fcsr_we_non_apu = 1'b1; + end + trace_ex.m_csr.fflags_wmask = '0; trace_ex.m_csr.frm_wmask = '0; trace_ex.m_csr.fcsr_wmask = '0; - if (r_pipe_freeze_trace.apu_req) begin + if (r_pipe_freeze_trace.apu_req && r_pipe_freeze_trace.apu_gnt) begin trace_id.m_is_apu = 1'b1; trace_id.m_apu_req_id = cnt_apu_req; trace_apu_req = new(); trace_apu_req.copy_full(trace_id); csr_to_apu_req(); - if(s_increase_instret_2) begin - trace_apu_req.m_instret_cnt = r_instret_cnt + 1; - end else begin - trace_apu_req.m_instret_cnt = r_instret_cnt; - end trace_apu_req.set_to_apu(); apu_trace_q.push_back(trace_apu_req); trace_id.m_valid = 1'b0; @@ -1505,7 +1526,7 @@ module cv32e40p_rvfi trace_ex.m_valid = 1'b0; ->e_send_rvfi_trace_ex_3; end - if (r_pipe_freeze_trace.ex_reg_we && !r_pipe_freeze_trace.apu_rvalid) begin + if (r_pipe_freeze_trace.ex_reg_we && !s_apu_to_alu_port) begin trace_id.m_ex_fw = 1'b1; trace_id.m_rd_addr[0] = r_pipe_freeze_trace.ex_reg_addr; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.ex_reg_wdata; @@ -1532,33 +1553,49 @@ module cv32e40p_rvfi trace_id.m_mem_req_id[0] = cnt_data_req; end end + if (trace_id.m_got_ex_reg) begin // Shift index 0 to 1 + trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; + trace_id.m_mem_req_id[0] = 0; + end end - ->e_id_to_ex_1; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid = 1'b0; ->e_id_to_ex_1; - end else if (r_pipe_freeze_trace.ex_reg_we) begin + end else if (r_pipe_freeze_trace.ex_reg_we && r_pipe_freeze_trace.rf_alu_we_ex) begin trace_id.m_ex_fw = 1'b1; trace_id.m_rd_addr[0] = r_pipe_freeze_trace.ex_reg_addr; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.ex_reg_wdata; trace_id.m_got_ex_reg = 1'b1; trace_id.m_got_regs_write = 1'b1; + // mem_req_id[0] already set here indicates req_id was set before rf write from EX + // Hence adjust the req_id again here for such cases + if (trace_id.m_mem_req_id[0] != 0) begin + 
trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; + trace_id.m_mem_req_id[0] = 0; + end end - end + end //trace_if.m_valid //ID_STAGE if (s_new_valid_insn) begin // There is a new valid instruction if (trace_id.m_valid) begin if (trace_ex.m_valid) begin - minstret_to_ex(); + if (!trace_ex.m_csr.got_minstret) begin + minstret_to_ex(); + end if (trace_wb.m_valid) begin send_rvfi(trace_ex); ->e_send_rvfi_trace_ex_4; end else begin - ->e_ex_to_wb_2; - trace_wb.move_down_pipe(trace_ex); + if (trace_ex.m_is_load) begin // only move relevant instr in wb stage + ->e_ex_to_wb_2; + trace_wb.move_down_pipe(trace_ex); + end else begin + send_rvfi(trace_ex); + ->e_send_rvfi_trace_ex_5; + end end trace_ex.m_valid = 1'b0; end @@ -1579,6 +1616,10 @@ module cv32e40p_rvfi trace_id.m_mem_req_id[0] = cnt_data_req; end end + if (trace_id.m_got_ex_reg) begin // Shift index 0 to 1 + trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; + trace_id.m_mem_req_id[0] = 0; + end end else if (r_pipe_freeze_trace.rf_we_wb && !r_pipe_freeze_trace.ex_reg_we) begin trace_id.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; @@ -1589,21 +1630,12 @@ module cv32e40p_rvfi trace_id.m_valid = 1'b0; ->e_id_to_ex_2; end - trace_id.init(trace_if); - trace_id.m_is_ebreak = trace_if.m_is_ebreak; - trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; - trace_if.m_valid = 1'b0; - s_id_done = 1'b0; - + if_to_id(); + trace_id.m_is_ebreak = trace_if.m_is_ebreak; csrId_to_id(); - `CSR_FROM_PIPE(id, dscratch0) `CSR_FROM_PIPE(id, dscratch1) mstatus_to_id(); - - `CSR_FROM_PIPE(id, dpc) ->e_if_2_id_1; end else begin if (trace_id.m_valid) begin @@ -1613,17 +1645,10 @@ module cv32e40p_rvfi end //IF_STAGE - if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready) begin if(trace_if.m_valid && r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.id_ready && !trace_id.m_valid && r_pipe_freeze_trace.ebrk_insn_dec) 
begin - trace_id.init(trace_if); - trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; - trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; - trace_if.m_valid = 1'b0; - s_id_done = 1'b0; - `CSR_FROM_PIPE(id, dpc) + if_to_id(); + trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; ->e_if_2_id_2; end @@ -1652,7 +1677,6 @@ module cv32e40p_rvfi s_is_pc_set = 1'b1; end - csr_is_irq = r_pipe_freeze_trace.csr_cause[5]; is_dbg_taken = ((r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID) | (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF)) ? 1'b1 : 1'b0; saved_debug_cause = r_pipe_freeze_trace.debug_cause; diff --git a/bhv/cv32e40p_tb_wrapper.sv b/bhv/cv32e40p_tb_wrapper.sv index 14de3b607..be184867f 100644 --- a/bhv/cv32e40p_tb_wrapper.sv +++ b/bhv/cv32e40p_tb_wrapper.sv @@ -210,7 +210,7 @@ module cv32e40p_tb_wrapper .apu_en_i (cv32e40p_top_i.apu_req), .apu_singlecycle_i(cv32e40p_top_i.core_i.ex_stage_i.apu_singlecycle), .apu_multicycle_i (cv32e40p_top_i.core_i.ex_stage_i.apu_multicycle), - .apu_rvalid_i (cv32e40p_top_i.apu_rvalid) + .apu_rvalid_i (cv32e40p_top_i.core_i.ex_stage_i.apu_valid) ); `endif @@ -298,7 +298,7 @@ module cv32e40p_tb_wrapper .apu_multicycle_i (cv32e40p_top_i.core_i.ex_stage_i.apu_multicycle), .wb_contention_lsu_i(cv32e40p_top_i.core_i.ex_stage_i.wb_contention_lsu), .wb_contention_i (cv32e40p_top_i.core_i.ex_stage_i.wb_contention), - + .regfile_we_lsu_i (cv32e40p_top_i.core_i.ex_stage_i.regfile_we_lsu), // .rf_we_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_we_fw_i), // .rf_addr_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_waddr_fw_i), // .rf_wdata_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_wdata_fw_i), @@ -339,11 +339,12 @@ module cv32e40p_tb_wrapper .rf_we_wb_i(cv32e40p_top_i.core_i.id_stage_i.regfile_we_wb_i), .rf_addr_wb_i(cv32e40p_top_i.core_i.id_stage_i.regfile_waddr_wb_i), .rf_wdata_wb_i(cv32e40p_top_i.core_i.id_stage_i.regfile_wdata_wb_i), + 
.regfile_alu_we_ex_i(cv32e40p_top_i.core_i.id_stage_i.regfile_alu_we_ex_o), // APU .apu_req_i (cv32e40p_top_i.core_i.apu_req_o), .apu_gnt_i (cv32e40p_top_i.core_i.apu_gnt_i), - .apu_rvalid_i(cv32e40p_top_i.core_i.apu_rvalid_i), + .apu_rvalid_i(cv32e40p_top_i.core_i.ex_stage_i.apu_valid), // Controller FSM probes .ctrl_fsm_cs_i(cv32e40p_top_i.core_i.id_stage_i.controller_i.ctrl_fsm_cs), @@ -367,6 +368,10 @@ module cv32e40p_tb_wrapper .csr_tdata1_q_i (cv32e40p_top_i.core_i.cs_registers_i.tmatch_control_rdata),//gen_trigger_regs.tmatch_control_exec_q ), .csr_tdata1_we_i(cv32e40p_top_i.core_i.cs_registers_i.gen_trigger_regs.tmatch_control_we), + .csr_tdata2_n_i (cv32e40p_top_i.core_i.cs_registers_i.tmatch_value_rdata),//csr_wdata_int ), + .csr_tdata2_q_i (cv32e40p_top_i.core_i.cs_registers_i.tmatch_value_rdata),//gen_trigger_regs.tmatch_control_exec_q ), + .csr_tdata2_we_i(cv32e40p_top_i.core_i.cs_registers_i.gen_trigger_regs.tmatch_value_we), + .csr_tinfo_n_i({16'h0, cv32e40p_top_i.core_i.cs_registers_i.tinfo_types}), .csr_tinfo_q_i({16'h0, cv32e40p_top_i.core_i.cs_registers_i.tinfo_types}), diff --git a/bhv/insn_trace.sv b/bhv/insn_trace.sv index cb766fc7e..f73e98145 100644 --- a/bhv/insn_trace.sv +++ b/bhv/insn_trace.sv @@ -15,6 +15,10 @@ this.m_csr.``CSR_NAME``_wdata = m_source.m_csr.``CSR_NAME``_wdata; \ this.m_csr.``CSR_NAME``_wmask = m_source.m_csr.``CSR_NAME``_wmask; + `define INIT_CSR(CSR_NAME) \ + this.m_csr.``CSR_NAME``_we = '0; \ + this.m_csr.``CSR_NAME``_wmask = '0; + class insn_trace_t; bit m_valid; logic [63:0] m_order; @@ -38,6 +42,7 @@ logic m_fflags_we_non_apu; logic m_frm_we_non_apu; + logic m_fcsr_we_non_apu; logic [5:0] m_rs1_addr; logic [5:0] m_rs2_addr; logic [31:0] m_rs1_rdata; @@ -148,9 +153,40 @@ this.m_trap = 1'b0; this.m_fflags_we_non_apu = 1'b0; this.m_frm_we_non_apu = 1'b0; + this.m_fcsr_we_non_apu = 1'b0; this.m_instret_cnt = 0; endfunction + function void init_csr(); + `INIT_CSR(mstatus) + `INIT_CSR(misa) + `INIT_CSR(mie) + 
`INIT_CSR(mtvec) + `INIT_CSR(mcountinhibit) + `INIT_CSR(mscratch) + `INIT_CSR(mepc) + `INIT_CSR(mcause) + `INIT_CSR(minstret) + `INIT_CSR(mip) + `INIT_CSR(tdata1) + `INIT_CSR(tdata2) + `INIT_CSR(tinfo) + `INIT_CSR(dcsr) + `INIT_CSR(dpc) + `INIT_CSR(dscratch0) + `INIT_CSR(dscratch1) + `INIT_CSR(mvendorid) + `INIT_CSR(marchid) + `INIT_CSR(fflags) + `INIT_CSR(frm ) + `INIT_CSR(fcsr ) + `INIT_CSR(lpstart0 ) + `INIT_CSR(lpend0 ) + `INIT_CSR(lpcount0 ) + `INIT_CSR(lpstart1 ) + `INIT_CSR(lpend1 ) + `INIT_CSR(lpcount1 ) + endfunction /* * */ @@ -189,6 +225,7 @@ this.m_trap = 1'b0; this.m_fflags_we_non_apu = 1'b0; this.m_frm_we_non_apu = 1'b0; + this.m_fcsr_we_non_apu = 1'b0; this.m_csr.mcause_we = '0; if (is_compressed_id_i) begin this.m_insn[31:16] = '0; @@ -213,6 +250,8 @@ this.m_mem.wmask = '0; this.m_mem.rdata = '0; this.m_mem.wdata = '0; + + init_csr(); endfunction function logic [63:0] get_order_for_trap(); @@ -256,6 +295,7 @@ this.m_trap = m_source.m_trap; this.m_fflags_we_non_apu = m_source.m_fflags_we_non_apu; this.m_frm_we_non_apu = m_source.m_frm_we_non_apu ; + this.m_fcsr_we_non_apu = m_source.m_fcsr_we_non_apu; this.m_mem = m_source.m_mem; //CRS diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index b5d433df1..3635fd030 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -94,6 +94,7 @@ typedef struct { logic apu_multicycle; logic wb_contention_lsu; logic wb_contention; + logic regfile_we_lsu; logic branch_in_ex; logic branch_decision_ex; @@ -121,6 +122,7 @@ typedef struct { logic rf_we_wb; logic [5:0] rf_addr_wb; logic [31:0] rf_wdata_wb; + logic rf_alu_we_ex; // LSU logic [31:0] lsu_rdata_wb; @@ -436,6 +438,7 @@ task monitor_pipeline(); r_pipe_freeze_trace.apu_multicycle = apu_multicycle_i; r_pipe_freeze_trace.wb_contention_lsu = wb_contention_lsu_i; r_pipe_freeze_trace.wb_contention = wb_contention_i; + r_pipe_freeze_trace.regfile_we_lsu = regfile_we_lsu_i; r_pipe_freeze_trace.branch_in_ex = branch_in_ex_i; 
r_pipe_freeze_trace.branch_decision_ex = branch_decision_ex_i; @@ -463,6 +466,7 @@ task monitor_pipeline(); r_pipe_freeze_trace.rf_we_wb = rf_we_wb_i; r_pipe_freeze_trace.rf_addr_wb = rf_addr_wb_i; r_pipe_freeze_trace.rf_wdata_wb = rf_wdata_wb_i; + r_pipe_freeze_trace.rf_alu_we_ex = regfile_alu_we_ex_i; // LSU r_pipe_freeze_trace.lsu_rdata_wb = lsu_rdata_wb_i; @@ -676,6 +680,17 @@ task monitor_pipeline(); r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; compute_csr_we(); + + //If fcsr_we has triggered, then fflags_we and frm_we should also be triggered + if (r_pipe_freeze_trace.csr.fcsr_we) begin + r_pipe_freeze_trace.csr.fflags_we = 1'b1; + r_pipe_freeze_trace.csr.frm_we = 1'b1; + end else begin + if (r_pipe_freeze_trace.csr.fflags_we || r_pipe_freeze_trace.csr.frm_we) begin + r_pipe_freeze_trace.csr.fcsr_we = 1'b1; + end + end + if (csr_fcsr_fflags_we_i) begin r_pipe_freeze_trace.csr.fflags_we = 1'b1; r_pipe_freeze_trace.csr.fcsr_we = 1'b1; diff --git a/docs/source/corev_hw_loop.rst b/docs/source/corev_hw_loop.rst index 6168c713f..7b83ec09f 100644 --- a/docs/source/corev_hw_loop.rst +++ b/docs/source/corev_hw_loop.rst @@ -46,7 +46,8 @@ Following constraints must be respected by any toolchain compiler or by hand-wri In order to catch **as early as possible** those software exceptions when executing a program either on a verification Reference Model or on a virtual platform Instruction Set Simulator, ``those model/simulation platforms -must generate a fatal error`` with a meaningfull message related to Hardware Loops constraints violation. +should generate an error`` with a meaningfull message related to Hardware Loops constraints violation. +Those constraint checks could be done only for each instruction in the hardware loop body, meaning when (lpstartX <= PC <= lpendX - 4) and (lpcountX > 0). 
The HWLoop constraints are: @@ -63,6 +64,8 @@ The HWLoop constraints are: - When both loops are nested, the End address of the outermost HWLoop (must be #1) must be at least 2 instructions further than the End address of the innermost HWLoop (must be #0), i.e. HWLoop[1].endaddress >= HWLoop[0].endaddress + 8. + Remark: To avoid adding 2 NOPs in case nothing can be put there by the compiler, lpcount setting of the inner loop could be moved after it + without forgetting to add the same in the preamble before the outer loop start address. - HWLoop must always be entered from its start location (no branch/jump to a location inside a HWLoop body). @@ -74,7 +77,7 @@ The HWLoop constraints are: - No memory ordering instructions (fence, fence.i) allowed in the HWLoop body. -- No privileged instructions (mret, dret, ecall, wfi) allowed in the HWLoop body, except for ebreak. +- No privileged instructions (mret, dret, wfi) allowed in the HWLoop body, except for ebreak and ecall. The rationale of NOT generating any hardware exception when violating any of those constraints is that it would add resources (32-bit adders and substractors needed for the third and fourth rules) which are costly in area and power consumption. @@ -111,17 +114,17 @@ Below an assembly code example of a nested HWLoop that computes a matrix additio ".balign 4;" "cv.endi 0, endZ;" "cv.starti 0, startZ;" + "cv.count 0, %[N];" "any instructions here" ".balign 4;" ".option norvc;" "startO:;" - " cv.count 0, %[N];" " startZ:;" " addi %[i], %[i], 1;" " addi %[i], %[i], 1;" " addi %[i], %[i], 1;" " endZ:;" - " addi %[j], %[j], 2;" + " cv.count 0, %[N];" " addi %[j], %[j], 2;" "endO:;" : [i] "+r" (i), [j] "+r" (j) @@ -132,6 +135,6 @@ Below an assembly code example of a nested HWLoop that computes a matrix additio At the beginning of the HWLoop, the registers %[i] and %[j] are 0. The innermost loop, from startZ to (endZ - 4), adds to %[i] three times 1 and it is executed 10x10 times. 
Whereas the outermost loop, from startO to (endO - 4), -executes 10 times the innermost loop and adds two times 2 to the register %[j]. -At the end of the loop, the register %[i] contains 300 and the register %[j] contains 40. +executes 10 times the innermost loop and adds 2 to the register %[j]. +At the end of the loop, the register %[i] contains 300 and the register %[j] contains 20. diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index 351720f22..5e4357ef7 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -672,19 +672,19 @@ Bit Manipulation Encoding +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ | 31: 30 | 29 : 25 | 24 : 20 | 19 : 15 | 14 : 12 | 11 : 7 | 6 : 0 | | +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ - | **f2** | **ls3[4:0]** | **ls2[4:0]** | **rs1** | **funct3** | **rD** | **opcode** | **Mnemonic** | + | **f2** | **Is3[4:0]** | **Is2[4:0]** | **rs1** | **funct3** | **rD** | **opcode** | **Mnemonic** | +========+======================+===============+=========+============+========+============+====================================+ - | 00 | Luimm5[4:0] | Iuimm5[4:0] | src | 000 | dest | 101 1011 | **cv.extract rD, rs1, Is3, Is2** | + | 00 | Luimm5[4:0] | Luimm5[4:0] | src | 000 | dest | 101 1011 | **cv.extract rD, rs1, Is3, Is2** | +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ - | 01 | Luimm5[4:0] | Iuimm5[4:0] | src | 000 | dest | 101 1011 | **cv.extractu rD, rs1, Is3, Is2** | + | 01 | Luimm5[4:0] | Luimm5[4:0] | src | 000 | dest | 101 1011 | **cv.extractu rD, rs1, Is3, Is2** | 
+--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ - | 10 | Luimm5[4:0] | Iuimm5[4:0] | src | 000 | dest | 101 1011 | **cv.insert rD, rs1, Is3, Is2** | + | 10 | Luimm5[4:0] | Luimm5[4:0] | src | 000 | dest | 101 1011 | **cv.insert rD, rs1, Is3, Is2** | +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ - | 00 | Luimm5[4:0] | Iuimm5[4:0] | src | 001 | dest | 101 1011 | **cv.bclr rD, rs1, Is3, Is2** | + | 00 | Luimm5[4:0] | Luimm5[4:0] | src | 001 | dest | 101 1011 | **cv.bclr rD, rs1, Is3, Is2** | +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ - | 01 | Luimm5[4:0] | Iuimm5[4:0] | src | 001 | dest | 101 1011 | **cv.bset rD, rs1, Is3, Is2** | + | 01 | Luimm5[4:0] | Luimm5[4:0] | src | 001 | dest | 101 1011 | **cv.bset rD, rs1, Is3, Is2** | +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ - | 11 | 000, Luimm2[1:0] | Iuimm5[4:0] | src | 001 | dest | 101 1011 | **cv.bitrev rD, rs1, Is3, Is2** | + | 11 | 000, Luimm2[1:0] | Luimm5[4:0] | src | 001 | dest | 101 1011 | **cv.bitrev rD, rs1, Is3, Is2** | +--------+----------------------+---------------+---------+------------+--------+------------+------------------------------------+ .. table:: Register Bit Manipulation operations encoding @@ -771,7 +771,7 @@ General ALU operations | | | | | else rD = rs1 | | | | - | | Note: If ls2 is equal to 0, | + | | Note: If Is2 is equal to 0, | | | | | | -2^(Is2-1) is equivalent to -1 while (2^(Is2-1)-1) is equivalent to 0. 
| +-------------------------------------------+------------------------------------------------------------------------+ @@ -781,7 +781,7 @@ General ALU operations | | | | | else rD = rs1 | | | | - | | Note: If ls2 is equal to 0, (2^(Is2-1)-1) is equivalent to 0. | + | | Note: If Is2 is equal to 0, (2^(Is2-1)-1) is equivalent to 0. | +-------------------------------------------+------------------------------------------------------------------------+ | **cv.clipr rD, rs1, rs2** | if rs1 <= -(rs2+1), rD = -(rs2+1), | | | | @@ -926,9 +926,9 @@ General ALU Encoding +------------+---------------+---------+------------+--------+------------+-----------------------------+ | **funct7** | **Is2[4:0]** | **rs1** | **funct3** | **rD** | **opcode** | | +============+===============+=========+============+========+============+=============================+ - | 011 1000 | Iuimm5[4:0] | src1 | 011 | dest | 010 1011 | **cv.clip rD, rs1, Is2** | + | 011 1000 | Luimm5[4:0] | src1 | 011 | dest | 010 1011 | **cv.clip rD, rs1, Is2** | +------------+---------------+---------+------------+--------+------------+-----------------------------+ - | 011 1001 | Iuimm5[4:0] | src1 | 011 | dest | 010 1011 | **cv.clipu rD, rs1, Is2** | + | 011 1001 | Luimm5[4:0] | src1 | 011 | dest | 010 1011 | **cv.clipu rD, rs1, Is2** | +------------+---------------+---------+------------+--------+------------+-----------------------------+ | 011 1010 | src2 | src1 | 011 | dest | 010 1011 | **cv.clipr rD, rs1, rs2** | +------------+---------------+---------+------------+--------+------------+-----------------------------+ diff --git a/docs/source/verification.rst b/docs/source/verification.rst index afb4b3490..afc645886 100644 --- a/docs/source/verification.rst +++ b/docs/source/verification.rst @@ -35,10 +35,13 @@ Final functional, code and test coverage reports can be found `here `_. 
+ +--------------------------------+-----------+---------------------------------------------------------------------------------------+ + | **Issue Type** | **Count** | **Note** | + +================================+===========+=======================================================================================+ + | Illegal instructions exception | 5 | F and XPULP instructions corner cases or CSR accesses not flagged as Illegal | + | | | instructions exception. | + +--------------------------------+-----------+---------------------------------------------------------------------------------------+ + | Multi-cycle F instructions | 8 | FDIV, FSQRT or respective F instructions (when FPU_ADDMUL_LAT or FPU_OTHERS_LAT = 2) | + | | | are executed in the background and the pipeline can continue to execute other | + | | | instructions as long as there is no Read-After-Write or Write-After-Write dependency. | + | | | When the multi-cycle F instructions are finally writing back their result in the | + | | | Register File, this register update can corrupt on-going instructions behaviour or | + | | | result. This is the case for Misaligned Loads, Post-Incremented Load/Stores, MULH, | + | | | JALR or cv.add*NR/cv.sub*NR. | + +--------------------------------+-----------+---------------------------------------------------------------------------------------+ + | F instructions result or flags | 5 | F result or flags computations is incorrect with respect to IEEE 754-2008 standard. | + +--------------------------------+-----------+---------------------------------------------------------------------------------------+ -.. TO DEVELOP ... +A classification of the simulation issues by method used to identify them is informative: + +.. 
table:: Breakdown of Issues found by Simulation in v2.0.0 + :name: Breakdown of Issues found by Simulation in v2.0.0 + :widths: 27 9 64 + :class: no-scrollbar-table + + +------------------------------+-----------+----------------------------------------------------------------------------------------+ + | **Simulation Method** | **Count** | **Note** | + +==============================+===========+========================================================================================+ + | Directed, self-checking test | | Many test supplied by Design team and a couple from the Open Source Community at large | + +------------------------------+-----------+----------------------------------------------------------------------------------------+ + | RVFI/RVVI | | Issues directly attributed to comparison against Reference Model | + +------------------------------+-----------+----------------------------------------------------------------------------------------+ + | Constrained-Random | | Test generated by corev-dv (extension of riscv-dv) | + +------------------------------+-----------+----------------------------------------------------------------------------------------+ + + +A classification of the Simulation issues themselves: + +.. 
table:: Simulation Issue Classification in v2.0.0 + :name: Simulation Issue Classification in v2.0.0 + :widths: 27 9 64 + :class: no-scrollbar-table + + +------------------------------+-----------+----------------------------------------------------------------------------------------+ + | **Issue Type** | **Count** | **Note** | + +==============================+===========+========================================================================================+ + | RTL Functional bug | | | + +------------------------------+-----------+----------------------------------------------------------------------------------------+ + | | | | + +------------------------------+-----------+----------------------------------------------------------------------------------------+ Formal verification ^^^^^^^^^^^^^^^^^^^ -To accelerate the verification of more than 300 Xpulp instructions, Formal Verification methodology has been used with Siemens EDA Onespin tools and their RISC-V ISA app. +To accelerate the verification of more than 300 Xpulp instructions, Formal Verification methodology has been used with Siemens EDA Onespin tools and its RISC-V ISA Processor Verification app. -The Xpulp instructions pseudo-code description using Sail language have been added to the RISC-V ISA app to successfully formally verify all the CV32E40P instructions, including the previously verified standard IMC together with the new F, Zfinx and Xpulp extensions. -This has been applied on 5 different core configurations (controlled via SystemVerilog parameters). +The Xpulp instructions pseudo-code description using Sail language have been added to the RISC-V ISA app to successfully formally verify all the CV32E40P instructions, including the previously verified standard IMC together with the new F, Zfinx and Xpulp extensions and all additional custom CSRs. -WIP... +Example: -.. ADD VERIFIED PARAMETERS VALUES TABLE +.. 
code-block:: text + + { + "name": "CV.SDOTUP.B", + "disassembly": "cv.sdotup.b {rd},{rs1},{rs2}", + "decoding": "1001100 rs2 rs1 001 rd/rs3 1111011", + "restrictions": "", + "execution": "X(rd) = X(rs3) + EXTZ(mul(X(rs1)[7..0],X(rs2)[7..0])) + + EXTZ(mul(X(rs1)[15..8],X(rs2)[15..8])) + + EXTZ(mul(X(rs1)[23..16],X(rs2)[23..16])) + + EXTZ(mul(X(rs1)[31..24],X(rs2)[31..24]))" + }, + +Those SAIL instruction descriptions are then used to automatically generate more than 430 assertions and 29 CSR descriptions. +Those assertions have been applied on the 9 different configurations listed in :ref:`Verified configurations` table. + +RTL code coverage is generated using Siemens EDA Onespin Quantify tool which uses RTL mutation to check assertions quality and can produce standard UCDB database that can be merged with simulation one afterwards. + +WIP... .. ADD PLANS AND REPORTS LINKS -.. `Verification Plan `_. -.. Final functional, code and test coverage reports can be found `here `_. +.. Formal Verification assertions and RTL code coverage reports can be found `here `_. + +.. TO DEVELOP ... + +Simulation verification +^^^^^^^^^^^^^^^^^^^^^^^ + +core-v-verif verification environment for v1.0.0 was using a *step&compare* methodology with an instruction set simulator (ISS) from Imperas Software as the reference model. +This strategy was successful, but inefficient because the *step&compare* logic in the testbench must compensate for the cycle-time effects of events that are asynchronous to the instruction stream such as interrupts, debug resets plus bus errors and random delays on instruction fetch and load/store memory buses. +For verification of v2.0.0 release of the CV32E40P core, the step-and-compare and the ISS have been replaced by a true reference model (RM) called ImperasDV. In addition, the Imperas Reference Model has been extended to support the v2 Xpulp instructions specification. 
+ +Another innovation for v2.0.0 was the adoption of a standardized tracer interface to the DUT and RM, based on the open-source RISC-V Verification Interface (RVVI). The use of well documented, standardized interfaces greatly simplifies the integration of the DUT with the RM. + +Additionally to V1 Verification plans, `Verification Plan `_ contains a `new section `_ related to F and XPULP verification. + +WIP... + +.. ADD REPORTS LINKS .. TO DEVELOP ... @@ -159,13 +284,9 @@ WIP... Tracer ------ -TODO: To re-work with ImperasDV tracer. - -.. ALL CHAPTER TO ADAPT WITH NEW IMPERASDV METHODOLOGY AND RVVI TRACER - -The module ``cv32e40p_tracer`` can be used to create a log of the executed instructions. +The module ``cv32e40p_rvfi_trace`` can be used to create a log of the executed instructions. It is a behavioral, non-synthesizable, module instantiated in the example testbench that is provided for -the ``cv32e40p_top``. It can be enabled during simulation by defining **CV32E40P_TRACE_EXECUTION**. +the ``cv32e40p_top``. It can be enabled during simulation by defining **CV32E40P_RVFI_TRACE_EXECUTION**. Output file ^^^^^^^^^^^ @@ -188,13 +309,15 @@ The trace output is in tab-separated columns. - Numeric register names are used (e.g. ``x1``). - Symbolic CSR names are used. - Jump/branch targets are given as absolute address if possible (PC + immediate). -6. **Register and memory contents**: For all accessed registers, the value before and after the instruction execution is given. Writes to registers are indicated as ``registername=value``, reads as ``registername:value``. For memory accesses, the address and the loaded and stored data are given. +6. **Register and memory contents**: For all accessed registers, the value before and after the instruction execution is given. Writes to registers are indicated as ``registername=value``, reads as ``registername:value``. For memory accesses, the physical address (PA), the loaded and stored data are given. + + .. 
code-block:: text Time Cycle PC Instr Decoded instruction Register and memory contents 130 61 00000150 4481 c.li x9,0 x9=0x00000000 132 62 00000152 00008437 lui x8,0x8 x8=0x00008000 - 134 63 00000156 fff40413 addi x8,x8,-1 x8:0x00008000 x8=0x00007fff - 136 64 0000015a 8c65 c.and x8,x9 x8:0x00007fff x9:0x00000000 x8=0x00000000 - 142 67 0000015c c622 c.swsp x8,12(x2) x2:0x00002000 x8:0x00000000 PA:0x0000200c + 134 63 00000156 fff40413 addi x8,x8,-1 x8=0x00007fff x8:0x00008000 + 136 64 0000015a 8c65 c.and x8,x9 x8=0x00000000 x8:0x00007fff x9:0x00000000 + 142 67 0000015c c622 c.swsp x8,12(x2) x2:0x00002000 x8:0x00000000 PA:0x0000200c diff --git a/rtl/cv32e40p_apu_disp.sv b/rtl/cv32e40p_apu_disp.sv index adc9a3485..94ca9bcbd 100644 --- a/rtl/cv32e40p_apu_disp.sv +++ b/rtl/cv32e40p_apu_disp.sv @@ -47,6 +47,7 @@ module cv32e40p_apu_disp ( input logic [2:0][5:0] read_regs_i, input logic [2:0] read_regs_valid_i, output logic read_dep_o, + output logic read_dep_for_jalr_o, input logic [1:0][5:0] write_regs_i, input logic [1:0] write_regs_valid_i, @@ -189,6 +190,10 @@ module cv32e40p_apu_disp ( assign read_dep_o = (read_dep_req | read_dep_inflight | read_dep_waiting) & is_decoding_i; assign write_dep_o = (write_dep_req | write_dep_inflight | write_dep_waiting) & is_decoding_i; + assign read_dep_for_jalr_o = is_decoding_i & ((|read_deps_req & enable_i) | + (|read_deps_inflight & valid_inflight) | + (|read_deps_waiting & valid_waiting)); + // // Stall signals // diff --git a/rtl/cv32e40p_controller.sv b/rtl/cv32e40p_controller.sv index 7cd3d41d7..d3941dd28 100644 --- a/rtl/cv32e40p_controller.sv +++ b/rtl/cv32e40p_controller.sv @@ -31,7 +31,8 @@ module cv32e40p_controller import cv32e40p_pkg::*; #( parameter COREV_CLUSTER = 0, - parameter COREV_PULP = 1 + parameter COREV_PULP = 0, + parameter FPU = 0 ) ( input logic clk, // Gated clock @@ -78,7 +79,6 @@ module cv32e40p_controller import cv32e40p_pkg::*; // HWLoop signls input logic [31:0] pc_id_i, - input logic is_compressed_i, 
// from hwloop_regs input logic [1:0] [31:0] hwlp_start_addr_i, @@ -105,6 +105,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; // APU dependency checks input logic apu_en_i, input logic apu_read_dep_i, + input logic apu_read_dep_for_jalr_i, input logic apu_write_dep_i, output logic apu_stall_o, @@ -208,13 +209,13 @@ module cv32e40p_controller import cv32e40p_pkg::*; // Debug state debug_state_e debug_fsm_cs, debug_fsm_ns; - logic jump_done, jump_done_q, jump_in_dec, branch_in_id_dec, branch_in_id; + logic jump_done, jump_done_q, jump_in_dec, branch_in_id; logic data_err_q; logic debug_mode_q, debug_mode_n; logic ebrk_force_debug_mode; - logic is_hwlp_illegal, is_hwlp_body; + logic is_hwlp_body; logic illegal_insn_q, illegal_insn_n; logic debug_req_entry_q, debug_req_entry_n; logic debug_force_wakeup_q, debug_force_wakeup_n; @@ -292,7 +293,6 @@ module cv32e40p_controller import cv32e40p_pkg::*; jump_in_dec = ctrl_transfer_insn_in_dec_i == BRANCH_JALR || ctrl_transfer_insn_in_dec_i == BRANCH_JAL; branch_in_id = ctrl_transfer_insn_in_id_i == BRANCH_COND; - branch_in_id_dec = ctrl_transfer_insn_in_dec_i == BRANCH_COND; ebrk_force_debug_mode = (debug_ebreakm_i && current_priv_lvl_i == PRIV_LVL_M) || (debug_ebreaku_i && current_priv_lvl_i == PRIV_LVL_U); @@ -317,8 +317,6 @@ module cv32e40p_controller import cv32e40p_pkg::*; hwlp_mask_o = 1'b0; - is_hwlp_illegal = 1'b0; - hwlp_dec_cnt_o = '0; hwlp_end_4_id_d = 1'b0; @@ -527,9 +525,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; else begin - is_hwlp_illegal = is_hwlp_body & (jump_in_dec || branch_in_id_dec || mret_insn_i || uret_insn_i || dret_insn_i || is_compressed_i || fencei_insn_i || wfi_active); - - if(illegal_insn_i || is_hwlp_illegal) begin + if (illegal_insn_i) begin halt_if_o = 1'b1; halt_id_o = 1'b0; @@ -752,9 +748,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; else begin - is_hwlp_illegal = (jump_in_dec || branch_in_id_dec || mret_insn_i || uret_insn_i || dret_insn_i || is_compressed_i 
|| fencei_insn_i || wfi_active); - - if(illegal_insn_i || is_hwlp_illegal) begin + if (illegal_insn_i) begin halt_if_o = 1'b1; halt_id_o = 1'b1; @@ -1352,7 +1346,10 @@ endgenerate if ((ctrl_transfer_insn_in_dec_i == BRANCH_JALR) && (((regfile_we_wb_i == 1'b1) && (reg_d_wb_is_reg_a_i == 1'b1)) || ((regfile_we_ex_i == 1'b1) && (reg_d_ex_is_reg_a_i == 1'b1)) || - ((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_i == 1'b1))) ) + ((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_i == 1'b1)) || + (FPU && (apu_read_dep_for_jalr_i == 1'b1)) + ) + ) begin jr_stall_o = 1'b1; deassert_we_o = 1'b1; @@ -1565,7 +1562,7 @@ endgenerate property p_no_hwlp; @(posedge clk) (1'b1) |-> ((pc_mux_o != PC_HWLOOP) && (ctrl_fsm_cs != DECODE_HWLOOP) && - (hwlp_mask_o == 1'b0) && (is_hwlp_illegal == 'b0) && (is_hwlp_body == 'b0) && + (hwlp_mask_o == 1'b0) && (is_hwlp_body == 'b0) && (hwlp_start_addr_i == 'b0) && (hwlp_end_addr_i == 'b0) && (hwlp_counter_i[1] == 32'b0) && (hwlp_counter_i[0] == 32'b0) && (hwlp_dec_cnt_o == 2'b0) && (hwlp_jump_o == 1'b0) && (hwlp_targ_addr_o == 32'b0) && (hwlp_end0_eq_pc == 1'b0) && (hwlp_end1_eq_pc == 1'b0) && (hwlp_counter0_gt_1 == 1'b0) && (hwlp_counter1_gt_1 == 1'b0) && diff --git a/rtl/cv32e40p_core.sv b/rtl/cv32e40p_core.sv index 528ac1c8f..899492da4 100644 --- a/rtl/cv32e40p_core.sv +++ b/rtl/cv32e40p_core.sv @@ -213,6 +213,7 @@ module cv32e40p_core logic [ 2:0][ 5:0] apu_read_regs; logic [ 2:0] apu_read_regs_valid; logic apu_read_dep; + logic apu_read_dep_for_jalr; logic [ 1:0][ 5:0] apu_write_regs; logic [ 1:0] apu_write_regs_valid; logic apu_write_dep; @@ -361,7 +362,6 @@ module cv32e40p_core // APU master signals assign apu_flags_o = apu_flags_ex; - assign fflags_csr = apu_flags_i; ////////////////////////////////////////////////////////////////////////////////////////////// // ____ _ _ __ __ _ // @@ -621,14 +621,15 @@ module cv32e40p_core .apu_flags_ex_o (apu_flags_ex), .apu_waddr_ex_o (apu_waddr_ex), - .apu_read_regs_o (apu_read_regs), - 
.apu_read_regs_valid_o (apu_read_regs_valid), - .apu_read_dep_i (apu_read_dep), - .apu_write_regs_o (apu_write_regs), - .apu_write_regs_valid_o(apu_write_regs_valid), - .apu_write_dep_i (apu_write_dep), - .apu_perf_dep_o (perf_apu_dep), - .apu_busy_i (apu_busy), + .apu_read_regs_o (apu_read_regs), + .apu_read_regs_valid_o (apu_read_regs_valid), + .apu_read_dep_i (apu_read_dep), + .apu_read_dep_for_jalr_i(apu_read_dep_for_jalr), + .apu_write_regs_o (apu_write_regs), + .apu_write_regs_valid_o (apu_write_regs_valid), + .apu_write_dep_i (apu_write_dep), + .apu_perf_dep_o (perf_apu_dep), + .apu_busy_i (apu_busy), // CSR ID/EX .csr_access_ex_o (csr_access_ex), @@ -779,8 +780,12 @@ module cv32e40p_core .mult_multicycle_o(mult_multicycle), // to ID/EX pipe registers + .data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline + .data_misaligned_i (data_misaligned), + // FPU .fpu_fflags_we_o(fflags_we), + .fpu_fflags_o (fflags_csr), // APU .apu_en_i (apu_en_ex), @@ -788,14 +793,14 @@ module cv32e40p_core .apu_lat_i (apu_lat_ex), .apu_operands_i(apu_operands_ex), .apu_waddr_i (apu_waddr_ex), - .apu_flags_i (apu_flags_ex), - .apu_read_regs_i (apu_read_regs), - .apu_read_regs_valid_i (apu_read_regs_valid), - .apu_read_dep_o (apu_read_dep), - .apu_write_regs_i (apu_write_regs), - .apu_write_regs_valid_i(apu_write_regs_valid), - .apu_write_dep_o (apu_write_dep), + .apu_read_regs_i (apu_read_regs), + .apu_read_regs_valid_i (apu_read_regs_valid), + .apu_read_dep_o (apu_read_dep), + .apu_read_dep_for_jalr_o(apu_read_dep_for_jalr), + .apu_write_regs_i (apu_write_regs), + .apu_write_regs_valid_i (apu_write_regs_valid), + .apu_write_dep_o (apu_write_dep), .apu_perf_type_o(perf_apu_type), .apu_perf_cont_o(perf_apu_cont), @@ -813,6 +818,7 @@ module cv32e40p_core // response channel .apu_rvalid_i (apu_rvalid_i), .apu_result_i (apu_result_i), + .apu_flags_i (apu_flags_i), .lsu_en_i (data_req_ex), .lsu_rdata_i(lsu_rdata), @@ -901,8 +907,6 @@ module cv32e40p_core 
.data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline .data_misaligned_o (data_misaligned), - .apu_busy_i(apu_busy), - .p_elw_start_o (p_elw_start), .p_elw_finish_o(p_elw_finish), diff --git a/rtl/cv32e40p_ex_stage.sv b/rtl/cv32e40p_ex_stage.sv index 6b58a8425..08392f29f 100644 --- a/rtl/cv32e40p_ex_stage.sv +++ b/rtl/cv32e40p_ex_stage.sv @@ -76,8 +76,12 @@ module cv32e40p_ex_stage output logic mult_multicycle_o, + input logic data_misaligned_ex_i, + input logic data_misaligned_i, + // FPU signals output logic fpu_fflags_we_o, + output logic [APU_NUSFLAGS_CPU-1:0] fpu_fflags_o, // APU signals input logic apu_en_i, @@ -85,11 +89,12 @@ module cv32e40p_ex_stage input logic [ 1:0] apu_lat_i, input logic [ APU_NARGS_CPU-1:0][31:0] apu_operands_i, input logic [ 5:0] apu_waddr_i, - input logic [APU_NDSFLAGS_CPU-1:0] apu_flags_i, + input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i, input logic [2:0][5:0] apu_read_regs_i, input logic [2:0] apu_read_regs_valid_i, output logic apu_read_dep_o, + output logic apu_read_dep_for_jalr_o, input logic [1:0][5:0] apu_write_regs_i, input logic [1:0] apu_write_regs_valid_i, output logic apu_write_dep_o, @@ -143,7 +148,7 @@ module cv32e40p_ex_stage output logic branch_decision_o, // Stall Control - input logic is_decoding_i, // Used to mask data Dependency inside the APU dispatcher in case of an istruction non valid + input logic is_decoding_i, // Used to mask data Dependency inside the APU dispatcher in case of an istruction non valid input logic lsu_ready_ex_i, // EX part of LSU is done input logic lsu_err_i, @@ -152,29 +157,34 @@ module cv32e40p_ex_stage input logic wb_ready_i // WB stage ready for new data ); - logic [31:0] alu_result; - logic [31:0] mult_result; - logic alu_cmp_result; + logic [ 31:0] alu_result; + logic [ 31:0] mult_result; + logic alu_cmp_result; - logic regfile_we_lsu; - logic [ 5:0] regfile_waddr_lsu; + logic regfile_we_lsu; + logic [ 5:0] regfile_waddr_lsu; - logic wb_contention; - logic 
wb_contention_lsu; + logic wb_contention; + logic wb_contention_lsu; - logic alu_ready; - logic mult_ready; + logic alu_ready; + logic mulh_active; + logic mult_ready; // APU signals - logic apu_valid; - logic [ 5:0] apu_waddr; - logic [31:0] apu_result; - logic apu_stall; - logic apu_active; - logic apu_singlecycle; - logic apu_multicycle; - logic apu_req; - logic apu_gnt; + logic apu_valid; + logic [ 5:0] apu_waddr; + logic [ 31:0] apu_result; + logic apu_stall; + logic apu_active; + logic apu_singlecycle; + logic apu_multicycle; + logic apu_req; + logic apu_gnt; + + logic apu_rvalid_q; + logic [ 31:0] apu_result_q; + logic [APU_NUSFLAGS_CPU-1:0] apu_flags_q; // ALU write port mux always_comb begin @@ -295,9 +305,10 @@ module cv32e40p_ex_stage .result_o(mult_result), - .multicycle_o(mult_multicycle_o), - .ready_o (mult_ready), - .ex_ready_i (ex_ready_o) + .multicycle_o (mult_multicycle_o), + .mulh_active_o(mulh_active), + .ready_o (mult_ready), + .ex_ready_i (ex_ready_o) ); generate @@ -326,13 +337,14 @@ module cv32e40p_ex_stage .active_o(apu_active), .stall_o (apu_stall), - .is_decoding_i (is_decoding_i), - .read_regs_i (apu_read_regs_i), - .read_regs_valid_i (apu_read_regs_valid_i), - .read_dep_o (apu_read_dep_o), - .write_regs_i (apu_write_regs_i), - .write_regs_valid_i(apu_write_regs_valid_i), - .write_dep_o (apu_write_dep_o), + .is_decoding_i (is_decoding_i), + .read_regs_i (apu_read_regs_i), + .read_regs_valid_i (apu_read_regs_valid_i), + .read_dep_o (apu_read_dep_o), + .read_dep_for_jalr_o(apu_read_dep_for_jalr_o), + .write_regs_i (apu_write_regs_i), + .write_regs_valid_i (apu_write_regs_valid_i), + .write_dep_o (apu_write_dep_o), .perf_type_o(apu_perf_type_o), .perf_cont_o(apu_perf_cont_o), @@ -345,40 +357,60 @@ module cv32e40p_ex_stage .apu_rvalid_i(apu_valid) ); - assign apu_perf_wb_o = wb_contention | wb_contention_lsu; - assign apu_ready_wb_o = ~(apu_active | apu_en_i | apu_stall) | apu_valid; + assign apu_perf_wb_o = wb_contention | 
wb_contention_lsu; + assign apu_ready_wb_o = ~(apu_active | apu_en_i | apu_stall) | apu_valid; + + /////////////////////////////////////// + // APU result memorization Register // + /////////////////////////////////////// + always_ff @(posedge clk, negedge rst_n) begin : APU_Result_Memorization + if (~rst_n) begin + apu_rvalid_q <= 1'b0; + apu_result_q <= 'b0; + apu_flags_q <= 'b0; + end else begin + if (apu_rvalid_i && apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) begin + apu_rvalid_q <= 1'b1; + apu_result_q <= apu_result_i; + apu_flags_q <= apu_flags_i; + end else if (apu_rvalid_q && !(data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) begin + apu_rvalid_q <= 1'b0; + end + end + end - assign apu_req_o = apu_req; - assign apu_gnt = apu_gnt_i; - assign apu_valid = apu_rvalid_i; - assign apu_operands_o = apu_operands_i; - assign apu_op_o = apu_op_i; - assign apu_result = apu_result_i; + assign apu_req_o = apu_req; + assign apu_gnt = apu_gnt_i; + assign apu_valid = (apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) ? 1'b0 : (apu_rvalid_i || apu_rvalid_q); + assign apu_operands_o = apu_operands_i; + assign apu_op_o = apu_op_i; + assign apu_result = apu_rvalid_q ? apu_result_q : apu_result_i; assign fpu_fflags_we_o = apu_valid; + assign fpu_fflags_o = apu_rvalid_q ? apu_flags_q : apu_flags_i; end else begin : gen_no_apu // default assignements for the case when no FPU/APU is attached. 
- assign apu_req_o = '0; - assign apu_operands_o[0] = '0; - assign apu_operands_o[1] = '0; - assign apu_operands_o[2] = '0; - assign apu_op_o = '0; - assign apu_req = 1'b0; - assign apu_gnt = 1'b0; - assign apu_result = 32'b0; - assign apu_valid = 1'b0; - assign apu_waddr = 6'b0; - assign apu_stall = 1'b0; - assign apu_active = 1'b0; - assign apu_ready_wb_o = 1'b1; - assign apu_perf_wb_o = 1'b0; - assign apu_perf_cont_o = 1'b0; - assign apu_perf_type_o = 1'b0; - assign apu_singlecycle = 1'b0; - assign apu_multicycle = 1'b0; - assign apu_read_dep_o = 1'b0; - assign apu_write_dep_o = 1'b0; - assign fpu_fflags_we_o = 1'b0; - + assign apu_req_o = '0; + assign apu_operands_o[0] = '0; + assign apu_operands_o[1] = '0; + assign apu_operands_o[2] = '0; + assign apu_op_o = '0; + assign apu_req = 1'b0; + assign apu_gnt = 1'b0; + assign apu_result = 32'b0; + assign apu_valid = 1'b0; + assign apu_waddr = 6'b0; + assign apu_stall = 1'b0; + assign apu_active = 1'b0; + assign apu_ready_wb_o = 1'b1; + assign apu_perf_wb_o = 1'b0; + assign apu_perf_cont_o = 1'b0; + assign apu_perf_type_o = 1'b0; + assign apu_singlecycle = 1'b0; + assign apu_multicycle = 1'b0; + assign apu_read_dep_o = 1'b0; + assign apu_read_dep_for_jalr_o = 1'b0; + assign apu_write_dep_o = 1'b0; + assign fpu_fflags_o = '0; end endgenerate diff --git a/rtl/cv32e40p_id_stage.sv b/rtl/cv32e40p_id_stage.sv index 80b5b645e..7b7f85cbd 100644 --- a/rtl/cv32e40p_id_stage.sv +++ b/rtl/cv32e40p_id_stage.sv @@ -146,6 +146,7 @@ module cv32e40p_id_stage output logic [2:0][5:0] apu_read_regs_o, output logic [2:0] apu_read_regs_valid_o, input logic apu_read_dep_i, + input logic apu_read_dep_for_jalr_i, output logic [1:0][5:0] apu_write_regs_o, output logic [1:0] apu_write_regs_valid_o, input logic apu_write_dep_i, @@ -804,6 +805,12 @@ module cv32e40p_id_stage // dependency checks always_comb begin unique case (alu_op_a_mux_sel) + OP_A_CURRPC: begin + if (ctrl_transfer_target_mux_sel == JT_JALR) begin + apu_read_regs[0] = 
regfile_addr_ra_id; + apu_read_regs_valid[0] = 1'b1; + end + end // OP_A_CURRPC: OP_A_REGA_OR_FWD: begin apu_read_regs[0] = regfile_addr_ra_id; apu_read_regs_valid[0] = 1'b1; @@ -847,7 +854,7 @@ module cv32e40p_id_stage apu_read_regs_valid[2] = 1'b1; end OP_C_REGC_OR_FWD: begin - if (alu_op_a_mux_sel != OP_A_REGC_OR_FWD) begin + if ((alu_op_a_mux_sel != OP_A_REGC_OR_FWD) && (ctrl_transfer_target_mux_sel != JT_JALR)) begin apu_read_regs[2] = regfile_addr_rc_id; apu_read_regs_valid[2] = 1'b1; end else begin @@ -1089,7 +1096,8 @@ module cv32e40p_id_stage cv32e40p_controller #( .COREV_CLUSTER(COREV_CLUSTER), - .COREV_PULP (COREV_PULP) + .COREV_PULP (COREV_PULP), + .FPU (FPU) ) controller_i ( .clk (clk), // Gated clock .clk_ungated_i(clk_ungated_i), // Ungated clock @@ -1136,8 +1144,7 @@ module cv32e40p_id_stage .trap_addr_mux_o(trap_addr_mux_o), // HWLoop signls - .pc_id_i (pc_id_i), - .is_compressed_i(is_compressed_i), + .pc_id_i(pc_id_i), .hwlp_start_addr_i(hwlp_start_o), .hwlp_end_addr_i (hwlp_end_o), @@ -1159,9 +1166,10 @@ module cv32e40p_id_stage .mult_multicycle_i(mult_multicycle_i), // APU - .apu_en_i (apu_en), - .apu_read_dep_i (apu_read_dep_i), - .apu_write_dep_i(apu_write_dep_i), + .apu_en_i (apu_en), + .apu_read_dep_i (apu_read_dep_i), + .apu_read_dep_for_jalr_i(apu_read_dep_for_jalr_i), + .apu_write_dep_i (apu_write_dep_i), .apu_stall_o(apu_stall), diff --git a/rtl/cv32e40p_load_store_unit.sv b/rtl/cv32e40p_load_store_unit.sv index 8df1d8498..7c08ffe11 100644 --- a/rtl/cv32e40p_load_store_unit.sv +++ b/rtl/cv32e40p_load_store_unit.sv @@ -59,8 +59,6 @@ module cv32e40p_load_store_unit #( input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline output logic data_misaligned_o, // misaligned access was detected -> to controller - input logic apu_busy_i, - input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage output logic [5:0] data_atop_o, // atomic instruction signal -> core output @@ -76,8 +74,6 @@ 
module cv32e40p_load_store_unit #( localparam DEPTH = 2; // Maximum number of outstanding transactions - logic data_req_ex_filtered; // data request from ex stage filtered when it is misaligned and there is an on-going APU instruction - // Transaction request (to cv32e40p_obi_interface) logic trans_valid; logic trans_ready; @@ -352,14 +348,12 @@ module cv32e40p_load_store_unit #( // Busy if there are ongoing (or potentially outstanding) transfers assign busy_o = (cnt_q != 2'b00) || trans_valid; - assign data_req_ex_filtered = data_req_ex_i & !(apu_busy_i & (data_misaligned_o | data_misaligned_ex_i)); - ////////////////////////////////////////////////////////////////////////////// // Transaction request generation // // Assumes that corresponding response is at least 1 cycle after request // - // - Only request transaction when EX stage requires data transfer (data_req_ex_filtered), and + // - Only request transaction when EX stage requires data transfer (data_req_ex_i), and // - maximum number of outstanding transactions will not be exceeded (cnt_q < DEPTH) ////////////////////////////////////////////////////////////////////////////// @@ -376,12 +370,12 @@ module cv32e40p_load_store_unit #( // OBI compatible (avoids combinatorial path from data_rvalid_i to data_req_o). // Multiple trans_* transactions can be issued (and accepted) before a response // (resp_*) is received. - assign trans_valid = data_req_ex_filtered && (cnt_q < DEPTH); + assign trans_valid = data_req_ex_i && (cnt_q < DEPTH); end else begin : gen_pulp_obi // Legacy PULP OBI behavior, i.e. only issue subsequent transaction if preceding transfer // is about to finish (re-introducing timing critical path from data_rvalid_i to data_req_o) - assign trans_valid = (cnt_q == 2'b00) ? data_req_ex_filtered && (cnt_q < DEPTH) : - data_req_ex_filtered && (cnt_q < DEPTH) && resp_valid; + assign trans_valid = (cnt_q == 2'b00) ? 
data_req_ex_i && (cnt_q < DEPTH) : + data_req_ex_i && (cnt_q < DEPTH) && resp_valid; end endgenerate @@ -391,7 +385,7 @@ module cv32e40p_load_store_unit #( // LSU EX stage readyness requires two criteria to be met: // - // - A data request (data_req_ex_filtered) has been forwarded/accepted (trans_valid && trans_ready) + // - A data request (data_req_ex_i) has been forwarded/accepted (trans_valid && trans_ready) // - The LSU WB stage is available such that EX and WB can be updated in lock step // // Default (if there is not even a data request) LSU EX is signaled to be ready, else @@ -400,11 +394,10 @@ module cv32e40p_load_store_unit #( // in case there is already at least one outstanding transaction (so WB is full) the EX // and WB stage can only signal readiness in lock step (so resp_valid is used as well). - assign lsu_ready_ex_o = !(apu_busy_i & (data_misaligned_o | data_misaligned_ex_i)) & - ((data_req_ex_i == 1'b0) ? 1'b1 : - (cnt_q == 2'b00) ? ( trans_valid && trans_ready) : - (cnt_q == 2'b01) ? (resp_valid && trans_valid && trans_ready) : - resp_valid); + assign lsu_ready_ex_o = (data_req_ex_i == 1'b0) ? 1'b1 : + (cnt_q == 2'b00) ? ( trans_valid && trans_ready) : + (cnt_q == 2'b01) ? 
(resp_valid && trans_valid && trans_ready) : + resp_valid; // Update signals for EX/WB registers (when EX has valid data itself and is ready for next) assign ctrl_update = lsu_ready_ex_o && data_req_ex_i; diff --git a/rtl/cv32e40p_mult.sv b/rtl/cv32e40p_mult.sv index ea0da1937..afdc5e2fc 100644 --- a/rtl/cv32e40p_mult.sv +++ b/rtl/cv32e40p_mult.sv @@ -55,6 +55,7 @@ module cv32e40p_mult output logic [31:0] result_o, output logic multicycle_o, + output logic mulh_active_o, output logic ready_o, input logic ex_ready_i ); @@ -87,7 +88,6 @@ module cv32e40p_mult logic [ 1:0] mulh_signed; logic mulh_shift_arith; logic mulh_carry_q; - logic mulh_active; logic mulh_save; logic mulh_clearcarry; logic mulh_ready; @@ -105,7 +105,7 @@ module cv32e40p_mult assign short_op_a[16] = short_signed[0] & short_op_a[15]; assign short_op_b[16] = short_signed[1] & short_op_b[15]; - assign short_op_c = mulh_active ? $signed({mulh_carry_q, op_c_i}) : $signed(op_c_i); + assign short_op_c = mulh_active_o ? $signed({mulh_carry_q, op_c_i}) : $signed(op_c_i); assign short_mul = $signed(short_op_a) * $signed(short_op_b); assign short_mac = $signed(short_op_c) + $signed(short_mul) + $signed(short_round); @@ -116,13 +116,13 @@ module cv32e40p_mult ) >>> short_imm; // choose between normal short multiplication operation and mulh operation - assign short_imm = mulh_active ? mulh_imm : imm_i; - assign short_subword = mulh_active ? mulh_subword : {2{short_subword_i}}; - assign short_signed = mulh_active ? mulh_signed : short_signed_i; - assign short_shift_arith = mulh_active ? mulh_shift_arith : short_signed_i[0]; + assign short_imm = mulh_active_o ? mulh_imm : imm_i; + assign short_subword = mulh_active_o ? mulh_subword : {2{short_subword_i}}; + assign short_signed = mulh_active_o ? mulh_signed : short_signed_i; + assign short_shift_arith = mulh_active_o ? mulh_shift_arith : short_signed_i[0]; - assign short_mac_msb1 = mulh_active ? 
short_mac[33] : short_mac[31]; - assign short_mac_msb0 = mulh_active ? short_mac[32] : short_mac[31]; + assign short_mac_msb1 = mulh_active_o ? short_mac[33] : short_mac[31]; + assign short_mac_msb0 = mulh_active_o ? short_mac[32] : short_mac[31]; always_comb begin @@ -132,16 +132,16 @@ module cv32e40p_mult mulh_signed = 2'b00; mulh_shift_arith = 1'b0; mulh_ready = 1'b0; - mulh_active = 1'b1; + mulh_active_o = 1'b1; mulh_save = 1'b0; mulh_clearcarry = 1'b0; multicycle_o = 1'b0; case (mulh_CS) IDLE_MULT: begin - mulh_active = 1'b0; - mulh_ready = 1'b1; - mulh_save = 1'b0; + mulh_active_o = 1'b0; + mulh_ready = 1'b1; + mulh_save = 1'b0; if ((operator_i == MUL_H) && enable_i) begin mulh_ready = 1'b0; mulh_NS = STEP0; @@ -149,12 +149,12 @@ module cv32e40p_mult end STEP0: begin - multicycle_o = 1'b1; - mulh_imm = 5'd16; - mulh_active = 1'b1; + multicycle_o = 1'b1; + mulh_imm = 5'd16; + mulh_active_o = 1'b1; //AL*BL never overflows - mulh_save = 1'b0; - mulh_NS = STEP1; + mulh_save = 1'b0; + mulh_NS = STEP1; //Here always a 32'b unsigned result (no carry) end diff --git a/rtl/vendor/pulp_platform_fpnew.lock.hjson b/rtl/vendor/pulp_platform_fpnew.lock.hjson index e150bcb2f..de40549d5 100644 --- a/rtl/vendor/pulp_platform_fpnew.lock.hjson +++ b/rtl/vendor/pulp_platform_fpnew.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/pulp-platform/fpnew.git - rev: 11659d7ff3580ac3226c6d56a90ef717cdc530e3 + rev: 79e453139072df42c9ec8f697132ba485d74e23d } } diff --git a/rtl/vendor/pulp_platform_fpnew.vendor.hjson b/rtl/vendor/pulp_platform_fpnew.vendor.hjson index e76745d51..1fe09cca5 100644 --- a/rtl/vendor/pulp_platform_fpnew.vendor.hjson +++ b/rtl/vendor/pulp_platform_fpnew.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/pulp-platform/fpnew.git", - rev: "11659d7ff3580ac3226c6d56a90ef717cdc530e3", + rev: "79e453139072df42c9ec8f697132ba485d74e23d", }, exclude_from_upstream: [ diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv 
b/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv index 964ef7429..7abe33043 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv @@ -443,7 +443,11 @@ module fpnew_cast_multi #( // By default right shift mantissa to be an integer denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); // overflow: when converting to unsigned the range is larger by one - if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + if ((input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) // Exponent larger than max int range, + && !(!op_mod_q2 // unless cast to signed int + && input_sign_q // and input value is larges negative int value + && (input_exp_q == signed'(fpnew_pkg::int_width(int_fmt_q2) - 1)) + && (input_mant_q == {1'b1, {INT_MAN_WIDTH-1{1'b0}}}))) begin denorm_shamt = '0; // prevent shifting of_before_round = 1'b1; // underflow diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv index a8b004952..56a2f5d62 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv @@ -207,7 +207,7 @@ module fpnew_divsqrt_multi #( // Valid synch with other lanes // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni) // Tell the other units that this unit has finished now or in the past assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv index 051e6a698..6fdd89056 100644 --- 
a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv @@ -613,7 +613,9 @@ module fpnew_fma #( ); // Classification after rounding - assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign uf_after_round = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones // ----------------- diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv index e691f6777..471d966f0 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv @@ -745,8 +745,10 @@ module fpnew_fma_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : post_process - // detect of / uf - fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + // detect of / uf + fmt_uf_after_round[fmt] = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. // Assemble regular result, nan box short ones. 
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v index 87139a253..d22e85ba9 100644 --- a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v @@ -222,7 +222,7 @@ end assign ex4_rst_norm[31:0] = {fdsu_ex4_result_sign, ex4_expnt_rst[7:0], ex4_frac_23[22:0]}; -assign ex4_cor_uf = (fdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) +assign ex4_cor_uf = (fdsu_ex4_uf || ex4_denorm_potnt_norm || ex4_uf_plus) && fdsu_ex4_nx; assign ex4_cor_nx = fdsu_ex4_nx || fdsu_ex4_of diff --git a/util/format-verible b/util/format-verible index 69a46a0a4..974ecce81 100755 --- a/util/format-verible +++ b/util/format-verible @@ -11,4 +11,6 @@ then find rtl/ bhv/ -not -path "*rtl/vendor*" \ -name '*.sv' | \ xargs verible-verilog-format --inplace 2> /dev/zero -fi \ No newline at end of file +else +echo "verible-verilog-format not available!" +fi