load_store_unit: share the MMU between the CVA6 LSU and the accelerator

What this patch does to core/load_store_unit.sv:

  * The load/store-unit side of the translation interface moves to
    cva6_*-prefixed signals (cva6_translation_req, cva6_mmu_vaddr,
    cva6_mmu_paddr, cva6_mmu_exception, cva6_dtlb_hit, cva6_dtlb_ppn,
    cva6_st_translation_req, cva6_misaligned_exception). The un-prefixed
    names remain the MMU-side signals.
  * With CVA6Cfg.EnableAccelerator set, a two-state FSM (CVA6 / ACC) selects
    which requester is connected to the MMU-side signals. While the FSM is in
    ACC, lsu_ctrl.valid is forced low. Without the accelerator, the cva6_*
    signals are wired straight through and acc_mmu_resp_o is tied to '0.
  * The instance that used to drive lsu_ctrl through its .lsu_ctrl_o port now
    drives lsu_ctrl_byp; lsu_ctrl is derived from it.

Review fixes folded into this version of the patch:

  * gen_no_mmu is left untouched, i.e. it keeps driving the MMU-side signals
    (dtlb_ppn, dtlb_hit, mmu_paddr, mmu_exception) from mmu_vaddr. Driving the
    cva6_* copies there would give those nets a second driver next to the
    arbitration block and leave the MMU-side signals undriven. The hunk at
    -314,10 +307,10 is therefore gone, and the start of hunk -337,22 +330,120
    is plain context.
  * In the STORE case, cva6_translation_req is taken from
    cva6_st_translation_req (the store unit output) rather than from the
    arbitrated st_translation_req.
  * Typo fixed: acc_translataion_valid is now acc_translation_valid.

Notes for whoever applies this:

  * Indentation, column alignment and blank-line placement were reconstructed;
    apply with "git apply --ignore-whitespace" or "patch -l".
  * The blob ids on the index line are carried over from the original patch
    and no longer describe the post-image.

diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv
index 175b31e811..b9949d353f 100644
--- a/core/load_store_unit.sv
+++ b/core/load_store_unit.sv
@@ -147,7 +147,7 @@ module load_store_unit
   // --------------------------------------
   // those are the signals which are always correct
   // e.g.: they keep the value in the stall case
-  lsu_ctrl_t lsu_ctrl;
+  lsu_ctrl_t lsu_ctrl, lsu_ctrl_byp;
 
   logic pop_st;
   logic pop_ld;
@@ -170,7 +170,7 @@ module load_store_unit
   logic st_valid_i;
   logic ld_valid_i;
   logic ld_translation_req;
-  logic st_translation_req;
+  logic cva6_st_translation_req, acc_st_translation_req, st_translation_req;
   logic [riscv::VLEN-1:0] ld_vaddr;
   logic [riscv::XLEN-1:0] ld_tinst;
   logic ld_hs_ld_st_inst;
@@ -179,16 +179,16 @@ module load_store_unit
   logic [riscv::XLEN-1:0] st_tinst;
   logic st_hs_ld_st_inst;
   logic st_hlvx_inst;
-  logic translation_req;
-  logic translation_valid;
-  logic [riscv::VLEN-1:0] mmu_vaddr;
-  logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen;
+  logic cva6_translation_req, acc_translation_req, translation_req;
+  logic cva6_translation_valid, acc_translation_valid, translation_valid;
+  logic [riscv::VLEN-1:0] cva6_mmu_vaddr, acc_mmu_vaddr, mmu_vaddr;
+  logic [riscv::PLEN-1:0] cva6_mmu_paddr, acc_mmu_paddr, mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen;
   logic [ riscv::XLEN-1:0] mmu_tinst;
   logic mmu_hs_ld_st_inst;
   logic mmu_hlvx_inst;
-  exception_t mmu_exception;
-  logic dtlb_hit;
-  logic [ riscv::PPNW-1:0] dtlb_ppn;
+  exception_t cva6_mmu_exception, acc_mmu_exception, mmu_exception;
+  logic cva6_dtlb_hit, acc_dtlb_hit, dtlb_hit;
+  logic [riscv::PPNW-1:0] cva6_dtlb_ppn, acc_dtlb_ppn, dtlb_ppn;
 
   logic ld_valid;
   logic [TRANS_ID_BITS-1:0] ld_trans_id;
@@ -200,20 +200,13 @@ module load_store_unit
   logic [ 11:0] page_offset;
   logic page_offset_matches;
 
-  exception_t misaligned_exception;
+  exception_t cva6_misaligned_exception, acc_misaligned_exception, misaligned_exception;
   exception_t ld_ex;
   exception_t st_ex;
 
   logic hs_ld_st_inst;
   logic hlvx_inst;
 
-  // Accelerator's request for the MMU
-  assign acc_mmu_resp_o.acc_mmu_dtlb_hit_o = '0;
-  assign acc_mmu_resp_o.acc_mmu_dtlb_ppn_o = '0;
-  assign acc_mmu_resp_o.acc_mmu_valid_o = '0;
-  assign acc_mmu_resp_o.acc_mmu_paddr_o = '0;
-  assign acc_mmu_resp_o.acc_mmu_exception_o = '0;
-
   // -------------------
   // MMU e.g.: TLBs/PTW
   // -------------------
@@ -337,22 +330,120 @@ module load_store_unit
 
     assign itlb_miss_o = 1'b0;
     assign dtlb_miss_o = 1'b0;
     assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12];
     assign dtlb_hit = 1'b1;
 
     always_ff @(posedge clk_i or negedge rst_ni) begin
       if (~rst_ni) begin
         mmu_paddr         <= '0;
         translation_valid <= '0;
         mmu_exception     <= '0;
       end else begin
         mmu_paddr         <= mmu_vaddr_plen;
         translation_valid <= translation_req;
         mmu_exception     <= misaligned_exception;
       end
     end
   end
 
+  if (CVA6Cfg.EnableAccelerator) begin
+    // The MMU can be connected to CVA6 or the ACCELERATOR
+    enum logic {CVA6, ACC} mmu_state_d, mmu_state_q;
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        mmu_state_q <= CVA6;
+      end else begin
+        mmu_state_q <= mmu_state_d;
+      end
+    end
+
+    // Straightforward and slow-reactive MMU arbitration logic
+    // This logic can be optimized to reduce answer latency and contention
+    always_comb begin
+      // Maintain state
+      mmu_state_d = mmu_state_q;
+
+      // Serve CVA6 and gate the accelerator by default
+      // MMU input
+      misaligned_exception = cva6_misaligned_exception;
+      st_translation_req = cva6_st_translation_req;
+      translation_req = cva6_translation_req;
+      mmu_vaddr = cva6_mmu_vaddr;
+      // MMU output
+      cva6_translation_valid = translation_valid;
+      cva6_mmu_paddr = mmu_paddr;
+      cva6_mmu_exception = mmu_exception;
+      cva6_dtlb_hit = dtlb_hit;
+      cva6_dtlb_ppn = dtlb_ppn;
+      acc_mmu_resp_o.acc_mmu_valid = '0;
+      acc_mmu_resp_o.acc_mmu_paddr = '0;
+      acc_mmu_resp_o.acc_mmu_exception = '0;
+      acc_mmu_resp_o.acc_mmu_dtlb_hit = '0;
+      acc_mmu_resp_o.acc_mmu_dtlb_ppn = '0;
+
+      unique case (mmu_state_q)
+        CVA6: begin
+          // Only the accelerator is requesting, and the lsu bypass queue is empty.
+          if (acc_mmu_req_i.acc_mmu_req && !lsu_valid_i && lsu_ready_o) begin
+            // Lock the MMU to the accelerator.
+            // If the issue stage is firing a mem op in this cycle,
+            // the bypass queue will buffer it.
+            mmu_state_d = ACC;
+          end
+          // Make this a mealy FSM to cut some latency.
+          // It should be okay timing-wise since cva6's requests already
+          // depend on lsu_valid_i. Moreover, lsu_ready_o is sequentially
+          // generated by the bypass and, in this first implementation,
+          // the acc request already depends combinatorially upon acc_mmu_req_i.acc_mmu_req.
+        end
+        ACC: begin
+          // MMU input
+          misaligned_exception = acc_mmu_req_i.acc_mmu_misaligned_ex;
+          st_translation_req = acc_mmu_req_i.acc_mmu_is_store;
+          translation_req = acc_mmu_req_i.acc_mmu_req;
+          mmu_vaddr = acc_mmu_req_i.acc_mmu_vaddr;
+          // MMU output
+          acc_mmu_resp_o.acc_mmu_valid = translation_valid;
+          acc_mmu_resp_o.acc_mmu_paddr = mmu_paddr;
+          acc_mmu_resp_o.acc_mmu_exception = mmu_exception;
+          acc_mmu_resp_o.acc_mmu_dtlb_hit = dtlb_hit;
+          acc_mmu_resp_o.acc_mmu_dtlb_ppn = dtlb_ppn;
+          cva6_translation_valid = '0;
+          cva6_mmu_paddr = '0;
+          cva6_mmu_exception = '0;
+          cva6_dtlb_hit = '0;
+          cva6_dtlb_ppn = '0;
+          // Get back to CVA6 after the translation
+          if (translation_valid) mmu_state_d = CVA6;
+        end
+        default: mmu_state_d = CVA6;
+      endcase
+    end
+
+    always_comb begin
+      // Feed forward
+      lsu_ctrl = lsu_ctrl_byp;
+      // Mask the lsu valid so that cva6's req gets buffered in the
+      // bypass queue when the MMU is being used by the accelerator.
+      lsu_ctrl.valid = (mmu_state_q == ACC) ? 1'b0 : lsu_ctrl_byp.valid;
+    end
+  end else begin
+    // MMU input
+    assign misaligned_exception = cva6_misaligned_exception;
+    assign st_translation_req = cva6_st_translation_req;
+    assign translation_req = cva6_translation_req;
+    assign mmu_vaddr = cva6_mmu_vaddr;
+    // MMU output
+    assign cva6_translation_valid = translation_valid;
+    assign cva6_mmu_paddr = mmu_paddr;
+    assign cva6_mmu_exception = mmu_exception;
+    assign cva6_dtlb_hit = dtlb_hit;
+    assign cva6_dtlb_ppn = dtlb_ppn;
+    // No accelerator
+    assign acc_mmu_resp_o = '0;
+    // Feed forward the lsu_ctrl bypass
+    assign lsu_ctrl = lsu_ctrl_byp;
+  end
 
   logic store_buffer_empty;
   // ------------------
@@ -380,15 +471,15 @@ module load_store_unit
       .result_o (st_result),
       .ex_o (st_ex),
       // MMU port
-      .translation_req_o (st_translation_req),
+      .translation_req_o (cva6_st_translation_req),
       .vaddr_o (st_vaddr),
       .rvfi_mem_paddr_o (rvfi_mem_paddr_o),
       .tinst_o (st_tinst),
       .hs_ld_st_inst_o (st_hs_ld_st_inst),
       .hlvx_inst_o (st_hlvx_inst),
-      .paddr_i (mmu_paddr),
-      .ex_i (mmu_exception),
-      .dtlb_hit_i (dtlb_hit),
+      .paddr_i (cva6_mmu_paddr),
+      .ex_i (cva6_mmu_exception),
+      .dtlb_hit_i (cva6_dtlb_hit),
       // Load Unit
       .page_offset_i (page_offset),
       .page_offset_matches_o(page_offset_matches),
@@ -420,10 +511,10 @@ module load_store_unit
       .tinst_o (ld_tinst),
       .hs_ld_st_inst_o (ld_hs_ld_st_inst),
       .hlvx_inst_o (ld_hlvx_inst),
-      .paddr_i (mmu_paddr),
-      .ex_i (mmu_exception),
-      .dtlb_hit_i (dtlb_hit),
-      .dtlb_ppn_i (dtlb_ppn),
+      .paddr_i (cva6_mmu_paddr),
+      .ex_i (cva6_mmu_exception),
+      .dtlb_hit_i (cva6_dtlb_hit),
+      .dtlb_ppn_i (cva6_dtlb_ppn),
       // to store unit
       .page_offset_o (page_offset),
       .page_offset_matches_i(page_offset_matches),
@@ -469,19 +560,19 @@ module load_store_unit
     ld_valid_i = 1'b0;
     st_valid_i = 1'b0;
 
-    translation_req = 1'b0;
-    mmu_vaddr = {riscv::VLEN{1'b0}};
-    mmu_tinst = {riscv::XLEN{1'b0}};
-    mmu_hs_ld_st_inst = 1'b0;
-    mmu_hlvx_inst = 1'b0;
+    cva6_translation_req = 1'b0;
+    cva6_mmu_vaddr       = {riscv::VLEN{1'b0}};
+    mmu_tinst            = {riscv::XLEN{1'b0}};
+    mmu_hs_ld_st_inst    = 1'b0;
+    mmu_hlvx_inst        = 1'b0;
 
     // check the operation to activate the right functional unit accordingly
     unique case (lsu_ctrl.fu)
       // all loads go here
       LOAD: begin
-        ld_valid_i = lsu_ctrl.valid;
-        translation_req = ld_translation_req;
-        mmu_vaddr = ld_vaddr;
+        ld_valid_i           = lsu_ctrl.valid;
+        cva6_translation_req = ld_translation_req;
+        cva6_mmu_vaddr       = ld_vaddr;
         if (CVA6Cfg.RVH) begin
           mmu_tinst = ld_tinst;
           mmu_hs_ld_st_inst = ld_hs_ld_st_inst;
@@ -490,9 +581,9 @@ module load_store_unit
       end
       // all stores go here
       STORE: begin
-        st_valid_i = lsu_ctrl.valid;
-        translation_req = st_translation_req;
-        mmu_vaddr = st_vaddr;
+        st_valid_i           = lsu_ctrl.valid;
+        cva6_translation_req = cva6_st_translation_req;
+        cva6_mmu_vaddr       = st_vaddr;
         if (CVA6Cfg.RVH) begin
           mmu_tinst = st_tinst;
           mmu_hs_ld_st_inst = st_hs_ld_st_inst;
@@ -550,7 +641,7 @@ module load_store_unit
   // the misaligned exception is passed to the functional unit via the MMU, which in case
   // can augment the exception if other memory related exceptions like a page fault or access errors
   always_comb begin : data_misaligned_detection
-    misaligned_exception = {
+    cva6_misaligned_exception = {
       {riscv::XLEN{1'b0}},
       {riscv::XLEN{1'b0}},
       {riscv::GPLEN{1'b0}},
@@ -596,65 +687,65 @@ module load_store_unit
     if (data_misaligned) begin
 
       if (lsu_ctrl.fu == LOAD) begin
-        misaligned_exception.cause = riscv::LD_ADDR_MISALIGNED;
-        misaligned_exception.valid = 1'b1;
+        cva6_misaligned_exception.cause = riscv::LD_ADDR_MISALIGNED;
+        cva6_misaligned_exception.valid = 1'b1;
         if (CVA6Cfg.TvalEn)
-          misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
-        misaligned_exception.tval2 = '0;
-        misaligned_exception.tinst = lsu_ctrl.tinst;
-        misaligned_exception.gva = ld_st_v_i;
+          cva6_misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
+        cva6_misaligned_exception.tval2 = '0;
+        cva6_misaligned_exception.tinst = lsu_ctrl.tinst;
+        cva6_misaligned_exception.gva = ld_st_v_i;
 
       end else if (lsu_ctrl.fu == STORE) begin
-        misaligned_exception.cause = riscv::ST_ADDR_MISALIGNED;
-        misaligned_exception.valid = 1'b1;
+        cva6_misaligned_exception.cause = riscv::ST_ADDR_MISALIGNED;
+        cva6_misaligned_exception.valid = 1'b1;
         if (CVA6Cfg.TvalEn)
-          misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
-        misaligned_exception.tval2 = '0;
-        misaligned_exception.tinst = lsu_ctrl.tinst;
-        misaligned_exception.gva = ld_st_v_i;
+          cva6_misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
+        cva6_misaligned_exception.tval2 = '0;
+        cva6_misaligned_exception.tinst = lsu_ctrl.tinst;
+        cva6_misaligned_exception.gva = ld_st_v_i;
       end
     end
 
     if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin
 
       if (lsu_ctrl.fu == LOAD) begin
-        misaligned_exception.cause = riscv::LD_ACCESS_FAULT;
-        misaligned_exception.valid = 1'b1;
+        cva6_misaligned_exception.cause = riscv::LD_ACCESS_FAULT;
+        cva6_misaligned_exception.valid = 1'b1;
         if (CVA6Cfg.TvalEn)
-          misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
-        misaligned_exception.tval2 = '0;
-        misaligned_exception.tinst = lsu_ctrl.tinst;
-        misaligned_exception.gva = ld_st_v_i;
+          cva6_misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
+        cva6_misaligned_exception.tval2 = '0;
+        cva6_misaligned_exception.tinst = lsu_ctrl.tinst;
+        cva6_misaligned_exception.gva = ld_st_v_i;
 
       end else if (lsu_ctrl.fu == STORE) begin
-        misaligned_exception.cause = riscv::ST_ACCESS_FAULT;
-        misaligned_exception.valid = 1'b1;
+        cva6_misaligned_exception.cause = riscv::ST_ACCESS_FAULT;
+        cva6_misaligned_exception.valid = 1'b1;
         if (CVA6Cfg.TvalEn)
-          misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
-        misaligned_exception.tval2 = '0;
-        misaligned_exception.tinst = lsu_ctrl.tinst;
-        misaligned_exception.gva = ld_st_v_i;
+          cva6_misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
+        cva6_misaligned_exception.tval2 = '0;
+        cva6_misaligned_exception.tinst = lsu_ctrl.tinst;
+        cva6_misaligned_exception.gva = ld_st_v_i;
       end
     end
 
     if (ariane_pkg::MMU_PRESENT && CVA6Cfg.RVH && en_ld_st_g_translation_i && !en_ld_st_translation_i && lsu_ctrl.g_overflow) begin
 
       if (lsu_ctrl.fu == LOAD) begin
-        misaligned_exception.cause = riscv::LOAD_GUEST_PAGE_FAULT;
-        misaligned_exception.valid = 1'b1;
+        cva6_misaligned_exception.cause = riscv::LOAD_GUEST_PAGE_FAULT;
+        cva6_misaligned_exception.valid = 1'b1;
         if (CVA6Cfg.TvalEn)
-          misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
-        misaligned_exception.tval2 = '0;
-        misaligned_exception.tinst = lsu_ctrl.tinst;
-        misaligned_exception.gva = ld_st_v_i;
+          cva6_misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
+        cva6_misaligned_exception.tval2 = '0;
+        cva6_misaligned_exception.tinst = lsu_ctrl.tinst;
+        cva6_misaligned_exception.gva = ld_st_v_i;
       end else if (lsu_ctrl.fu == STORE) begin
-        misaligned_exception.cause = riscv::STORE_GUEST_PAGE_FAULT;
-        misaligned_exception.valid = 1'b1;
+        cva6_misaligned_exception.cause = riscv::STORE_GUEST_PAGE_FAULT;
+        cva6_misaligned_exception.valid = 1'b1;
         if (CVA6Cfg.TvalEn)
-          misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
-        misaligned_exception.tval2 = '0;
-        misaligned_exception.tinst = lsu_ctrl.tinst;
-        misaligned_exception.gva = ld_st_v_i;
+          cva6_misaligned_exception.tval = {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr};
+        cva6_misaligned_exception.tval2 = '0;
+        cva6_misaligned_exception.tinst = lsu_ctrl.tinst;
+        cva6_misaligned_exception.gva = ld_st_v_i;
       end
     end
   end
@@ -688,7 +779,7 @@ module load_store_unit
       .pop_ld_i (pop_ld),
       .pop_st_i (pop_st),
 
-      .lsu_ctrl_o(lsu_ctrl),
+      .lsu_ctrl_o(lsu_ctrl_byp),
       .ready_o (lsu_ready_o),
       .*
   );