diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1d15fef3e..60aa591d2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,22 @@ The [OpenHW Work Flow](https://github.com/openhwgroup/core-v-docs/blob/master/ve is required reading. You will find information about the implementation and usage of the CORE-V verification environments in the [Verification Strategy](https://github.com/openhwgroup/core-v-docs/blob/master/verif/Common/OpenHWGroup_CORE-V_Verif_Strategy.pdf). +## Updating Copyright +The files in this repository are open-source artifacts licensed under the terms of the Solderpad license, see [LICENSE](LICENSE). +If you modify a file, a new copyright _may_ be added, but the existing copyright and license header _must not_ be removed or modified. +If your contribution uses a newer version of the existing license, you are encouraged to declare that with a one-liner SPDX header. + +In the example below, a new copyright and updated license are added to an existing copyright and license: +``` +// Copyright 2024 OpenHW Group and +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. +// ...remainder of original license header from ETHZ and UniBo. +``` + ## The Mechanics 1. From GitHub: [fork](https://help.github.com/articles/fork-a-repo/) the [cv32e40p](https://github.com/openhwgroup/cv32e40p) repository 2. 
Clone repository: `git clone https://github.com/[your_github_username]/cv32e40p` diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index f47afcb6b..13a5beb00 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -1155,7 +1155,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; e_dev_commit_rf_to_ex_3, e_dev_commit_rf_to_ex_4, e_dev_commit_rf_to_ex_5; - event e_if_2_id_1, e_if_2_id_2, e_if_2_id_3; + event e_if_2_id_1, e_if_2_id_2, e_if_2_id_3, e_if_2_id_4; event e_ex_to_wb_1, e_ex_to_wb_2; event e_id_to_ex_1, e_id_to_ex_2; event e_commit_dpc; @@ -1282,6 +1282,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.init(trace_if); trace_id.m_trap = ~r_pipe_freeze_trace.minstret; trace_id.m_is_illegal = trace_id.m_is_illegal | r_pipe_freeze_trace.is_illegal; + `CSR_FROM_PIPE(id, dpc) s_is_pc_set = 1'b0; s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; @@ -1318,6 +1319,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_ex_reg_we_adjusted; //ex_reg_we bit s_rf_we_wb_adjusted; // + bit s_dont_override_mstatus_fs_id; + trace_if = new(); trace_id = new(); trace_ex = new(); @@ -1350,6 +1353,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_ex_reg_we_adjusted = 1'b0; s_rf_we_wb_adjusted = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; + forever begin wait(e_pipe_monitor_ok.triggered); // event triggered #1; @@ -1533,6 +1538,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_ex.m_csr.mstatus_fs_wdata = FS_DIRTY; end else begin trace_id.m_csr.mstatus_fs_rdata = trace_ex.m_csr.mstatus_fs_wdata; + s_dont_override_mstatus_fs_id = 1'b1; end ->e_fregs_dirty_3; end @@ -1578,12 +1584,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if(trace_id.m_sample_csr_write_in_ex && !csr_is_irq && !s_is_irq_start) begin //First cycle after id_ready, csr write is asserted in this cycle `CSR_FROM_PIPE(id, mstatus) - 
`CSR_FROM_PIPE(id, mstatus_fs) + if(!s_dont_override_mstatus_fs_id) begin + `CSR_FROM_PIPE(id, mstatus_fs) + end `CSR_FROM_PIPE(id, mepc) `CSR_FROM_PIPE(id, mcause) `CSR_FROM_PIPE(id, dscratch0) `CSR_FROM_PIPE(id, dscratch1) - `CSR_FROM_PIPE(id, dpc) + if(r_pipe_freeze_trace.csr.we && (r_pipe_freeze_trace.csr.addr == CSR_DPC)) begin + `CSR_FROM_PIPE(id, dpc) + end ->e_csr_in_ex; end @@ -1629,6 +1639,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, tdata2) send_rvfi(trace_id); trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; end if (r_pipe_freeze_trace.apu_req && r_pipe_freeze_trace.apu_gnt) begin @@ -1640,6 +1651,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_apu_req.set_to_apu(); apu_trace_q.push_back(trace_apu_req); trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; if(r_pipe_freeze_trace.apu_rvalid && (cnt_apu_req == cnt_apu_resp)) begin//APU return in the same cycle apu_resp(); @@ -1695,6 +1707,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; ->e_id_to_ex_1; end else if (r_pipe_freeze_trace.ex_reg_we && r_pipe_freeze_trace.rf_alu_we_ex) begin @@ -1759,6 +1772,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; ->e_id_to_ex_2; end if_to_id(); @@ -1782,7 +1796,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end - if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready) begin + if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready && r_pipe_freeze_trace.instr_valid_if) begin if (trace_if.m_valid) begin if (r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.id_ready && !trace_id.m_valid && 
r_pipe_freeze_trace.ebrk_insn_dec) begin if_to_id(); @@ -1793,6 +1807,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; ->e_if_2_id_3; end else if (r_pipe_freeze_trace.ecall_insn_dec) begin if_to_id(); + ->e_if_2_id_4; end end diff --git a/cv32e40p_fpu_manifest.flist b/cv32e40p_fpu_manifest.flist index aca8c41b2..92ddce332 100644 --- a/cv32e40p_fpu_manifest.flist +++ b/cv32e40p_fpu_manifest.flist @@ -82,7 +82,6 @@ ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_noncomp.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_opgroup_fmt_slice.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_opgroup_multifmt_slice.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_opgroup_block.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_top.sv ${DESIGN_RTL_DIR}/cv32e40p_fp_wrapper.sv diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index 3ae7d9ae9..34a579c86 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -1,3 +1,9 @@ .no-scrollbar-table td { white-space: normal !important; } +.wy-side-nav-search>div.version { + color:black +} +.wy-side-nav-search>a.icon.icon-home { + color:black +} diff --git a/docs/source/conf.py b/docs/source/conf.py index dbb7b0203..bb18fa6a2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -38,7 +38,7 @@ # -- Project information ----------------------------------------------------- project = u'CORE-V CV32E40P User Manual' -copyright = u'2023, OpenHW Group' +copyright = u'2024, OpenHW Group' author = u'PULP Platform and OpenHW Group' # The short X.Y version @@ -111,8 +111,10 @@ # further. For a list of options available for each theme, see the # documentation. 
# -html_theme_options = {'style_nav_header_background': '#DDDDDD'} +html_theme_options = {'style_nav_header_background': '#DDDDDD', 'prev_next_buttons_location': 'both'} html_logo = '../images/openhw-landscape.svg' +html_show_sphinx = False +html_show_sourcelink = False # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/source/corev_hw_loop.rst b/docs/source/corev_hw_loop.rst index ecc62e27d..e51b0fe8f 100644 --- a/docs/source/corev_hw_loop.rst +++ b/docs/source/corev_hw_loop.rst @@ -57,6 +57,8 @@ The HWLoop constraints are: - End Address must be strictly greater than Start Address. +- HWLoop #0 (resp. #1) start and end addresses **must not be modified** if HWLoop #0 (resp. #1) count is different than 0. + - End address of an HWLoop must point to the instruction just after the last one of the HWLoop body. - HWLoop body must contain at least 3 instructions. @@ -64,8 +66,6 @@ The HWLoop constraints are: - When both loops are nested, the End address of the outermost HWLoop (must be #1) must be at least 2 instructions further than the End address of the innermost HWLoop (must be #0), i.e. HWLoop[1].endaddress >= HWLoop[0].endaddress + 8. - Remark: To avoid to add 2 NOPs in case nothing can be put there by the compiler, lpcount setting of the the inner loop could be moved after it - without forgetting to add the same in the preamble before the outer loop start address. - HWLoop must always be entered from its start location (no branch/jump to a location inside a HWLoop body). 
@@ -103,37 +103,39 @@ Below an assembly code example of a nested HWLoop that computes a matrix additio asm volatile ( "add %[i],x0, x0;" "add %[j],x0, x0;" - "cv.count 1, %[N];" ".balign 4;" - "cv.endi 1, endO;" - "cv.starti 1, startO;" + "cv.starti 1, start1;" + "cv.endi 1, end1;" + "cv.count 1, %[N];" "any instructions here" ".balign 4;" - "cv.endi 0, endZ;" - "cv.starti 0, startZ;" - "cv.count 0, %[N];" + "cv.starti 0, start0;" + "cv.endi 0, end0;" "any instructions here" ".balign 4;" ".option norvc;" - "startO:;" - " startZ:;" + "start1:;" + " cv.count 0, %[N];" + " start0:;" " addi %[i], %[i], 1;" " addi %[i], %[i], 1;" " addi %[i], %[i], 1;" - " endZ:;" - " cv.count 0, %[N];" + " end0:;" + " addi %[j], %[j], 2;" " addi %[j], %[j], 2;" - "endO:;" + "end1:;" : [i] "+r" (i), [j] "+r" (j) : [N] "r" (10) ); +As the HWLoop feature is enabled as soon as lpcountX > 0, lpstartX and lpendX **must** be programmed **before** lpcountX to avoid unexpected behavior. +For HWLoops whose body contains up to 30 instructions, it is always better to use the cv.setup* instructions, which update all 3 HWLoop CSRs in the same cycle. At the beginning of the HWLoop, the registers %[i] and %[j] are 0. -The innermost loop, from startZ to (endZ - 4), adds to %[i] three times 1 and -it is executed 10x10 times. Whereas the outermost loop, from startO to (endO - 4), -executes 10 times the innermost loop and adds 2 to the register %[j]. -At the end of the loop, the register %[i] contains 300 and the register %[j] contains 20. +The innermost loop, from start0 to (end0 - 4), adds to %[i] three times 1 and +it is executed 10x10 times. Whereas the outermost loop, from start1 to (end1 - 4), +executes 10 times the innermost loop and adds two times 2 to the register %[j]. +At the end of the loop, the register %[i] contains 300 and the register %[j] contains 40. ..
_hwloop-exceptions_handlers: diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index c7becb7f3..de3dc3ea8 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -788,12 +788,16 @@ General ALU operations | | else if rs1 >=rs2, rD = rs2, | | | | | | else rD = rs1 | + | | | + | | Note: rs2 is unsigned. | +-------------------------------------------+------------------------------------------------------------------------+ | **cv.clipur rD, rs1, rs2** | if rs1 <= 0, rD = 0, | | | | | | else if rs1 >= rs2, rD = rs2, | | | | | | else rD = rs1 | + | | | + | | Note: rs2 is unsigned. | +-------------------------------------------+------------------------------------------------------------------------+ | **cv.addN rD, rs1, rs2, Is3** | rD = (rs1 + rs2) >>> Is3 | | | |