Skip to content

Commit

Permalink
Add self-invalidation coherence
Browse files Browse the repository at this point in the history
added files required for compilation

Added target to test litmus tests

Per byte dirty bit added to std dcache and tested

basic support for dual core instantiation

Some automation added to the multi-core testing process

minor changes

temporary ci modifications for working without sudo permissions

branch prova

prova modified

Multi core instantiation made generic

Transition between WAIT_CRITICAL_WORD and WAIT_TAG removed if there is a flush - feature tested

Changed repo with master branch and added masks for reservation at cacheline granularity because bursts are not supported

Fix the never-return problem for non-boot cores and dt modified for 2 cores

Added master branch of common_cells and compilation of new file in Makefile

Added transition between FLUSHING and FLUSHING to avoid multiple flushes during atomics

Unused code removed and code commented

Added support to use the master branch of the axi_riscv_atomics repository

Added support for multiple ariane instances for fpga synthesis

Increased stack for big applications and reduced number of harts

Peripherals configured to use multiple cores

List of unresolved issues encountered during the master's thesis

Co-authored-by: msc22h2 <[email protected]>
Signed-off-by: Nils Wistoff <[email protected]>
  • Loading branch information
niwis and Michelangelo98 committed Jan 26, 2024
1 parent 9c3a1ac commit 4c48ade
Show file tree
Hide file tree
Showing 20 changed files with 359 additions and 94 deletions.
16 changes: 8 additions & 8 deletions core/cache_subsystem/cache_ctrl.sv
Original file line number Diff line number Diff line change
Expand Up @@ -298,18 +298,18 @@ module cache_ctrl
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;

be_o.vldrty = hit_way_q;
be_o.vldrty = hit_way_q;

// set the correct byte enable
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
data_o.dirty[cl_offset>>3+:8] = mem_req_q.be;
data_o.valid = 1'b1;

// got a grant ~> this is finished now
if (gnt_i) begin
Expand Down
11 changes: 6 additions & 5 deletions core/cache_subsystem/miss_handler.sv
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ module miss_handler
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;

for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
evict_way[i] = data_i[i].valid & data_i[i].dirty;
evict_way[i] = data_i[i].valid & (|data_i[i].dirty);
valid_way[i] = data_i[i].valid;
end
// ----------------------
Expand Down Expand Up @@ -264,10 +264,11 @@ module miss_handler
lfsr_enable = 1'b1;
evict_way_d = lfsr_oh;
// do we need to write back the cache line?
if (data_i[lfsr_bin].dirty) begin
if (|data_i[lfsr_bin].dirty) begin
state_d = WB_CACHELINE_MISS;
evict_cl_d.tag = data_i[lfsr_bin].tag;
evict_cl_d.data = data_i[lfsr_bin].data;
evict_cl_d.dirty = data_i[lfsr_bin].dirty;
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
// no - we can request a cache line now
end else state_d = REQ_CACHELINE;
Expand Down Expand Up @@ -306,7 +307,7 @@ module miss_handler
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
data_o.data = data_miss_fsm;
data_o.valid = 1'b1;
data_o.dirty = 1'b0;
data_o.dirty = '0;

// is this a write?
if (mshr_q.we) begin
Expand All @@ -316,7 +317,7 @@ module miss_handler
if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
end
// its immediately dirty if we write
data_o.dirty = 1'b1;
data_o.dirty[cl_offset>>3+:8] = mshr_q.be;
end
// reset MSHR
mshr_d.valid = 1'b0;
Expand All @@ -337,7 +338,7 @@ module miss_handler
cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET],
{{DCACHE_BYTE_OFFSET} {1'b0}}
};
req_fsm_miss_be = '1;
req_fsm_miss_be = evict_cl_q.dirty;
req_fsm_miss_we = 1'b1;
req_fsm_miss_wdata = evict_cl_q.data;

Expand Down
82 changes: 46 additions & 36 deletions core/cache_subsystem/std_nbdcache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -55,45 +55,46 @@ module std_nbdcache
// 3. Load Unit
// 4. Accelerator
// 5. Store unit
logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req;
logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
logic [ NumPorts:0] gnt;
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata;
logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag;
logic [NumPorts:0][DCACHE_SET_ASSOC-1:0] req;
logic [NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
logic [NumPorts:0] gnt;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata;
logic [NumPorts:0][DCACHE_TAG_WIDTH-1:0] tag;

cache_line_t [ NumPorts:0] wdata;
logic [ NumPorts:0] we;
cl_be_t [ NumPorts:0] be;
logic [ DCACHE_SET_ASSOC-1:0] hit_way;
cache_line_t [NumPorts:0] wdata;
logic [NumPorts:0] we;
cl_be_t [NumPorts:0] be;
logic [DCACHE_SET_ASSOC-1:0] hit_way;
// -------------------------------
// Controller <-> Miss unit
// -------------------------------
logic [ NumPorts-1:0] busy;
logic [ NumPorts-1:0][ 55:0] mshr_addr;
logic [ NumPorts-1:0] mshr_addr_matches;
logic [ NumPorts-1:0] mshr_index_matches;
logic [ 63:0] critical_word;
logic critical_word_valid;
logic [NumPorts-1:0] busy;
logic [NumPorts-1:0][55:0] mshr_addr;
logic [NumPorts-1:0] mshr_addr_matches;
logic [NumPorts-1:0] mshr_index_matches;
logic [63:0] critical_word;
logic critical_word_valid;

logic [ NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;
logic [ NumPorts-1:0] miss_gnt;
logic [ NumPorts-1:0] active_serving;
logic [NumPorts-1:0][$bits(miss_req_t)-1:0] miss_req;
logic [NumPorts-1:0] miss_gnt;
logic [NumPorts-1:0] active_serving;

logic [ NumPorts-1:0] bypass_gnt;
logic [ NumPorts-1:0] bypass_valid;
logic [ NumPorts-1:0][ 63:0] bypass_data;
logic [NumPorts-1:0] bypass_gnt;
logic [NumPorts-1:0] bypass_valid;
logic [NumPorts-1:0][63:0] bypass_data;
// -------------------------------
// Arbiter <-> Datram,
// -------------------------------
logic [ DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
logic [DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
logic [(DCACHE_LINE_WIDTH/8+1)*DCACHE_SET_ASSOC-1:0] be_valid_dirty_ram;

// Busy signals
logic miss_handler_busy;
logic miss_handler_busy;
assign busy_o = |busy | miss_handler_busy;

// ------------------
Expand Down Expand Up @@ -223,19 +224,28 @@ module std_nbdcache

// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
// you can use it here to save the extra 17x overhead introduced by this workaround.
logic [(DCACHE_LINE_WIDTH+8)*DCACHE_SET_ASSOC-1:0] dirty_wdata, dirty_rdata;

for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
for (genvar j = 0; j < DCACHE_LINE_WIDTH / 8; j++) begin
// dirty bits assignment
assign dirty_wdata[(DCACHE_LINE_WIDTH+8)*i+8*j] = wdata_ram.dirty[j];
assign rdata_ram[i].dirty[j] = dirty_rdata[(DCACHE_LINE_WIDTH+8)*i+8*j];
end
// valid bit assignment
assign dirty_wdata[DCACHE_LINE_WIDTH+(DCACHE_LINE_WIDTH+8)*i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[DCACHE_LINE_WIDTH+(DCACHE_LINE_WIDTH+8)*i];
end

// be construction for valid_dirty_sram
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign be_valid_dirty_ram[i*(DCACHE_LINE_WIDTH/8+1)+:(DCACHE_LINE_WIDTH/8+1)] = {be_ram.vldrty[i], be_ram.data} & {(DCACHE_LINE_WIDTH/8+1){be_ram.vldrty[i]}};
end

sram #(
.USER_WIDTH(1),
.DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
.DATA_WIDTH((DCACHE_LINE_WIDTH + 8) * DCACHE_SET_ASSOC),
.NUM_WORDS (DCACHE_NUM_WORDS)
) valid_dirty_sram (
.clk_i (clk_i),
Expand All @@ -245,7 +255,7 @@ module std_nbdcache
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(dirty_wdata),
.be_i (be_ram.vldrty),
.be_i (be_valid_dirty_ram),
.ruser_o(),
.rdata_o(dirty_rdata)
);
Expand Down
66 changes: 39 additions & 27 deletions core/fpu_wrap.sv
Original file line number Diff line number Diff line change
Expand Up @@ -62,28 +62,40 @@ module fpu_wrap
Width: unsigned'(riscv::XLEN), // parameterized using XLEN
EnableVectors: CVA6Cfg.XFVec,
EnableNanBox: 1'b1,
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT, CVA6Cfg.XF8ALT},
IntFmtMask: {CVA6Cfg.XFVec && CVA6Cfg.XF8, CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT), 1'b1, 1'b1}
FpFmtMask: {
CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT, CVA6Cfg.XF8ALT
},
IntFmtMask: {
CVA6Cfg.XFVec && CVA6Cfg.XF8,
CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
1'b1,
1'b1
}
};

// Implementation (number of registers etc)
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt, FP8alt
'{unsigned'(LAT_COMP_FP32 ),
unsigned'(LAT_COMP_FP64 ),
unsigned'(LAT_COMP_FP16 ),
unsigned'(LAT_COMP_FP8 ),
unsigned'(LAT_COMP_FP16ALT),
unsigned'(LAT_COMP_FP8ALT)}, // ADDMUL
PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt, FP8alt
'{
unsigned'(LAT_COMP_FP32),
unsigned'(LAT_COMP_FP64),
unsigned'(LAT_COMP_FP16),
unsigned'(LAT_COMP_FP8),
unsigned'(LAT_COMP_FP16ALT),
unsigned'(LAT_COMP_FP8ALT)
}, // ADDMUL
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
'{default: unsigned'(LAT_CONV)}, // CONV
'{default: unsigned'(LAT_SDOTP)}}, // DOTP
UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: unsigned'(LAT_CONV)}, // CONV
'{default: unsigned'(LAT_SDOTP)}
}, // DOTP
UnitTypes: '{
'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}, // CONV
'{default: fpnew_pkg::DISABLED}}, // DOTP
'{default: fpnew_pkg::MERGED}, // CONV
'{default: fpnew_pkg::DISABLED}
}, // DOTP
PipeConfig: fpnew_pkg::DISTRIBUTED
};

Expand Down Expand Up @@ -526,19 +538,19 @@ module fpu_wrap
) i_fpnew_bulk (
.clk_i,
.rst_ni,
.hart_id_i ( '0 ),
.operands_i ( fpu_operands ),
.rnd_mode_i ( fpnew_pkg::roundmode_e'(fpu_rm) ),
.op_i ( fpnew_pkg::operation_e'(fpu_op) ),
.op_mod_i ( fpu_op_mod ),
.src_fmt_i ( fpnew_pkg::fp_format_e'(fpu_srcfmt) ),
.dst_fmt_i ( fpnew_pkg::fp_format_e'(fpu_dstfmt) ),
.int_fmt_i ( fpnew_pkg::int_format_e'(fpu_ifmt) ),
.vectorial_op_i ( fpu_vec_op ),
.tag_i ( fpu_tag ),
.simd_mask_i ( '1 ),
.in_valid_i ( fpu_in_valid ),
.in_ready_o ( fpu_in_ready ),
.hart_id_i ('0),
.operands_i (fpu_operands),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rm)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_ifmt)),
.vectorial_op_i(fpu_vec_op),
.tag_i (fpu_tag),
.simd_mask_i ('1),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i,
.result_o,
.status_o (fpu_status),
Expand Down
4 changes: 2 additions & 2 deletions core/include/ariane_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ package ariane_pkg;
localparam int unsigned LAT_COMP_FP16 = 'd1;
localparam int unsigned LAT_COMP_FP16ALT = 'd1;
localparam int unsigned LAT_COMP_FP8 = 'd1;
localparam int unsigned LAT_COMP_FP8ALT = 'd1;
localparam int unsigned LAT_COMP_FP8ALT = 'd1;
localparam int unsigned LAT_DIVSQRT = 'd2;
localparam int unsigned LAT_NONCOMP = 'd1;
localparam int unsigned LAT_CONV = 'd2;
localparam int unsigned LAT_SDOTP = 'd2;
localparam int unsigned LAT_SDOTP = 'd2;

localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602};
localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3};
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv32a60x_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 1;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv32a6_embedded_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 1;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv32a6_ima_sv32_fpga_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 0;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv32a6_imac_sv0_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 0;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv32a6_imac_sv32_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 0;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv32a6_imafc_sv32_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 0;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv64a6_imafdc_sv39_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 1;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv64a6_imafdc_sv39_openpiton_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 0;
Expand Down
2 changes: 1 addition & 1 deletion core/include/cv64a6_imafdcsclic_sv39_config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ package cva6_config_pkg;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigF8AltEn = 0;
localparam CVA6ConfigFVecEn = 0;

localparam CVA6ConfigCvxifEn = 0;
Expand Down
2 changes: 1 addition & 1 deletion core/include/std_cache_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ package std_cache_pkg;
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic dirty; // state array
logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] dirty; // state array
} cache_line_t;

// cache line byte enable
Expand Down
2 changes: 1 addition & 1 deletion corev_apu/tb/common/tb_dcache_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ package tb_pkg;
parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...

// tb_readport sequences
typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ } seq_t;
typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ, HALF_SEQ } seq_t;

typedef enum logic [1:0] { OTHER, BYPASS, CACHED } port_type_t;

Expand Down
Loading

0 comments on commit 4c48ade

Please sign in to comment.