From 973e44e7c0477a3eed924912e9be3de74bf26a56 Mon Sep 17 00:00:00 2001 From: Sylvain Lefebvre Date: Sun, 19 Nov 2023 10:28:04 +0100 Subject: [PATCH] ice-v-swirl, 5 stages --- projects/ice-v/CPUs/ice-v-conveyor.si | 49 +++++++++++--- projects/ice-v/CPUs/ice-v-swirl.si | 70 +++++++++++--------- projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp | 8 +-- projects/ice-v/SOCs/ice-v-soc-swirl-cache.si | 2 +- projects/ice-v/SOCs/ice-v-soc-swirl.si | 2 +- projects/ice-v/src/fire.c | 2 + 6 files changed, 87 insertions(+), 46 deletions(-) diff --git a/projects/ice-v/CPUs/ice-v-conveyor.si b/projects/ice-v/CPUs/ice-v-conveyor.si index 60623739..33ee6120 100644 --- a/projects/ice-v/CPUs/ice-v-conveyor.si +++ b/projects/ice-v/CPUs/ice-v-conveyor.si @@ -4,7 +4,9 @@ // // https://github.com/sylefeb/Silice // MIT license, see LICENSE_MIT in Silice repo root -$$DEBUG = nil +$$DEBUG_conveyor = nil +$$TRACE_conveyor = nil + // -------------------------------------------------- $$if ICEV_FAST_SHIFT then $$print("Ice-V-conveyor configured for fast shift (barrel shifter)") @@ -91,7 +93,11 @@ unit rv32i_cpu_conveyor(bram_port mem,bram_port rom) { // decoder + ALU, executes the instruction and tells processor what to do decode_and_ALU_conveyor exec; // unit is defined after the CPU $$if SIMULATION then - uint32 cycle(0); // cycle counter for simulation + uint32 cycle(0); // cycle counter for simulation + uint32 nretired(0); // number of retired instr. +$$end +$$if TRACE_conveyor then + uint1 trace_on <:: nretired > 3145018; $$end always { @@ -121,7 +127,7 @@ $$end if ( written_regs[Rtype(rom.rdata).rs1,1] | written_regs[Rtype(rom.rdata).rs2,1] | (has_rd & written_regs[Rtype(rom.rdata).rd,1])) { -$$if DEBUG then +$$if DEBUG_conveyor then __display("[1] *** data hazard! 
***"); $$end bubble = ~alu_was_working[0,1]; @@ -136,7 +142,7 @@ $$end : pc; // flag register being written written_regs_ins = {31b0,has_rd & ~bubble} << Rtype(rom.rdata).rd; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[1] instr: %x @%x (bubble:%b refetch:%b alu:%b alu_was:%b)",instr,pc<<2,bubble,refetch,exec.working,alu_was_working); $$end // remember ALU was just busy @@ -150,7 +156,7 @@ $$end // ^^^ ^^^ start a bubble on refectch or ALU busy // give instruction, pc and registers to decoder+ALU instr = bubble ? exec.instr : instr; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[2] instr: %x @%x (bubble:%b)",instr,pc<<2,bubble); $$end $$if SIMULATION then @@ -169,7 +175,7 @@ $$end // start a bubble on refectch or ALU busy, resume when ALU stops being busy bubble = (bubble & ~alu_was_working[2,1]) | refetch | exec.working; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[3] instr: %x @%x (bubble:%b)",instr,pc<<2,bubble); $$end // memory address from which to load/store @@ -213,7 +219,7 @@ $$end xregsA.wenable1 = ~no_rd & ~bubble & ~refetch; xregsA.addr1 = rd; xregsA.wdata1 = load ? loaded : write_back; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[4] instr: %x @%x (bubble:%b refetch:%b), loaded:%x, alu_n:%x", instr,pc<<2,bubble,refetch,loaded,alu_n); if (~bubble & ~refetch) { @@ -221,12 +227,30 @@ $$if DEBUG then instr,pc<<2,jump,Rtype(instr).rd,xregsA.wdata1,xregsA.wenable1); } $$end +$$if SIMULATION then + uint1 instr_done = ~bubble & ~refetch & ~reset; +$$end $$if ICEV_VERILATOR_TRACE then // this is used by SOCs/ice-v-cmp, to track retired instr. 
and compare CPUs - if (~bubble & ~refetch & ~reset) { + if (instr_done) { __verilog("$c32(\"cpu_retires(2,\",%,\",\",%,\",\",%,\",\",%,\");\");", pc<<2,instr,Rtype(instr).rd,xregsA.wdata1); } +$$end +$$if TRACE_conveyor then + if (instr_done) { + nretired = nretired + 1; + } + if (trace_on) { + if (instr_done) { + __write("@%h %h ",pc<<2,instr); + if (xregsA.wenable1) { + __display("x[%d]=%h",Rtype(instr).rd,xregsA.wdata1); + } else { + __display(""); + } + } + } $$end // clear register write bit written_regs_del = {31b0,~bubble & ~no_rd & ~refetch} << rd; @@ -246,7 +270,7 @@ $$end ? 0 // on a refetch, we reset the set of written regs : (written_regs & ~written_regs_del) | written_regs_ins ) & (~32b1); // keep x0 clear -$$if DEBUG then +$$if DEBUG_conveyor then __display("++ %b ",written_regs_ins); __display("-- %b ",written_regs_del); __display("= %b ",written_regs); @@ -258,6 +282,13 @@ $$end xregsB.wenable1 = xregsA.wenable1; xregsB.wdata1 = xregsA.wdata1; xregsB.addr1 = xregsA.addr1; +$$if TRACE_conveyor then + if (trace_on) { + if (|mem.wenable) { + __display("store @%x = %x",mem.addr<<2,mem.wdata); + } + } +$$end $$if SIMULATION then cycle = cycle + 1; $$end diff --git a/projects/ice-v/CPUs/ice-v-swirl.si b/projects/ice-v/CPUs/ice-v-swirl.si index 5822033a..495336f1 100644 --- a/projects/ice-v/CPUs/ice-v-swirl.si +++ b/projects/ice-v/CPUs/ice-v-swirl.si @@ -22,7 +22,7 @@ $include('../../common/divint_std.si') $$end // set to 1 for a copious amount of debug output in simulation -$$DEBUG_swirl = 1 +$$DEBUG_swirl = nil $$TRACE_swirl = nil // -------------------------------------------------- @@ -119,7 +119,7 @@ $$if ICEV_STALL then input uint1 stall_cpu, $$end $$if TRACE_swirl then - input uint1 trace_on, + // input uint1 trace_on, $$end ) { @@ -131,7 +131,7 @@ $$end // stage 1 => stage 2 uint$addrW$ pc(0); uint32 instr(0); - // stage 2 => stage 3 + // stage 2 => stage 4 int32 xb(0); // stage 3 => stage 4 int32 alu_r(0); int32 alu_val(0); @@ -143,6 +143,8 @@ 
$$end uint1 no_rd_4(0); uint5 rd_4(0); // stage 5 => stage 3 uint1 jumping(0); + // stage 4 => outside + int32 xb_store(0); uint$addrW+2$ alu_n_store(0); // pipeline control signals uint1 hold(0); uint1 bubble(0); uint1 stage3_bubble(0); uint$addrW$ refetch_addr(0); uint1 refetch(0); uint1 stage4_bubble(0); @@ -157,10 +159,11 @@ $$end decode_and_ALU_swirl exec; $$if SIMULATION then - uint32 cycle(0); uint32 reinstr(0); + uint32 cycle(0); uint32 nretired(0); $$end $$if TRACE_swirl then uint32 last_cycle(0); + uint1 trace_on <:: nretired > 3145018; $$end $$if not ICEV_STALL then uint1 stall_cpu(0); // stall disabled, never used @@ -170,7 +173,7 @@ $$end always { $$if DEBUG_swirl then - uint1 debug_on = 1; // {pc,2b00} > 24h01b050 && {pc,2b00} < 24h01b05f; // (cycle > 40333206) && (cycle < 40339206); + uint1 debug_on = nretired > 3145018; $$end // tracks whether a register was written cycle before @@ -253,8 +256,8 @@ $$end | store; // by default we select the register value read after stage 1 // (assuming no data haward) - xa_regR = 1; xa_regW = 0; xa_regW_prev = 0; xa_keep = 0; - xb_regR = 1; xb_regW = 0; xb_regW_prev = 0; xb_keep = 0; + xa_regR = ~hold; xa_regW = 0; xa_regW_prev = 0; xa_keep = hold; + xb_regR = ~hold; xb_regW = 0; xb_regW_prev = 0; xb_keep = hold; // [data hazards] case (c) detection // instruction in stage 3 will (cycle+2) write on a register needed now // instruction in stage 4 will (cycle+1) write on a register needed now @@ -287,11 +290,6 @@ if (debug_on) { } } $$end - if (hold) { - // holding, keep the same values on ALU inputs vvvv - xa_regR = 0; xa_regW = 0; xa_regW_prev = 0; xa_keep = 1; - xb_regR = 0; xb_regW = 0; xb_regW_prev = 0; xb_keep = 1; - } // update bubble bubble = (bubble | refetch | exec.working | hold); $$if DEBUG_swirl then @@ -335,7 +333,7 @@ $$end $$if DEBUG_swirl then if (debug_on) { if (~stall_cpu | on_stall) { - __display("[2] *** data hazard (b) on rs1 *** rs1[%d] rs2[%d] 
rd(stage4)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd_4); + __display("[2] *** data hazard (b) on rs1 *** rs1[%d] rs2[%d] rd(stage5)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd_4); } } $$end @@ -346,7 +344,7 @@ $$end $$if DEBUG_swirl then if (debug_on) { if (~stall_cpu | on_stall) { - __display("[2] *** data hazard (b) on rs2 *** rs1[%d] rs2[%d] rd(stage4)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd_4); + __display("[2] *** data hazard (b) on rs2 *** rs1[%d] rs2[%d] rd(stage5)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd_4); } } $$end @@ -385,8 +383,11 @@ $$end storeVal = exec.storeVal; // track bpred prev_bpred = bpred; + // capture xb set by stage 2 on previous cycle (for store) + xb = exec.xb; } -> { // ==== stage 4 ===================================================== - + // start a bubble on refetch or ALU busy + bubble = ((bubble & ~alu_was_working) | refetch | exec.working); $$if DEBUG_swirl then if (debug_on) { if (~stall_cpu | on_stall) { @@ -394,17 +395,20 @@ if (debug_on) { } } $$end - + // record rd, no_rd for the stage (used in hazard detection) rd_4 = rd_3; no_rd_4 = no_rd_3 | bubble; // ^^^^ disables data hazard in stage 2 on a bubble stage4_bubble ^= bubble; + // value to store + xb_store = xb; + alu_n_store = alu_n; // memory address from which to load/store $$if not ICEV_STALL then - dmem.addr = (alu_n >> 2); + dmem.addr = (alu_n >> 2); $$else - dmem.addr = (store|load) & ~bubble & ~jumping - ? (alu_n >> 2) : dmem.addr; + dmem.addr = (store|load) & ~bubble & ~jumping + ? 
(alu_n >> 2) : dmem.addr; // ^^ if a cache is used, we preserve dmem.addr when not accessing dmem $$end if (store & ~bubble & ~jumping) { @@ -452,16 +456,15 @@ $$if ICEV_VERILATOR_TRACE then if (instr_done) { __verilog("$c32(\"cpu_retires(3,\",%,\",\",%,\",\",%,\",\",%,\");\");", pc<<2,instr,Rtype(instr).rd,xregsA.wdata1); - reinstr = reinstr + 1; } $$end $$if DEBUG_swirl then if (debug_on) { if (~stall_cpu | on_stall) { - __display("[5] instr: %x @%x (bubble:%b jump:%b bpred:%b load:%b) reinstr:%d",instr,pc<<2,bubble,jump,bpred,load,reinstr); + __display("[5] instr: %x @%x (bubble:%b jump:%b bpred:%b load:%b) nretired:%d",instr,pc<<2,bubble,jump,bpred,load,nretired); if (instr_done) { - __display("[5] ++++ %x (@%x) jump %b, wreg:[%d]=%x (%b) reinstr:%d", - instr,pc<<2,jump,Rtype(instr).rd,xregsA.wdata1,xregsA.wenable1,reinstr); + __display("[5] ++++ %x (@%x) jump %b, wreg:[%d]=%x (%b) nretired:%d", + instr,pc<<2,jump,Rtype(instr).rd,xregsA.wdata1,xregsA.wenable1,nretired); } } if (xregsA.wenable1) { @@ -469,6 +472,11 @@ if (debug_on) { } } $$end +$$if SIMULATION then + if (instr_done) { + nretired = nretired + 1; + } +$$end $$if TRACE_swirl then if (trace_on) { if (instr_done) { @@ -493,6 +501,8 @@ $$if ICEV_STALL then // (on a stall, refetch at the ongoing refetch if one is in progress) uint1 was_refetching = refetching & ~instr_done; refetching = refetch | was_refetching; +$$else + uint1 was_refetching(0); $$end jumping ^= refetch; // warn stage 3 (cancel any store) exec.cancel = refetch; // cancel any pending ALU op on refetch @@ -532,25 +542,23 @@ $$end | xb_regW_prev ? xregsB.wdata1 : 32b0; // what to write on a store dmem.wdata = stall_cpu ? dmem.wdata - : (/*xb_regW ? 
xregsA.wdata1 :*/ xb) << {exec.n[0,2],3b000}; - // ^^---------\ - // capture xb from stage 2 so that stage 3 assign above sees the correct value + : (xb_store) << {alu_n_store[0,2],3b000}; + // ^^^^^^^^ ^^^^^^^^^^^ + // captured from stage 2 at stage 4 so that store sees the correct value $$if DEBUG_swirl then if (debug_on) { if (dmem.wenable) { - __display("[3] store @%x = %x",dmem.addr<<2,dmem.wdata); + __display("[4] store @%x = %x",dmem.addr<<2,dmem.wdata); } } $$end $$if TRACE_swirl then - //uint32 full = dmem.addr<<2; - if (trace_on /*|| full[8,16] == 16h14cd*/) { + if (trace_on) { if ((|dmem.wenable) & ~stall_cpu) { __display("store @%x = %x",dmem.addr<<2,dmem.wdata); } } $$end - xb = exec.xb; // register bank B follows A writes xregsB.wenable1 = xregsA.wenable1; xregsB.wdata1 = xregsA.wdata1; @@ -559,7 +567,7 @@ $$if DEBUG_swirl then if (debug_on) { if (~stall_cpu) { __display("exec.xa = %x exec.xb = %x mem.wdata = %x",exec.xa,exec.xb,dmem.wdata); - __display("exec.jump = %b exec.n = %x",exec.jump,exec.n); + __display("exec.jump = %b exec.n = %x xb_store = %x",exec.jump,exec.n,xb_store); __display("xa_keep %b xa_regR %b xa_regW %b xa_regW_prev %b",xa_keep,xa_regR,xa_regW,xa_regW_prev); __display("xb_keep %b xb_regR %b xb_regW %b xb_regW_prev %b",xb_keep,xb_regR,xb_regW,xb_regW_prev); __display("imem.addr @%x, dmem.addr @%x\n",imem.addr<<2,dmem.addr<<2); diff --git a/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp b/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp index 07895ecf..89d9746d 100644 --- a/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp +++ b/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp @@ -61,8 +61,8 @@ void check_and_synch() ++ num_retired_synch; #if 0 // verify coherence - if (retired[0].front() == retired[1].front() - && retired[0].front() == retired[2].front()) { + if ( retired[0].front() == retired[1].front() + && retired[1].front() == retired[2].front()) { for (int i = 0 ; i < 3 ; ++i) { retired[i].pop_front(); } @@ -125,8 +125,8 @@ void cpu_retires(int id,unsigned 
int pc,unsigned int instr, exit (-1); } t_retired_instr ri; - ri.pc = pc; ri.instr = instr; - ri.rd = rd; ri.val = val; + ri.pc = pc; ri.instr = instr; + ri.rd = rd&31; ri.val = val; retired[id-1].push_back(ri); ++ num_retired[id-1]; check_and_synch(); diff --git a/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si b/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si index 153ea64f..faacdd5b 100644 --- a/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si +++ b/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si @@ -255,7 +255,7 @@ $$end algorithm { $$if SIMULATION and not BARE then //while (1) { } - while (cycle < 250) { } + while (cycle < 1000) { } __display("stopping at cycle %d",cycle); $$else while (1) { } diff --git a/projects/ice-v/SOCs/ice-v-soc-swirl.si b/projects/ice-v/SOCs/ice-v-soc-swirl.si index e0a8d4e7..b8f975da 100644 --- a/projects/ice-v/SOCs/ice-v-soc-swirl.si +++ b/projects/ice-v/SOCs/ice-v-soc-swirl.si @@ -158,7 +158,7 @@ $$if SIMULATION then } $$end $$if OLED then - if (memio.addr[1,1]) { __display("[cycle %d] OLED: byte %x",cycle,displ_byte); } + // if (memio.addr[1,1]) { __display("[cycle %d] OLED: byte %x",cycle,displ_byte); } // command displ_en = (mem.wdata[9,1] | mem.wdata[10,1]) & memio.addr[1,1]; // reset diff --git a/projects/ice-v/src/fire.c b/projects/ice-v/src/fire.c index bf6b440d..d0381a93 100644 --- a/projects/ice-v/src/fire.c +++ b/projects/ice-v/src/fire.c @@ -4,6 +4,8 @@ #include "oled.h" +void f_putchar(int c) { (void)c; } /* no-op stub: discards its argument; the parameter is named because C before C23 rejects unnamed parameters in function definitions */ + unsigned char tbl[32*32]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,