Skip to content

Commit

Permalink
Merge pull request #15 from pulp-platform/chi/increase_ce_array
Browse files Browse the repository at this point in the history
Allow larger accelerator dimensions.
  • Loading branch information
yvantor authored Jan 11, 2024
2 parents 473371b + 0f15ce9 commit 4b74428
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 89 deletions.
41 changes: 35 additions & 6 deletions rtl/redmule_ctrl.sv
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS
logic [4:0] w_computed;
logic [15:0] w_rows;
logic [15:0] w_rows_iter, w_row_count_d, w_row_count_q;
logic [15:0] z_storings_d, z_storings_q, tot_stores;
logic [15:0] z_storings_d, z_storings_q, tot_stores, issued_store_d, issued_store_q;

typedef enum logic [2:0] {REDMULE_IDLE, REDMULE_STARTING, REDMULE_COMPUTING, REDMULE_BUFFERING, REDMULE_STORING, REDMULE_FINISHED} redmule_ctrl_state;
redmule_ctrl_state current, next;
Expand Down Expand Up @@ -191,6 +191,18 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS
end
assign accumulate_o = accumulate_q & !accumulate_ctrl_q;

// Registered copy of the `finish` flag.
// finish_d is driven by the combinational control logic; finish_q is its
// sampled value. The register is reset asynchronously by the active-low
// rst_ni and cleared synchronously whenever `clear` is asserted.
logic finish_d, finish_q;
always_ff @(posedge clk_i or negedge rst_ni) begin : finish_sampler
  if (~rst_ni)
    finish_q <= 1'b0;       // asynchronous reset
  else if (clear)
    finish_q <= 1'b0;       // synchronous clear takes priority over the update
  else
    finish_q <= finish_d;   // sample the next-state value
end

always_ff @(posedge clk_i or negedge rst_ni) begin : last_w_row_reg
if(~rst_ni) begin
last_w_row <= 1'b0;
Expand All @@ -202,6 +214,17 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS
end
end

// State register for the issued-store counter.
// issued_store_q follows issued_store_d on every rising clock edge, unless
// the active-low reset (asynchronous) or `clear` (synchronous) forces it
// back to zero.
always_ff @(posedge clk_i or negedge rst_ni) begin : issued_store
  if (~rst_ni)
    issued_store_q <= '0;               // asynchronous reset
  else if (clear)
    issued_store_q <= '0;               // synchronous clear wins over the update
  else
    issued_store_q <= issued_store_d;   // load the next counter value
end

// This register counts the number of times we exit from the REDMULE_STORING
// state and go to the REDMULE_COMPUTING one. Every time this happens, it
// means that a piece of computation was done, and we can track the number
Expand Down Expand Up @@ -258,6 +281,8 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS
last_w_row_rst = 1'b0;
w_row_count_d = w_row_count_q;
z_storings_d = z_storings_q;
issued_store_d = issued_store_q;
finish_d = finish_q;
accumulate_en = 1'b0;
accumulate_rst = 1'b0;
storing_rst = 1'b0;
Expand All @@ -276,6 +301,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS
z_buffer_clk_en = 1'b1;
if ( (slave_start & tiler_valid) || test_mode_i) begin
tiler_setback = 1'b1;
finish_d = 1'b0;
next = REDMULE_STARTING;
end
else
Expand Down Expand Up @@ -307,20 +333,23 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS

case (last_w_row)
1'b0: begin
if (w_computed == Height - 1) begin
if (!accumulate_q)
if (w_computed == NumPipeRegs) begin
if (!accumulate_q && !finish_q)
accumulate_en = 1'b1;
if (count_w_q)
w_computed_rst = 1'b1;
end
end

1'b1: begin
if (w_computed == Height - 2 && reg_enable_i) begin
if (w_computed == NumPipeRegs - 1 && reg_enable_i) begin
w_row_count_d = 16'd1;
issued_store_d = issued_store_q + 'd1;
next = REDMULE_BUFFERING;
if (accumulate_q)
if (accumulate_q) begin
accumulate_rst = 1'b1;
finish_d = (issued_store_q == tot_stores - 1) ? 1'b1 : 1'b0;
end
if (count_w_q)
w_computed_rst = 1'b1;
end else
Expand All @@ -337,7 +366,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS
w_row_count_d = w_row_count_q + 1;
z_fill_o = reg_enable_i;
if (flgs_z_buffer_i.full) begin
accumulate_en = 1'b1;
accumulate_en = finish_q ? 1'b0 : 1'b1;
next = REDMULE_STORING;
end
else
Expand Down
7 changes: 4 additions & 3 deletions rtl/redmule_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*
* RedMulE Package
*/

`include "hci/typedef.svh"
`include "hci/assign.svh"
`include "hwpe-ctrl/typedef.svh"
Expand All @@ -28,7 +29,7 @@ import hwpe_stream_package::*;

package redmule_pkg;

parameter int unsigned DATA_W = 288; // TCDM port dimension (in bits)
parameter int unsigned DATA_W = 544; // TCDM port dimension (in bits)
parameter int unsigned MemDw = 32;
parameter int unsigned NumByte = MemDw/8;
parameter int unsigned ADDR_W = hci_package::DEFAULT_AW;
Expand All @@ -37,9 +38,9 @@ package redmule_pkg;
parameter int unsigned N_CONTEXT = 2;
parameter fpnew_pkg::fp_format_e FPFORMAT = fpnew_pkg::FP16;
parameter int unsigned BITW = fpnew_pkg::fp_width(FPFORMAT);
parameter int unsigned ARRAY_HEIGHT = 4;
parameter int unsigned ARRAY_HEIGHT = 8;
parameter int unsigned PIPE_REGS = 3;
parameter int unsigned ARRAY_WIDTH = 12; /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */
parameter int unsigned ARRAY_WIDTH = ARRAY_HEIGHT*PIPE_REGS; // Superior limit, smaller values are allowed.
parameter int unsigned TOT_DEPTH = DATAW/BITW;
parameter int unsigned DEPTH = TOT_DEPTH/ARRAY_HEIGHT;
parameter int unsigned STRB = DATA_W/8;
Expand Down
25 changes: 20 additions & 5 deletions rtl/redmule_scheduler.sv
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,18 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : tot_stores_counter
end
end

// 16-bit state register for the consumed-Y counter.
// consumed_y_d is the next value computed by the combinational logic;
// consumed_y_q is the registered value. It is zeroed asynchronously by the
// active-low rst_ni and synchronously when either clear_i or clear_regs
// is asserted.
logic [15:0] consumed_y_d, consumed_y_q;
always_ff @(posedge clk_i or negedge rst_ni) begin : y_consumed_counter
  if (~rst_ni)
    consumed_y_q <= '0;             // asynchronous reset
  else if (clear_i || clear_regs)
    consumed_y_q <= '0;             // synchronous clear from either source
  else
    consumed_y_q <= consumed_y_d;   // load the next counter value
end

always_comb begin
if (store_rows_lftovr_q == '0 || tot_z_stored_q < store_rows_lftovr_q) begin
if (store_cols_lftovr_q) begin
Expand Down Expand Up @@ -989,6 +1001,7 @@ transfer_count_d = transfer_count_q;
gate_count_d = gate_count_q;
store_count_d = store_count_q;
tot_store_d = tot_store_q;
consumed_y_d = consumed_y_q;
y_rows_iter_d = y_rows_iter_q;
y_cols_iter_d = y_cols_iter_q;

Expand Down Expand Up @@ -1116,6 +1129,7 @@ clear_regs = 1'b0;
if (reg_file_i.hwpe_params[OP_SELECTION][0]) begin
y_push_rst = (flgs_z_buffer_i.y_pushed) ? 1'b1 : 1'b0;
consume_y_rst = (flgs_z_buffer_i.y_pushed && consume_y_q) ? 1'b1 : 1'b0;
consumed_y_d = consume_y_rst ? consumed_y_q + 'd1 : consumed_y_q;
if (!accumulate_i && consume_y_q && !skip_w_q) begin
flgs_scheduler_o.y_push_enable = 1'b1;
z_buffer_clk_en = 1'b1;
Expand Down Expand Up @@ -1183,7 +1197,8 @@ clear_regs = 1'b0;
if (w_valid_i == 1'b1 && w_strb_i == '1) begin
w_loaded = 1'b1;
count_w_cycles_en = (!count_w_cycles_q & x_preloaded_q) ? 1'b1 : 1'b0;
if (reg_file_i.hwpe_params[OP_SELECTION][0]) begin
if (reg_file_i.hwpe_params[OP_SELECTION][0] &&
consumed_y_q < reg_file_i.hwpe_params[LEFT_PARAMS][31:16]) begin
y_push_en = (!y_push_q) ? 1'b1 : 1'b0;
consume_y_en = (!consume_y_q) ? 1'b1 : 1'b0;
end
Expand All @@ -1210,9 +1225,9 @@ clear_regs = 1'b0;
if ((reg_file_i.hwpe_params[LEFTOVERS][7:0] != '0) && (y_cols_lftovr_q == '0))
y_cols_lftovr_en = 1'b1;
end
end else if (d_shift_d == H && !flgs_streamer_i.x_stream_source_flags.ready_start) begin
end else if ((d_shift_d == NumPipeRegs + 'd1) && !flgs_streamer_i.x_stream_source_flags.ready_start) begin
load_x_en = 1'b1;
d_shift_d = '0;
d_shift_d = '0;
next = LOAD_X;
if (x_cols_iter_q == reg_file_i.hwpe_params[X_ITERS][15:0]) begin
if ( (reg_file_i.hwpe_params[LEFTOVERS][23:16] != '0) && (x_cols_lftovr_q == '0) )
Expand Down Expand Up @@ -1245,7 +1260,7 @@ clear_regs = 1'b0;
x_buffer_clk_en = 1'b1;
if (reg_file_i.hwpe_params[OP_SELECTION][0]) begin
if (!accumulate_i & !skip_w_q) begin
flgs_scheduler_o.y_push_enable = 1'b1;
flgs_scheduler_o.y_push_enable = consume_y_q ? 1'b1 : 1'b0;
z_buffer_clk_en = 1'b1;
end
consume_y_rst = (flgs_z_buffer_i.y_pushed && consume_y_q) ? 1'b1 : 1'b0;
Expand Down Expand Up @@ -1340,7 +1355,7 @@ clear_regs = 1'b0;
gate_count_d = '0;

if (!accumulate_i && reg_file_i.hwpe_params[OP_SELECTION][0] && !skip_w_q) begin
flgs_scheduler_o.y_push_enable = 1'b1;
flgs_scheduler_o.y_push_enable = consume_y_q ? 1'b1 : 1'b0;
z_buffer_clk_en = 1'b1;
end

Expand Down
4 changes: 2 additions & 2 deletions rtl/redmule_z_buffer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ always_ff @(posedge buffer_clock or negedge rst_ni) begin : z_buffer
z_buffer_q <= '0;
else if (ctrl_i.fill || ctrl_i.y_push_enable) begin
if (reg_enable_i) begin
for (int d = 0; d < D; d++) begin
for (int d = 0; d < D; d++) begin
for (int w = 0; w < W; w++)
z_buffer_q[d][w] <= (d == 0) ? z_buffer_i[w] : z_buffer_q[d-1][w];
end
end else
end else
z_buffer_q <= z_buffer_q;
end else if (ctrl_i.store && ctrl_i.ready) begin
for (int w = 0; w < W; w++) begin
Expand Down
6 changes: 3 additions & 3 deletions sw/archi_redmule.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@

// RedMulE architecture
#define ADDR_WIDTH 32
#define DATA_WIDTH 256
#define DATA_WIDTH 512
#define REDMULE_FMT 16
#define ARRAY_HEIGHT 4
#define ARRAY_HEIGHT 8
#define PIPE_REGS 3
#define ARRAY_WIDTH 12 /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */
#define ARRAY_WIDTH 24 /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */

// Base address
#define REDMULE_BASE_ADD 0x00100000
Expand Down
3 changes: 2 additions & 1 deletion tb/redmule_complex_tb.sv
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@ timeunit 1ps;
timeprecision 1ps;

module redmule_complex_tb;
import redmule_pkg::*;

// parameters
parameter int unsigned PROB_STALL = 0;
parameter int unsigned NC = 1;
parameter int unsigned ID = 10;
parameter int unsigned DW = 288;
parameter int unsigned DW = redmule_pkg::DATA_W;
parameter int unsigned MP = DW/32;
parameter int unsigned MEMORY_SIZE = 192*1024;
parameter int unsigned STACK_MEMORY_SIZE = 192*1024;
Expand Down
3 changes: 2 additions & 1 deletion tb/redmule_tb.sv
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ timeunit 1ps;
timeprecision 1ps;

module redmule_tb;
import redmule_pkg::*;

// parameters
parameter int unsigned PROB_STALL = 0;
parameter int unsigned NC = 1;
parameter int unsigned ID = 10;
parameter int unsigned DW = 288;
parameter int unsigned DW = redmule_pkg::DATA_W;
parameter int unsigned MP = DW/32;
parameter int unsigned MEMORY_SIZE = 192*1024;
parameter int unsigned STACK_MEMORY_SIZE = 192*1024;
Expand Down
Loading

0 comments on commit 4b74428

Please sign in to comment.