Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenPiton Transducer in WARP-V #50

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions formal/warp-v_formal.tlv
Original file line number Diff line number Diff line change
@@ -3,12 +3,67 @@ m4+definitions
m4_define(['M4_ISA'], ['RISCV'])
// Configure for formal verification.
m4_define(['M4_FORMAL'], 1)
m4_define(['M4_OPENPITON'], 0)
m4_define(['M4_RISCV_FORMAL_ALTOPS'], 1)
m4_define(['M4_VIZ'], 0)
m4_define(['M4_STANDARD_CONFIG'], ['1-stage'])
\SV
// Include WARP-V.
m4_include_lib(['./warp-v.tlv'])
// Data memory model with per-column write enables, placed in the
// formal-verification top file.
//
// Parameters:
//   SIZE       : number of words in the RAM array.
//   ADDR_WIDTH : kept in the parameter list but not referenced in this module.
//   COL_WIDTH  : width in bits of one write-enable column.
//   NB_COL     : number of columns per word.
//
// Behaviour:
//   - Read : on a rising clock edge with mem_ready high, the word addressed by
//            mem_addr is captured and then presented on mem_rdata.
//   - Write: on a rising clock edge with mem_valid high and a legal strobe
//            pattern, every column whose mem_wstrb bit is set is updated from
//            the matching slice of mem_wdata.
//   - mem_instr is accepted but unused.
//
// NOTE(review): mem_addr is a full data-width bus used directly as the array
// subscript, so an address at or above SIZE falls outside the array. Confirm
// that the caller only presents in-range word numbers.
module dmem_ext #(parameter SIZE = 1024, ADDR_WIDTH = 10, COL_WIDTH = 8, NB_COL = 4) (
   input                         clk,
   input                         mem_valid,
   input                         mem_instr,
   input                         mem_ready,
   input  [NB_COL*COL_WIDTH-1:0] mem_addr,
   input  [NB_COL*COL_WIDTH-1:0] mem_wdata,
   input  [NB_COL-1:0]           mem_wstrb,
   output [NB_COL*COL_WIDTH-1:0] mem_rdata
);

   // Free-running cycle counter. It has no reset and feeds no output here.
   reg [31:0] counter;
   always @(posedge clk)
      counter <= counter + 1'b1;

   // Storage array and the registered read value.
   reg [NB_COL*COL_WIDTH-1:0] RAM [SIZE-1:0];
   reg [NB_COL*COL_WIDTH-1:0] outputreg;

   // Registered read: the output only updates while mem_ready is high.
   always @(posedge clk)
      if (mem_ready)
         outputreg <= RAM[mem_addr];

   assign mem_rdata = outputreg;

   // A strobe is legal when it selects one full word, one aligned half word,
   // or exactly one column.
   wire valid_write_locn = (mem_wstrb == 4'b0001) ||
                           (mem_wstrb == 4'b0010) ||
                           (mem_wstrb == 4'b0100) ||
                           (mem_wstrb == 4'b1000) ||
                           (mem_wstrb == 4'b0011) ||
                           (mem_wstrb == 4'b1100) ||
                           (mem_wstrb == 4'b1111);

   // Common write qualifier shared by all columns.
   wire write_ok = mem_valid && valid_write_locn;

   // One write process per column, each guarded by its own strobe bit.
   generate
      genvar col;
      for (col = 0; col < NB_COL; col = col + 1) begin
         always @(posedge clk) begin
            if (write_ok && mem_wstrb[col])
               RAM[mem_addr][col*COL_WIDTH +: COL_WIDTH] <= mem_wdata[col*COL_WIDTH +: COL_WIDTH];
         end
      end
   endgenerate

endmodule
m4+module_def
\TLV
m4+warpv()
2 changes: 1 addition & 1 deletion impl/Makefile
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ out/%/status: warp-v_%.tlv ../warp-v.tlv
&& mv out/* . && rmdir out \
&& (exit `cat status`) \
&& sed -i 's/wire\( \[[^\[]\+\] L1_Mem_Value_\)/reg\1/' $(patsubst warp-v_%.tlv,warp-v_%.sv,$<) \
&& sed -i 's/wire\( \[[^\[]\+\] FETCH_Instr_Regs_vae_\)/reg\1/' $(patsubst %.tlv,%_gen.sv,$<)
&& sed -i 's/wire\( \[[^\[]\+\] FETCH_Instr_Regs_value_\)/reg\1/' $(patsubst %.tlv,%_gen.sv,$<)
@# Above sed HACKS fix bugs in the generated code.

impl:
95 changes: 95 additions & 0 deletions openpiton/byte_en.tlv
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
\m4_TLV_version 1d: tl-x.org
\SV
// m4_define_vector(['M4_INSTR'], 32) // ISA dependent instr width
// m4_define_vector(['M4_ADDR'], 32) // ISA dependent address width
m4_define_hier(['M4_DATA_MEM_WORDS'], 32) // number of dmem locations
m4_define(['M4_BITS_PER_ADDR'], 8) // 8 for byte addressing.
m4_define_vector(['M4_WORD'], 32) // machine width (RV32/64)
m4_define(['M4_ADDRS_PER_WORD'], m4_eval(M4_WORD_CNT / M4_BITS_PER_ADDR))
m4_define(['M4_SUB_WORD_BITS'], m4_width(m4_eval(M4_ADDRS_PER_WORD - 1)))
// m4_define_hier(M4_DATA_MEM_ADDRS, m4_eval(M4_DATA_MEM_WORDS_HIGH * M4_ADDRS_PER_WORD)) // Addressable data memory locations,
// can be useful in future

// Single-port RAM with a registered read and per-column write enables.
//
// Parameters:
//   SIZE       : number of words in the array.
//   ADDR_WIDTH : width of the addr port.
//   COL_WIDTH  : bits per write-enable column.
//   NB_COL     : columns per word.
//
// Every rising clock edge the word at addr is captured for dout, and each
// column whose we bit is high is overwritten from the matching slice of din.
// Because nonblocking assignments are used, a read of a location written in
// the same cycle returns the old contents.
module dmem_ext #(parameter SIZE = 1024, ADDR_WIDTH = 10, COL_WIDTH = 8, NB_COL = 4) (
   input                         clk,
   input  [NB_COL-1:0]           we,    // one write enable per column
   input  [ADDR_WIDTH-1:0]       addr,
   input  [NB_COL*COL_WIDTH-1:0] din,
   output [NB_COL*COL_WIDTH-1:0] dout
);

   reg [NB_COL*COL_WIDTH-1:0] RAM [SIZE-1:0];
   reg [NB_COL*COL_WIDTH-1:0] outputreg;

   assign dout = outputreg;

   // Registered read port.
   always @(posedge clk)
      outputreg <= RAM[addr];

   // Column-wise write port: walk the columns and update the enabled ones.
   integer col;
   always @(posedge clk) begin
      for (col = 0; col < NB_COL; col = col + 1) begin
         if (we[col])
            RAM[addr][col*COL_WIDTH +: COL_WIDTH] <= din[col*COL_WIDTH +: COL_WIDTH];
      end
   end

endmodule

m4_makerchip_module // (Expanded in Nav-TLV pane.)

// Macro that instantiates the dmem_ext module above inside the given scope.
// Arguments: the scope that owns the signals, the scope to create for the
// instance, then the clock, address, write-enable, write-data and read-data
// signal names.
// The memory geometry parameters are taken from the configuration macros set
// at the top of this file.
\TLV fake_dmem_sv(/_top, /_scope, $_clk, $_addr, $_write, $_din, $_dout)
/_scope
\SV_plus
dmem_ext #(
.SIZE(M4_DATA_MEM_WORDS_HIGH),
.ADDR_WIDTH(M4_DATA_MEM_WORDS_INDEX_HIGH),
.COL_WIDTH(M4_WORD_HIGH / M4_ADDRS_PER_WORD),
.NB_COL(M4_ADDRS_PER_WORD)
)
dmem_ext (
.clk (/_top$_clk),
// Only the address bits above the sub-word bits reach the memory.
.addr (/_top$_addr[M4_DATA_MEM_WORDS_INDEX_MAX + M4_SUB_WORD_BITS : M4_SUB_WORD_BITS]),
.we (/_top$_write),
.din (/_top$_din),
// Read data uses a fixed 32-bit range. NOTE(review): the doubled sigil
// presumably marks the signal as assigned by this instance - confirm.
.dout (/_top$['']$_dout[31:0])
);

\TLV
// Directed stimulus for the byte-enable memory instance.
// Cycles below 6 write one byte lane each at byte addresses 0 to 3. The next
// cycle writes a full word at hex address 10. Later cycles read back
// addresses 0, 4, 8 and c with all write enables low.
// Only the first arm of each selection chain is qualified by reset being low,
// so while reset is high the chain falls through to its later arms.
|mem
@0
$clk = *clk;
// NOTE(review): the address signal is 7 bits wide but every literal below is
// 6 bits wide. After cycle 11 the address is left unknown.
$addr[6:0] = (!(*reset) && *cyc_cnt < 3) ? 6'h0 :
(*cyc_cnt < 4) ? 6'h1 :
(*cyc_cnt < 5) ? 6'h2 :
(*cyc_cnt < 6) ? 6'h3 :
(*cyc_cnt < 7) ? 6'h10 :
(*cyc_cnt < 9) ? 6'h0 :
(*cyc_cnt < 10) ? 6'h4 :
(*cyc_cnt < 11) ? 6'h8 :
(*cyc_cnt < 12) ? 6'hc :
6'hXX ;

// One-hot byte enables for the four single-byte writes, then all lanes, then
// no lanes for the read-back phase.
$write[3:0] = (!(*reset) && *cyc_cnt <3) ? 4'b0001 :
(*cyc_cnt <4) ? 4'b0010 :
(*cyc_cnt <5) ? 4'b0100 :
(*cyc_cnt <6) ? 4'b1000 :
(*cyc_cnt <7) ? 4'b1111 :
4'b0;

// Write data is positioned in the byte lane that the matching enable selects.
$din[31:0] = (!(*reset) && *cyc_cnt < 3) ? 32'hCA :
(*cyc_cnt < 4) ? 32'hBA << 8 :
(*cyc_cnt < 5) ? 32'hDE << 16 :
(*cyc_cnt < 6) ? 32'h55 << 24:
32'h1234 ;

m4+fake_dmem_sv(|mem, /memscope, $clk, $addr, $write, $din, $dout)
// The read data is only consumed here and is not compared against expected
// values.
`BOGUS_USE($dout)
// Assert these to end simulation (before Makerchip cycle limit).
// The run passes once the cycle count exceeds 40 and never reports failure.
*passed = *cyc_cnt > 40;
*failed = 1'b0;
\SV
endmodule
29 changes: 29 additions & 0 deletions openpiton/dmem_ext.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Single-port RAM with a gated registered read and gated per-column writes.
//
// Parameters:
//   SIZE       : number of words in the array.
//   ADDR_WIDTH : width of the addr port.
//   COL_WIDTH  : bits per write-enable column.
//   NB_COL     : columns per word.
//
// Ports:
//   valid_st : global write qualifier; no column is written while it is low.
//   spec_ld  : read enable; dout holds its previous value while it is low.
//   we       : per-column write enables.
//   addr     : word address shared by the read and write paths.
//   din/dout : write data in, registered read data out.
module dmem_ext #(parameter SIZE = 1024, ADDR_WIDTH = 10, COL_WIDTH = 8, NB_COL = 4) (
    input                         clk, valid_st, spec_ld,
    input  [NB_COL-1:0]           we,    // one write enable per column
    input  [ADDR_WIDTH-1:0]       addr,
    input  [NB_COL*COL_WIDTH-1:0] din,
    output [NB_COL*COL_WIDTH-1:0] dout
);

    reg [NB_COL*COL_WIDTH-1:0] RAM [SIZE-1:0];
    reg [NB_COL*COL_WIDTH-1:0] outputreg;

    assign dout = outputreg;

    // Read port: capture the addressed word only when a load is requested.
    always @(posedge clk)
        if (spec_ld)
            outputreg <= RAM[addr];

    // Write port: update each enabled column, but only for a valid store.
    integer col;
    always @(posedge clk) begin
        for (col = 0; col < NB_COL; col = col + 1) begin
            if (valid_st && we[col])
                RAM[addr][col*COL_WIDTH +: COL_WIDTH] <= din[col*COL_WIDTH +: COL_WIDTH];
        end
    end

endmodule
85 changes: 85 additions & 0 deletions openpiton/fake_dmem_sv.tlv
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
\m4_TLV_version 1d: tl-x.org
\SV
m4_define_hier(['M4_DATA_MEM_WORDS'], 32)
m4_define_vector(['M4_INSTR'], 32)
m4_define_vector(['M4_ADDR'], 32)
m4_define(['M4_BITS_PER_ADDR'], 8) // 8 for byte addressing.
m4_define_vector(['M4_WORD'], 32)
// Default Makerchip TL-Verilog Code Template
m4_define(['M4_ADDRS_PER_WORD'], m4_eval(M4_WORD_CNT / M4_BITS_PER_ADDR))
m4_define(['M4_SUB_WORD_BITS'], m4_width(m4_eval(M4_ADDRS_PER_WORD - 1)))

// Two-port memory with 128 words of 32 bits: one synchronous write port
// and one synchronous, registered read port.
//
// Ports:
//   clk   : clock; all state updates on the rising edge.
//   rst   : synchronous reset; while high, every word is cleared and the
//           read register q is left unchanged.
//   ra    : read address.
//   wa    : write address.
//   write : write enable for the word at wa.
//   d     : write data.
//   q     : read data, registered (old contents when ra equals wa in the
//           same cycle as a write).
//
// The reset loop uses ordinary nonblocking assignments. The previous
// procedural continuous assign on an array element is not legal inside
// always_ff and would have forced the memory contents permanently.
module twoport4(
    input  logic        clk,
    input  logic        rst,
    input  logic [6:0]  ra, wa,
    input  logic        write,
    input  logic [31:0] d,
    output logic [31:0] q);

    logic [31:0] mem [0:127];

    always_ff @(posedge clk) begin
        if (rst) begin
            // Clear the whole array in one cycle.
            for (int i = 0; i < 128; i = i + 1)
                mem[i] <= 32'b0;
        end
        else begin
            if (write) mem[wa] <= d;
            q <= mem[ra];
        end
    end

endmodule
// =========================================
// Welcome! Try the tutorials via the menu.
// =========================================

// Macro providing required top-level module definition, random
// stimulus support, and Verilator config.
m4_makerchip_module // (Expanded in Nav-TLV pane.)

// Macro that instantiates the twoport4 module above inside the given scope.
// Arguments: the scope that owns the signals, the scope to create for the
// instance, then the clock, reset, read-address, write-address, write-enable,
// write-data and read-data signal names.
\TLV fake_dmem_sv(/_top, /_scope, $_clk, $_rst, $_ra, $_wa, $_write, $_d, $_q)
/_scope
\SV_plus
twoport4 twoport4(.clk (/_top$_clk),
.rst (/_top$_rst),
.ra (/_top$_ra),
.wa (/_top$_wa),
.write (/_top$_write),
.d (/_top$_d),
// Read data uses a fixed 32-bit range. NOTE(review): the doubled sigil
// presumably marks the signal as assigned by this instance - confirm.
.q (/_top$['']$_q[31:0]));

\TLV
// Directed stimulus for the two-port memory instance.
// The write address sequence runs one cycle ahead of the read address
// sequence, and the write enable is held high for the whole run.
|mem
@0
$reset = *reset;
$clk = *clk;
// NOTE(review): both address signals are 7 bits wide but the literals below
// are 6 bits wide.
// Read address: words 0 to 3 in turn, then word 4 for the rest of the run.
$ra[6:0] = (*cyc_cnt < 3) ? 6'h0 :
(*cyc_cnt < 4) ? 6'h1 :
(*cyc_cnt < 5) ? 6'h2 :
(*cyc_cnt < 6) ? 6'h3 :
6'h4 ;

// Write address: words 0 to 3 in turn, then hex 10 for the rest of the run.
$wa[6:0] = (*cyc_cnt < 2) ? 6'h0 :
(*cyc_cnt < 3) ? 6'h1 :
(*cyc_cnt < 4) ? 6'h2 :
(*cyc_cnt < 5) ? 6'h3 :
6'h10 ;
//$wa[6:0] = 6'b0;
$write = 1'b1;
// Write data changes in step with the write address.
$d[31:0] = (*cyc_cnt < 2) ? 32'hCAFE :
(*cyc_cnt < 3) ? 32'hBABE :
(*cyc_cnt < 4) ? 32'hDEAD :
(*cyc_cnt < 5) ? 32'h5555 :
32'h1234 ;
//|mem
m4+fake_dmem_sv(|mem, /memscope, $clk, $reset, $ra, $wa, $write, $d, $q)

// Assert these to end simulation (before Makerchip cycle limit).
// The run passes once the cycle count exceeds 40 and never reports failure.
// The read data is not compared against expected values.
*passed = *cyc_cnt > 40;
*failed = 1'b0;
\SV
endmodule
876 changes: 876 additions & 0 deletions openpiton/iop.h

Large diffs are not rendered by default.

4,249 changes: 4,249 additions & 0 deletions openpiton/warp-v_fake_sv_mem.tlv

Large diffs are not rendered by default.

249 changes: 249 additions & 0 deletions openpiton/warpv_openpiton_transducer.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
`include "iop.h"

`define MSG_DATA_SIZE_1B 3'b001 // define.h.pyv
`define MSG_DATA_SIZE_2B 3'b010
`define MSG_DATA_SIZE_4B 3'b011
`define L15_AMO_OP_WIDTH 4 // l15.h.pyv
`define PHY_ADDR_WIDTH 40 // define.h.pyv
`define PCX_REQTYPE_AMO `SWAP_RQ // l15.h.pyv
`define CPX_RESTYPE_ATOMIC_RES 4'b1110 // custom type l15.h.pyv


// Transducer between the WARP-V memory interface and the OpenPiton L1.5
// request/response interface.
//
// Request path (DECODER):
//   - warpv_transducer_mem_valid is registered; a rising edge of the
//     registered value marks a new request.
//   - A one-bit state (ACK_IDLE / ACK_WAIT) tracks whether an L1.5 ack is
//     still outstanding and shapes transducer_l15_val.
//   - A non-zero write strobe produces a store request whose size is derived
//     from the strobe pattern; a zero strobe produces a 4-byte load.
//   - The 32-bit address is sign-extended from bit 31 to the physical address
//     width, and write data is byte-swapped and duplicated into both halves
//     of the 64-bit data bus.
//
// Response path (ENCODER):
//   - Load / atomic responses select one 32-bit word out of the two 64-bit
//     return words using address bits [3:2], byte-swap it and assert ready.
//   - Store acks assert ready with zero read data.
//   - Interrupt returns with data_0[17:16] == 2'b01 raise warpv_int on the
//     following clock edge.
//
// Reset: rst_n is active low and sampled synchronously.
//
// Atomics are not generated: the request type never becomes the AMO type,
// warpv_transducer_mem_amo_op is unused, and transducer_l15_amo_op is tied
// to zero so that the output is never left undriven.
module warpv_openpiton_transducer(
    input logic                         clk,
    input logic                         rst_n,

    // WARP-V --> L1.5
    input                               warpv_transducer_mem_valid,
    input [31:0]                        warpv_transducer_mem_addr,
    input [ 3:0]                        warpv_transducer_mem_wstrb,

    input [31:0]                        warpv_transducer_mem_wdata,
    input [`L15_AMO_OP_WIDTH-1:0]       warpv_transducer_mem_amo_op,
    input                               l15_transducer_ack,
    input                               l15_transducer_header_ack,

    // outputs warpv uses
    output reg [4:0]                    transducer_l15_rqtype,
    output [`L15_AMO_OP_WIDTH-1:0]      transducer_l15_amo_op,
    output reg [2:0]                    transducer_l15_size,
    output                              transducer_l15_val,
    output [`PHY_ADDR_WIDTH-1:0]        transducer_l15_address,
    output [63:0]                       transducer_l15_data,
    output                              transducer_l15_nc,

    // outputs warpv doesn't use
    output [0:0]                        transducer_l15_threadid,
    output                              transducer_l15_prefetch,
    output                              transducer_l15_invalidate_cacheline,
    output                              transducer_l15_blockstore,
    output                              transducer_l15_blockinitstore,
    output [1:0]                        transducer_l15_l1rplway,
    output [63:0]                       transducer_l15_data_next_entry,
    output [32:0]                       transducer_l15_csm_data,

    //--- L1.5 -> WARP-V
    input                               l15_transducer_val,
    input [3:0]                         l15_transducer_returntype,

    input [63:0]                        l15_transducer_data_0,
    input [63:0]                        l15_transducer_data_1,

    output reg                          transducer_warpv_mem_ready,
    output [31:0]                       transducer_warpv_mem_rdata,

    output                              transducer_l15_req_ack,
    output reg                          warpv_int);

    localparam ACK_IDLE = 1'b0;
    localparam ACK_WAIT = 1'b1;

    // ** DECODER ** //

    // Two-stage history of the core's valid signal. A new request is the
    // rising edge of the registered valid, so it is seen one cycle after the
    // raw input rises.
    reg current_val;
    reg prev_val;
    wire new_request = current_val & ~prev_val;
    always @(posedge clk) begin
        if (!rst_n) begin
            current_val <= 0;
            prev_val <= 0;
        end
        else begin
            current_val <= warpv_transducer_mem_valid;
            prev_val <= current_val;
        end
    end

    // are we waiting for an ack
    reg ack_reg;
    reg ack_next;
    always @ (posedge clk) begin
        if (!rst_n) begin
            ack_reg <= 0;
        end
        else begin
            ack_reg <= ack_next;
        end
    end

    always @ (*) begin
        // be careful with these conditionals: an ack takes priority over a
        // new request arriving in the same cycle.
        if (l15_transducer_ack) begin
            ack_next = ACK_IDLE;
        end
        else if (new_request) begin
            ack_next = ACK_WAIT;
        end
        else begin
            ack_next = ack_reg;
        end
    end

    // if we haven't got an ack and it's an old request, valid should be high
    // otherwise if we got an ack valid should be high only if we got a new
    // request
    assign transducer_l15_val = (ack_reg == ACK_WAIT) ? warpv_transducer_mem_valid :
                                (ack_reg == ACK_IDLE) ? new_request :
                                warpv_transducer_mem_valid;

    reg [31:0] warpv_wdata_flipped;

    // unused wires tie to zero
    assign transducer_l15_threadid = 1'b0;
    assign transducer_l15_prefetch = 1'b0;
    assign transducer_l15_csm_data = 33'b0;
    assign transducer_l15_data_next_entry = 64'b0;
    assign transducer_l15_blockstore = 1'b0;
    assign transducer_l15_blockinitstore = 1'b0;

    // Atomics are not issued by this transducer, so the AMO opcode output is
    // driven to a constant instead of being left floating.
    // NOTE(review): all-zero is presumed to be the "no AMO" encoding in
    // l15.h - confirm before enabling atomics.
    assign transducer_l15_amo_op = {`L15_AMO_OP_WIDTH{1'b0}};

    // is this set when something in the l1 gets replaced? this transducer
    // never drives a replacement way
    assign transducer_l15_l1rplway = 2'b0;
    // cacheline invalidation is never requested by this transducer
    assign transducer_l15_invalidate_cacheline = 1'b0;

    // Address is sign-extended from bit 31. Requests are marked non-cacheable
    // when address bit 31 is set or the request type is the AMO type.
    assign transducer_l15_address = {{8{warpv_transducer_mem_addr[31]}}, warpv_transducer_mem_addr};
    assign transducer_l15_nc = warpv_transducer_mem_addr[31] | (transducer_l15_rqtype == `PCX_REQTYPE_AMO);
    assign transducer_l15_data = {warpv_wdata_flipped, warpv_wdata_flipped};

    // set rqtype specific data
    always @ *
    begin
        if (warpv_transducer_mem_valid) begin
            // store (any non-zero write strobe)
            if (warpv_transducer_mem_wstrb) begin
                transducer_l15_rqtype = `STORE_RQ;
                // endian wizardry: reverse the byte order of the write data
                warpv_wdata_flipped = {warpv_transducer_mem_wdata[7:0], warpv_transducer_mem_wdata[15:8],
                                       warpv_transducer_mem_wdata[23:16], warpv_transducer_mem_wdata[31:24]};

                // NO Atomics at the moment.
                // To add them, change the request type to `PCX_REQTYPE_AMO
                // here when warpv_transducer_mem_amo_op requests an atomic
                // operation, and forward that opcode on
                // transducer_l15_amo_op.

                case(warpv_transducer_mem_wstrb)
                    4'b1111: begin
                        transducer_l15_size = `MSG_DATA_SIZE_4B;
                    end
                    4'b1100, 4'b0011: begin
                        transducer_l15_size = `MSG_DATA_SIZE_2B;
                    end
                    4'b1000, 4'b0100, 4'b0010, 4'b0001: begin
                        transducer_l15_size = `MSG_DATA_SIZE_1B;
                    end
                    // this should never happen
                    default: begin
                        transducer_l15_size = 0;
                    end
                endcase
            end
            // load operation
            else begin
                warpv_wdata_flipped = 32'b0;
                transducer_l15_rqtype = `LOAD_RQ;
                transducer_l15_size = `MSG_DATA_SIZE_4B;
            end
        end
        else begin
            warpv_wdata_flipped = 32'b0;
            transducer_l15_rqtype = 5'b0;
            transducer_l15_size = 3'b0;
        end
    end

    // ** ENCODER ** //

    // Selected return word before the byte swap back to the core's order.
    reg [31:0] rdata_part;
    assign transducer_warpv_mem_rdata = {rdata_part[7:0], rdata_part[15:8],
                                         rdata_part[23:16], rdata_part[31:24]};
    assign transducer_l15_req_ack = l15_transducer_val;

    // keep track of whether we have received the wakeup interrupt:
    // warpv_int goes high the cycle after int_recv and drops again on the
    // first cycle in which int_recv is low.
    reg int_recv;
    always @ (posedge clk) begin
        if (!rst_n) begin
            warpv_int <= 1'b0;
        end
        else if (int_recv) begin
            warpv_int <= 1'b1;
        end
        else if (warpv_int) begin
            warpv_int <= 1'b0;
        end
    end

    always @ * begin
        if (l15_transducer_val) begin
            case(l15_transducer_returntype)
                `LOAD_RET, `CPX_RESTYPE_ATOMIC_RES: begin
                    // load
                    int_recv = 1'b0;
                    transducer_warpv_mem_ready = 1'b1;
                    // pick the addressed 32-bit word out of the 128 bits
                    // returned, using the address currently being driven
                    case(transducer_l15_address[3:2])
                        2'b00: begin
                            rdata_part = l15_transducer_data_0[63:32];
                        end
                        2'b01: begin
                            rdata_part = l15_transducer_data_0[31:0];
                        end
                        2'b10: begin
                            rdata_part = l15_transducer_data_1[63:32];
                        end
                        2'b11: begin
                            rdata_part = l15_transducer_data_1[31:0];
                        end
                        // only reachable with an unknown select; assign a
                        // value so the block stays purely combinational
                        default: begin
                            rdata_part = 32'b0;
                        end
                    endcase
                end
                `ST_ACK: begin
                    int_recv = 1'b0;
                    transducer_warpv_mem_ready = 1'b1;
                    rdata_part = 32'b0;
                end
                `INT_RET: begin
                    if (l15_transducer_data_0[17:16] == 2'b01) begin
                        int_recv = 1'b1;
                    end
                    else begin
                        int_recv = 1'b0;
                    end
                    transducer_warpv_mem_ready = 1'b0;
                    rdata_part = 32'b0;
                end
                default: begin
                    int_recv = 1'b0;
                    transducer_warpv_mem_ready = 1'b0;
                    rdata_part = 32'b0;
                end
            endcase
        end
        else begin
            int_recv = 1'b0;
            transducer_warpv_mem_ready = 1'b0;
            rdata_part = 32'b0;
        end
    end
endmodule
567 changes: 503 additions & 64 deletions warp-v.tlv

Large diffs are not rendered by default.