Skip to content

Commit

Permalink
Add FPGA Optimized Register File Version
Browse files Browse the repository at this point in the history
Add a register file, optimized for synthesis on FPGAs supporting
distributed RAM. The register file features two RAM blocks each with 1
sync-write and 3 async read ports. To achieve the behavior of a 2
sync-write / 3 async-read register file, the read access is arbitrated
depending on which block was last written to. For this purpose an
additional array of *NUM_TOT_WORDS* 1-bit registers is introduced.

Savings for FPGA synthesis are achieved by:
- Replacing an Array of FFs with distributed RAM. Example: 31 32-bit
  registers as FFs occupy 992 FFs, or 446 LUTs on Xilinx Artix-7 FPGAs.
  The equivalent storage capacity using distributed RAM is implemented
  by 36 RAM32M primitives (inferred from generic HDL), or 144
  distributed RAM enabled LUTs, and 31 FFs for block selection (16
  LUTs).
- The distributed RAM primitives have the read-address
  decoders already integrated. This saves three 32-bit 32 to 1
  multiplexers at the read ports.
- Since both write ports unconditionally write to their respective
  RAM blocks, the multiplexing of the write ports is also saved. That
  is 32 32-bit 2 to 1 multiplexers.

Concrete Savings: (synthesized for Xilinx Artix-7 FPGA)
- without FPU reg file:
        baseline:   7347 LUTs, 2508 FFs
        optimized:  5722 LUTs, 1541 FFs
        -------------------------------
        difference: -1625 LUTS (-22.1%)
                    -967 FFs   (-38.6%)

- with FPU reg file:
        baseline:   13610 LUTs, 4027 FFs
        optimized:  10257 LUTs, 2062 FFs
        -------------------------------
        difference: -3353 LUTS (-24.6%)
                    -1965 FFs  (-48.8%)

Signed-off-by: ganoam <[email protected]>
  • Loading branch information
ganoam committed Aug 5, 2020
1 parent 1607d8b commit ea467b5
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 2 deletions.
5 changes: 4 additions & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ sources:
- target: asic
files:
- rtl/cv32e40p_register_file_latch.sv
- target: not(asic)
- target: fpga
files:
- rtl/cv32e40p_register_file_fpga.sv
- target: rtl
files:
- rtl/cv32e40p_register_file_ff.sv
- target: rtl
Expand Down
178 changes: 178 additions & 0 deletions rtl/cv32e40p_register_file_fpga.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@

// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

////////////////////////////////////////////////////////////////////////////////
// Engineer: Francesco Conti - [email protected] //
// //
// Additional contributions by: //
// Michael Gautschi - [email protected] //
// Davide Schiavone - [email protected] //
// Noam Gallmann - [email protected] //
// //
// Design Name: RISC-V register file //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Register file with 31x 32 bit wide registers. Register 0 //
// is fixed to 0. This register file is optimized for FPGAs //
// featuring distributed RAM-enabled logic cells. //
// Also supports the fp-register file now if FPU=1 //
// If PULP_ZFINX is 1, floating point operations take values //
// from the X register file //
// //
////////////////////////////////////////////////////////////////////////////////

module cv32e40p_register_file
#(
  // Width of the read/write address ports. Half of the address space
  // (2**(ADDR_WIDTH-1) words) holds the integer registers; the other half is
  // only backed by storage when FPU != 0 and PULP_ZFINX == 0.
  parameter ADDR_WIDTH = 5,
  // Width of one register in bits.
  parameter DATA_WIDTH = 32,
  // Non-zero: floating-point registers are supported.
  parameter FPU        = 0,
  // Non-zero (together with FPU): floating-point operands share the integer
  // registers, so no additional storage is allocated.
  parameter PULP_ZFINX = 0
)
(
  // Clock and Reset
  input  logic                   clk,
  input  logic                   rst_n,

  // Not referenced inside this implementation; kept so the port list stays
  // identical to the other register file variants.
  input  logic                   scan_cg_en_i,

  // Read port R1
  input  logic [ADDR_WIDTH-1:0]  raddr_a_i,
  output logic [DATA_WIDTH-1:0]  rdata_a_o,

  // Read port R2
  input  logic [ADDR_WIDTH-1:0]  raddr_b_i,
  output logic [DATA_WIDTH-1:0]  rdata_b_o,

  // Read port R3
  input  logic [ADDR_WIDTH-1:0]  raddr_c_i,
  output logic [DATA_WIDTH-1:0]  rdata_c_o,

  // Write port W1
  input  logic [ADDR_WIDTH-1:0]  waddr_a_i,
  input  logic [DATA_WIDTH-1:0]  wdata_a_i,
  input  logic                   we_a_i,

  // Write port W2
  input  logic [ADDR_WIDTH-1:0]  waddr_b_i,
  input  logic [DATA_WIDTH-1:0]  wdata_b_i,
  input  logic                   we_b_i
);

  // The register values are stored in two separate RAM blocks each featuring 1 sync-write and
  // 3 async-read ports. A set of 1-bit flip-flops keeps track of which RAM block contains the valid
  // entry for each register.
  // The integer register file occupies the lower half of the address space. If enabled, the
  // floating-point registers are located in the upper half (e.g. addresses 0-31 and 32-63 when
  // ADDR_WIDTH = 6).

  // number of integer registers
  localparam NUM_WORDS     = 2**(ADDR_WIDTH-1);
  // number of floating point registers
  localparam NUM_FP_WORDS  = 2**(ADDR_WIDTH-1);
  localparam NUM_TOT_WORDS = FPU ? ( PULP_ZFINX ? NUM_WORDS : NUM_WORDS + NUM_FP_WORDS ) : NUM_WORDS;

  // Number of address bits needed to index NUM_TOT_WORDS entries. Deriving the read index width
  // from the actual storage depth (instead of hard-coding [5:0] / [4:0]) keeps the part-selects
  // in range for every ADDR_WIDTH, including the declared default of 5.
  // For ADDR_WIDTH = 6 this evaluates to 6 (FPU, no ZFINX) or 5 (otherwise).
  localparam IDX_WIDTH     = $clog2(NUM_TOT_WORDS);

  // integer and floating-point register file
  // distributed RAM blocks
  logic [DATA_WIDTH-1:0]    mem_a [NUM_TOT_WORDS];
  logic [DATA_WIDTH-1:0]    mem_b [NUM_TOT_WORDS];

  // distributed RAM block selectors
  // bit i == 0: mem_a[i] holds the valid value, bit i == 1: mem_b[i] holds the valid value
  logic [NUM_TOT_WORDS-1:0] mem_block_sel;
  logic [NUM_TOT_WORDS-1:0] mem_block_sel_q;

  // write enable signals for all registers
  logic [NUM_TOT_WORDS-1:0] we_a_dec;
  logic [NUM_TOT_WORDS-1:0] we_b_dec;

  // read indices, truncated to the bits that address existing storage
  logic [IDX_WIDTH-1:0]     ridx_a;
  logic [IDX_WIDTH-1:0]     ridx_b;
  logic [IDX_WIDTH-1:0]     ridx_c;

  //-----------------------------------------------------------------------------
  //-- READ : Read address decoder RAD
  //-----------------------------------------------------------------------------

  assign ridx_a = raddr_a_i[IDX_WIDTH-1:0];
  assign ridx_b = raddr_b_i[IDX_WIDTH-1:0];
  assign ridx_c = raddr_c_i[IDX_WIDTH-1:0];

  // Read from the block corresponding to the write port that last wrote to the corresponding
  // address. Address 0 (compared over the full address width) always reads as zero.
  assign rdata_a_o = (raddr_a_i == '0) ? '0 :
                     mem_block_sel_q[ridx_a] ? mem_b[ridx_a] : mem_a[ridx_a];
  assign rdata_b_o = (raddr_b_i == '0) ? '0 :
                     mem_block_sel_q[ridx_b] ? mem_b[ridx_b] : mem_a[ridx_b];
  assign rdata_c_o = (raddr_c_i == '0) ? '0 :
                     mem_block_sel_q[ridx_c] ? mem_b[ridx_c] : mem_a[ridx_c];

  //-----------------------------------------------------------------------------
  //-- WRITE : Write Address Decoder (WAD)
  //-----------------------------------------------------------------------------

  // one-hot decode of write port A: bit i is set when port A writes register i
  always_comb begin : we_a_decoder
    for (int i = 0; i < NUM_TOT_WORDS; i++) begin
      if (waddr_a_i == i) begin
        we_a_dec[i] = we_a_i;
      end else begin
        we_a_dec[i] = 1'b0;
      end
    end
  end

  // one-hot decode of write port B: bit i is set when port B writes register i
  always_comb begin : we_b_decoder
    for (int i = 0; i < NUM_TOT_WORDS; i++) begin
      if (waddr_b_i == i) begin
        we_b_dec[i] = we_b_i;
      end else begin
        we_b_dec[i] = 1'b0;
      end
    end
  end

  // update block selector:
  // signal mem_block_sel records where the current valid value is stored.
  // if port a and b try to write to the same address simultaneously, write port b has priority.
  always_comb begin
    // register 0 always reads as zero, so its selector is tied off
    mem_block_sel[0] = '0;
    for (int i = 1; i < NUM_TOT_WORDS; i++) begin
      if (we_b_dec[i] == 1'b1) begin
        mem_block_sel[i] = 1'b1;
      end else if (we_a_dec[i] == 1'b1) begin
        mem_block_sel[i] = 1'b0;
      end else begin
        mem_block_sel[i] = mem_block_sel_q[i];
      end
    end
  end

  // block selector flops
  // The reset is sampled synchronously: rst_n must be low on a rising clk edge to clear the
  // selectors.
  always_ff @(posedge clk) begin
    if (rst_n == 1'b0) begin
      mem_block_sel_q <= '0;
    end else begin
      mem_block_sel_q <= mem_block_sel;
    end
  end

  // RAM block A: written unconditionally by write port A (no reset, no write-port multiplexing)
  always_ff @(posedge clk) begin : regs_a
    if (we_a_i) begin
      mem_a[waddr_a_i] <= wdata_a_i;
    end
  end

  // RAM block B: written unconditionally by write port B (no reset, no write-port multiplexing)
  always_ff @(posedge clk) begin : regs_b
    if (we_b_i) begin
      mem_b[waddr_b_i] <= wdata_b_i;
    end
  end

endmodule
2 changes: 1 addition & 1 deletion src_files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ cv32e40p_regfile_fpga:
]
files: [
./rtl/cv32e40p_register_file_test_wrap.sv,
./rtl/cv32e40p_register_file_ff.sv,
./rtl/cv32e40p_register_file_fpga.sv,
]

tb_riscv:
Expand Down

0 comments on commit ea467b5

Please sign in to comment.