Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fitting logical operations into LUT4 #62

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 100 additions & 72 deletions FemtoRV/RTL/PROCESSOR/femtorv32_quark.v
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,22 @@
`define NRV_ABI "ilp32"
`define NRV_OPTIMIZE "-Os"

module FemtoRV32(
input clk,

output [31:0] mem_addr, // address bus
output [31:0] mem_wdata, // data to be written
output [3:0] mem_wmask, // write mask for the 4 bytes of each word
input [31:0] mem_rdata, // input lines for both data and instr
output mem_rstrb, // active to initiate memory read (used by IO)
input mem_rbusy, // asserted if memory is busy reading value
input mem_wbusy, // asserted if memory is busy writing value

input reset // set to 0 to reset the processor
module FemtoRV32 #(
parameter RESET_ADDR = 32'h00000000,
parameter ADDR_WIDTH = 24
)(
input clk,
input reset, // set to 0 to reset the processor
// system bus
output wire [31:0] mem_addr, // address bus
output reg [31:0] mem_wdata, // data to be written
output wire [3:0] mem_wmask, // write mask for the 4 bytes of each word
input wire [31:0] mem_rdata, // input lines for both data and instr
output wire mem_rstrb, // active to initiate memory read (used by IO)
input wire mem_rbusy, // asserted if memory is busy reading value
input wire mem_wbusy // asserted if memory is busy writing value
);

parameter RESET_ADDR = 32'h00000000;
parameter ADDR_WIDTH = 24;

/***************************************************************************/
// Instruction decoding.
/***************************************************************************/
Expand Down Expand Up @@ -98,6 +97,7 @@ module FemtoRV32(
reg [31:0] rs2;
reg [31:0] registerFile [31:0];

// write access
always @(posedge clk) begin
if (writeBack)
if (rdId != 0)
Expand Down Expand Up @@ -132,36 +132,50 @@ module FemtoRV32(
wire LTU = aluMinus[32];
wire EQ = (aluMinus[31:0] == 0);

// Logical operations
// Bitwise XOR / OR / AND of the two ALU inputs, selected by instr[13:12].
// The one encoding without an explicit arm (2'b01) falls to the default
// arm and is assigned all-x.
reg [32-1:0] aluLog;

always @(*)
case (instr[13:12])
2'b00 : aluLog = aluIn1 ^ aluIn2; // XOR
2'b10 : aluLog = aluIn1 | aluIn2; // OR
2'b11 : aluLog = aluIn1 & aluIn2; // AND
default: aluLog = 32'hxxxxxxxx; // 2'b01: no logical op, value left as x
endcase

// Notes:
// - instr[30] is 1 for SUB and 0 for ADD
// - for SUB, need to test also instr[5] to discriminate ADDI:
// (1 for ADD/SUB, 0 for ADDI, and Iimm used by ADDI overlaps bit 30 !)
// - instr[30] is 1 for SRA (do sign extension) and 0 for SRL

wire [31:0] aluOut =
(funct3Is[0] ? instr[30] & instr[5] ? aluMinus[31:0] : aluPlus : 32'b0) |
(funct3Is[2] ? {31'b0, LT} : 32'b0) |
(funct3Is[3] ? {31'b0, LTU} : 32'b0) |
(funct3Is[4] ? aluIn1 ^ aluIn2 : 32'b0) |
(funct3Is[6] ? aluIn1 | aluIn2 : 32'b0) |
(funct3Is[7] ? aluIn1 & aluIn2 : 32'b0) |
(funct3IsShift ? aluReg : 32'b0) ;
// ALU result: a full 8-way selection on instr[14:12] (every encoding has
// an arm, so no default is needed).
// - 000 picks the subtraction result only when both instr[30] and instr[5]
//   are set, otherwise the addition result.
// - 001 and 101 (shifts) both return the shift register aluReg.
// - 100, 110 and 111 all return aluLog.
reg [32-1:0] aluOut;
always @(*)
case (instr[14:12])
3'b000: aluOut = instr[30] & instr[5] ? aluMinus[31:0] : aluPlus; // ADD
3'b001: aluOut = aluReg; // SL
3'b010: aluOut = {31'b0, LT}; // SLT
3'b011: aluOut = {31'b0, LTU}; // SLTU
3'b100: aluOut = aluLog; // XOR
3'b101: aluOut = aluReg; // SR
3'b110: aluOut = aluLog; // OR
3'b111: aluOut = aluLog; // AND
endcase

wire funct3IsShift = funct3Is[1] | funct3Is[5];

always @(posedge clk) begin
if(aluWr) begin
if (funct3IsShift) begin // SLL, SRA, SRL
aluReg <= aluIn1;
aluShamt <= aluIn2[4:0];
end
aluReg <= aluIn1;
aluShamt <= aluIn2[4:0];
end
end

`ifdef NRV_TWOLEVEL_SHIFTER
else if(|aluShamt[4:2]) begin // Shift by 4
aluShamt <= aluShamt - 4;
aluReg <= funct3Is[1] ? aluReg << 4 :
{{4{instr[30] & aluReg[31]}}, aluReg[31:4]};
aluReg <= funct3Is[1] ? aluReg << 4 :
{{4{instr[30] & aluReg[31]}}, aluReg[31:4]};
end else
`endif
// Compact form of:
Expand All @@ -171,22 +185,28 @@ module FemtoRV32(

if (|aluShamt) begin
aluShamt <= aluShamt - 1;
aluReg <= funct3Is[1] ? aluReg << 1 : // SLL
{instr[30] & aluReg[31], aluReg[31:1]}; // SRA,SRL
aluReg <= funct3Is[1] ? aluReg << 1 : // SLL
{instr[30] & aluReg[31], aluReg[31:1]}; // SRA,SRL
end
end

/***************************************************************************/
// The predicate for conditional branches.
/***************************************************************************/

wire predicate =
funct3Is[0] & EQ | // BEQ
funct3Is[1] & !EQ | // BNE
funct3Is[4] & LT | // BLT
funct3Is[5] & !LT | // BGE
funct3Is[6] & LTU | // BLTU
funct3Is[7] & !LTU ; // BGEU
// Branch condition, selected by instr[14:12] from the comparison flags
// EQ, LT and LTU (or their negations).
// Encodings 010 and 011 have no branch listed and are assigned x.
reg predicate;

always @(*)
case (instr[14:12])
3'b000: predicate = EQ ; // BEQ
3'b001: predicate = !EQ ; // BNE
3'b010: predicate = 1'bx; // no branch for this encoding: don't-care
3'b011: predicate = 1'bx; // no branch for this encoding: don't-care
3'b100: predicate = LT ; // BLT
3'b101: predicate = !LT ; // BGE
3'b110: predicate = LTU; // BLTU
3'b111: predicate = !LTU; // BGEU
endcase

/***************************************************************************/
// Program counter and branch target computation.
Expand All @@ -202,20 +222,20 @@ module FemtoRV32(
// branch->PC+Bimm AUIPC->PC+Uimm JAL->PC+Jimm
// Equivalent to PCplusImm = PC + (isJAL ? Jimm : isAUIPC ? Uimm : Bimm)
wire [ADDR_WIDTH-1:0] PCplusImm = PC + ( instr[3] ? Jimm[ADDR_WIDTH-1:0] :
instr[4] ? Uimm[ADDR_WIDTH-1:0] :
Bimm[ADDR_WIDTH-1:0] );
instr[4] ? Uimm[ADDR_WIDTH-1:0] :
Bimm[ADDR_WIDTH-1:0] );

// A separate adder to compute the destination of load/store.
// testing instr[5] is equivalent to testing isStore in this context.
wire [ADDR_WIDTH-1:0] loadstore_addr = rs1[ADDR_WIDTH-1:0] +
(instr[5] ? Simm[ADDR_WIDTH-1:0] : Iimm[ADDR_WIDTH-1:0]);
(instr[5] ? Simm[ADDR_WIDTH-1:0] : Iimm[ADDR_WIDTH-1:0]);

/* verilator lint_off WIDTH */
// internal address registers and cycles counter may have less than
// 32 bits, so we deactivate width test for mem_addr and writeBackData

assign mem_addr = state[WAIT_INSTR_bit] | state[FETCH_INSTR_bit] ?
PC : loadstore_addr ;
PC : {loadstore_addr[ADDR_WIDTH-1:2], 2'b00} ;

/***************************************************************************/
// The value written back to the register file.
Expand All @@ -237,38 +257,47 @@ module FemtoRV32(
/***************************************************************************/

// All memory accesses are aligned on 32 bits boundary. For this
// reason, we need some circuitry that does unaligned halfword
// reason, we need some circuitry that does unaligned half
// and byte load/store, based on:
// - funct3[1:0]: 00->byte 01->halfword 10->word
// - mem_addr[1:0]: indicates which byte/halfword is accessed
// - funct3[1:0]: 00->byte 01->half 10->word
// - mem_addr[1:0]: indicates which byte/half is accessed

wire mem_byteAccess = instr[13:12] == 2'b00; // funct3[1:0] == 2'b00;
wire mem_halfwordAccess = instr[13:12] == 2'b01; // funct3[1:0] == 2'b01;
wire mem_byteAccess = instr[13:12] == 2'b00; // funct3[1:0] == 2'b00;
wire mem_halfAccess = instr[13:12] == 2'b01; // funct3[1:0] == 2'b01;

// LOAD: in addition to funct3[1:0], the result depends on:
// - funct3[2] (instr[14]): 0->do sign extension 1->no sign extension

wire LOAD_sign =
!instr[14] & (mem_byteAccess ? LOAD_byte[7] : LOAD_halfword[15]);
!instr[14] & (mem_byteAccess ? LOAD_byte[7] : LOAD_half[15]);

wire [31:0] LOAD_data =
mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} :
mem_halfwordAccess ? {{16{LOAD_sign}}, LOAD_halfword} :
mem_rdata ;
mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} :
mem_halfAccess ? {{16{LOAD_sign}}, LOAD_half} :
mem_rdata ;

wire [15:0] LOAD_halfword =
loadstore_addr[1] ? mem_rdata[31:16] : mem_rdata[15:0];
wire [15:0] LOAD_half =
loadstore_addr[1] ? mem_rdata[31:16] : mem_rdata[15:0];

wire [7:0] LOAD_byte =
loadstore_addr[0] ? LOAD_halfword[15:8] : LOAD_halfword[7:0];
loadstore_addr[0] ? LOAD_half[15:8] : LOAD_half[7:0];

// STORE

assign mem_wdata[ 7: 0] = rs2[7:0];
assign mem_wdata[15: 8] = loadstore_addr[0] ? rs2[7:0] : rs2[15: 8];
assign mem_wdata[23:16] = loadstore_addr[1] ? rs2[7:0] : rs2[23:16];
assign mem_wdata[31:24] = loadstore_addr[0] ? rs2[7:0] :
loadstore_addr[1] ? rs2[15:8] : rs2[31:24];
// Store data steering: places the low byte / low half / whole of rs2 into
// the byte lane(s) of mem_wdata chosen by loadstore_addr[1:0].
// Lanes that carry no store data are assigned x.
// NOTE(review): presumably those lanes are disabled by the write mask
// (mem_wmask) - confirm against the mask logic.
always @(*)
case (instr[14:12])
// 000: byte store - rs2[7:0] goes to the lane selected by loadstore_addr[1:0]
3'b000 : case (loadstore_addr[1:0])
2'b00: mem_wdata = {8'hxx , 8'hxx , 8'hxx , rs2[ 7: 0]};
2'b01: mem_wdata = {8'hxx , 8'hxx , rs2[ 7: 0], 8'hxx };
2'b10: mem_wdata = {8'hxx , rs2[ 7: 0], 8'hxx , 8'hxx };
2'b11: mem_wdata = {rs2[ 7: 0], 8'hxx , 8'hxx , 8'hxx };
endcase
// 001: half store - rs2[15:0] goes to the lower or upper half, selected by
// loadstore_addr[1] only (loadstore_addr[0] is not examined here)
3'b001 : casez (loadstore_addr[1])
1'b0 : mem_wdata = {8'hxx , 8'hxx , rs2[15: 8], rs2[ 7: 0]};
1'b1 : mem_wdata = {rs2[15: 8], rs2[ 7: 0], 8'hxx , 8'hxx };
endcase
// 010: word store - rs2 is passed through unchanged
3'b010 : mem_wdata = {rs2[31:24], rs2[23:16], rs2[15: 8], rs2[ 7: 0]};
// any other encoding: all lanes are don't-care
default: mem_wdata = {8'hxx , 8'hxx , 8'hxx , 8'hxx };
endcase

// The memory write mask:
// 1111 if writing a word
Expand All @@ -278,13 +307,13 @@ module FemtoRV32(
// (depending on loadstore_addr[1:0])

wire [3:0] STORE_wmask =
mem_byteAccess ?
(loadstore_addr[1] ?
(loadstore_addr[0] ? 4'b1000 : 4'b0100) :
(loadstore_addr[0] ? 4'b0010 : 4'b0001)
mem_byteAccess ?
(loadstore_addr[1] ?
(loadstore_addr[0] ? 4'b1000 : 4'b0100) :
(loadstore_addr[0] ? 4'b0010 : 4'b0001)
) :
mem_halfwordAccess ?
(loadstore_addr[1] ? 4'b1100 : 4'b0011) :
mem_halfAccess ?
(loadstore_addr[1] ? 4'b1100 : 4'b0011) :
4'b1111;

/*************************************************************************/
Expand All @@ -310,7 +339,7 @@ module FemtoRV32(

// register write-back enable.
wire writeBack = ~(isBranch | isStore ) &
(state[EXECUTE_bit] | state[WAIT_ALU_OR_MEM_bit]);
(state[EXECUTE_bit] | state[WAIT_ALU_OR_MEM_bit]);

// The memory-read signal.
assign mem_rstrb = state[EXECUTE_bit] & isLoad | state[FETCH_INSTR_bit];
Expand All @@ -324,8 +353,8 @@ module FemtoRV32(
wire jumpToPCplusImm = isJAL | (isBranch & predicate);
`ifdef NRV_IS_IO_ADDR
wire needToWait = isLoad |
isStore & `NRV_IS_IO_ADDR(mem_addr) |
isALU & funct3IsShift;
isStore & `NRV_IS_IO_ADDR(mem_addr) |
isALU & funct3IsShift;
`else
wire needToWait = isLoad | isStore | isALU & funct3IsShift;
`endif
Expand Down Expand Up @@ -353,15 +382,15 @@ module FemtoRV32(
PC <= isJALR ? {aluPlus[ADDR_WIDTH-1:1],1'b0} :
jumpToPCplusImm ? PCplusImm :
PCplus4;
state <= needToWait ? WAIT_ALU_OR_MEM : FETCH_INSTR;
state <= needToWait ? WAIT_ALU_OR_MEM : FETCH_INSTR;
end

state[WAIT_ALU_OR_MEM_bit]: begin
if(!aluBusy & !mem_rbusy & !mem_wbusy) state <= FETCH_INSTR;
end

default: begin // FETCH_INSTR
state <= WAIT_INSTR;
state <= WAIT_INSTR;
end

endcase
Expand Down Expand Up @@ -415,4 +444,3 @@ endmodule
// [2] state uses 1-hot encoding (at any time, state has only one bit set to 1).
// It uses a larger number of bits (one bit per state), but often results in
// a state machine that is both more compact (fewer LUTs) and faster.