Skip to content

Commit

Permalink
Add Bitmanipulation Support
Browse files Browse the repository at this point in the history
  • Loading branch information
gullahmed1 committed Nov 7, 2023
1 parent e70418e commit 5ff2112
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 7 deletions.
138 changes: 137 additions & 1 deletion rtl/cv32e40p_alu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,11 @@ module cv32e40p_alu
logic [31:0] bmask_first, bmask_inv;
logic [31:0] bextins_and;
logic [31:0] bextins_result, bclr_result, bset_result;

logic [31:0] result_bitmanip; // Result of the bit-manipulation operation selected by operator_i ('0 when none applies)
logic [31:0] clmul_result; // Carry-less product: XOR of the 32 shifted partial-product rows
logic [ 5:0] cpop; // Count of set bits in operand_a_i (result_o of cv32e40p_popcnt)
logic [ 4:0] ff_one_result; // first_one_o of cv32e40p_ff_one: position of the first one in its input
logic ff_one_all_zeros; // no_ones_o of cv32e40p_ff_one: set when its input has no bit set

// construct bit mask for insert/extract/bclr/bset
// bmask looks like this 00..0011..1100..00
Expand All @@ -823,6 +827,124 @@ module cv32e40p_alu
assign bclr_result = operand_a_i & bmask_inv;
assign bset_result = operand_a_i | bmask;

if (ZBITMANIP) begin : gen_zbc_zbb_results

  // Scratch signals, visible only inside this generate scope
  logic [31:0]       ff1_operand;    // vector handed to cv32e40p_ff_one
  logic [31:0]       operand_b_rev;  // operand_b_i with its bit order reversed
  logic              clmul_forward;  // high only for ALU_B_CLMUL
  logic [31:0]       clmul_a;        // multiplicand used for the partial products
  logic [31:0]       clmul_b;        // multiplier whose bits enable each row
  logic [31:0][31:0] clmul_rows;     // 32 shifted partial products
  logic [ 7:0][31:0] clmul_quads;    // XOR of every group of four rows
  logic [ 1:0][31:0] clmul_halves;   // XOR of every group of four quads

  // Bit-reversed copy of operand_b_i, built with the streaming operator
  assign operand_b_rev = {<<{operand_b_i}};

  // ALU_B_CTZ scans operand_a_i as-is; every other operator scans operand_a_rev
  assign ff1_operand = (operator_i != ALU_B_CTZ) ? operand_a_rev : operand_a_i;

  // Find-first-one unit: reports the first set position and an "input is all zero" flag
  cv32e40p_ff_one ff_one_i (
      .in_i       (ff1_operand),
      .first_one_o(ff_one_result),
      .no_ones_o  (ff_one_all_zeros)
  );

  // Population count of operand_a_i
  cv32e40p_popcnt popcnt_i (
      .in_i    (operand_a_i),
      .result_o(cpop)
  );

  // ALU_B_CLMUL multiplies the operands directly; all other operators use the
  // bit-reversed operands
  assign clmul_forward = (operator_i == ALU_B_CLMUL);
  assign clmul_a       = clmul_forward ? operand_a_i : operand_a_rev;
  assign clmul_b       = clmul_forward ? operand_b_i : operand_b_rev;

  // One partial product per multiplier bit, as in schoolbook multiplication
  for (genvar row = 0; row < 32; row++) begin : gen_32_rows
    assign clmul_rows[row] = clmul_b[row] ? (clmul_a << row) : '0;
  end

  // First reduction level: 32 rows -> 8 (XOR replaces addition, so no carries)
  for (genvar grp = 0; grp < 8; grp++) begin : gen_xor_result_8_rows
    assign clmul_quads[grp] = clmul_rows[4*grp]     ^ clmul_rows[4*grp + 1] ^
                              clmul_rows[4*grp + 2] ^ clmul_rows[4*grp + 3];
  end

  // Second reduction level: 8 -> 2
  for (genvar grp = 0; grp < 2; grp++) begin : gen_xor_result_2_rows
    assign clmul_halves[grp] = clmul_quads[4*grp]     ^ clmul_quads[4*grp + 1] ^
                               clmul_quads[4*grp + 2] ^ clmul_quads[4*grp + 3];
  end

  // Final reduction level: 2 -> 1
  assign clmul_result = clmul_halves[0] ^ clmul_halves[1];
end

// Bit-manipulation result selection (Zba / Zbb / Zbc / Zbs).
// Drives result_bitmanip from operand_a_i / operand_b_i according to operator_i.
// Yields '0 for any operator that is not a bit-manipulation operator, and for
// every operator when ZBITMANIP is disabled.
always_comb begin : bitmanip_result_mux
  // Bit-reversed carry-less product. It is materialised in a variable because a
  // streaming concatenation may only be the source/target of an assignment (or
  // a bit-stream cast operand); it must not appear as an operand of '>>'.
  logic [31:0] clmul_result_rev;

  clmul_result_rev = {<<{clmul_result}};

  if (ZBITMANIP) begin
    unique case (operator_i)

      // Zba: address generation, (rs1 << 1/2/3) + rs2
      ALU_B_SH1ADD: result_bitmanip = {operand_a_i[30:0], 1'b0} + operand_b_i;
      ALU_B_SH2ADD: result_bitmanip = {operand_a_i[29:0], 2'b0} + operand_b_i;
      ALU_B_SH3ADD: result_bitmanip = {operand_a_i[28:0], 3'b0} + operand_b_i;

      // Zbb: logical with negate
      ALU_B_ANDN: result_bitmanip = operand_a_i & operand_b_neg;
      ALU_B_ORN:  result_bitmanip = operand_a_i | operand_b_neg;
      ALU_B_XNOR: result_bitmanip = ~(operand_a_i ^ operand_b_i);

      // Zbb: count leading/trailing zeros. Both arms share the find-first-one
      // unit (its input is selected per operator); an all-zero input yields 32.
      // The 5-bit position is padded explicitly to the full 32-bit width.
      ALU_B_CLZ: result_bitmanip = ff_one_all_zeros ? 32'd32 : {27'b0, ff_one_result};
      ALU_B_CTZ: result_bitmanip = ff_one_all_zeros ? 32'd32 : {27'b0, ff_one_result};

      // Zbb: count set bits (6-bit count, explicitly zero-extended)
      ALU_B_CPOP: result_bitmanip = {26'b0, cpop};

      // Zbb: integer minimum/maximum, signed and unsigned
      ALU_B_MAX:  result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_b_i : operand_a_i;
      ALU_B_MAXU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_b_i : operand_a_i;
      ALU_B_MIN:  result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_a_i : operand_b_i;
      ALU_B_MINU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_a_i : operand_b_i;

      // Zbb: sign and zero extension
      ALU_B_SEXTB: result_bitmanip = {{24{operand_a_i[7]}}, operand_a_i[7:0]};
      ALU_B_SEXTH: result_bitmanip = {{16{operand_a_i[15]}}, operand_a_i[15:0]};
      ALU_B_ZEXTH: result_bitmanip = {16'b0, operand_a_i[15:0]};

      // Zbb: rotates. For a rotate amount of 0 the complementary shift is by
      // 32, which produces 0, so the OR returns operand_a_i unchanged.
      ALU_B_ROL:  result_bitmanip = (operand_a_i << operand_b_i[4:0]) | (operand_a_i >> (32 - operand_b_i[4:0]));
      ALU_B_ROR:  result_bitmanip = (operand_a_i >> operand_b_i[4:0]) | (operand_a_i << (32 - operand_b_i[4:0]));
      ALU_B_RORI: result_bitmanip = (operand_a_i >> operand_b_i[4:0]) | (operand_a_i << (32 - operand_b_i[4:0]));

      // Zbb: OR-combine, byte granule (each byte becomes 0x00 or 0xFF)
      ALU_B_ORCB: result_bitmanip = {{8{|operand_a_i[31:24]}}, {8{|operand_a_i[23:16]}},
                                     {8{|operand_a_i[15:8]}},  {8{|operand_a_i[7:0]}}};

      // Zbb: byte-reverse register
      ALU_B_REV8: result_bitmanip = {operand_a_i[7:0], operand_a_i[15:8], operand_a_i[23:16], operand_a_i[31:24]};

      // Zbc: carry-less multiplication, low / reversed / high part.
      // For CLMULR and CLMULH clmul_result is formed from the bit-reversed
      // operands, so reversing it again gives the reversed product; the high
      // part is that value shifted right by one.
      ALU_B_CLMUL:  result_bitmanip = clmul_result;
      ALU_B_CLMULR: result_bitmanip = clmul_result_rev;
      ALU_B_CLMULH: result_bitmanip = {1'b0, clmul_result_rev[31:1]};

      // Zbs: single-bit operations; the bit index is operand_b_i[4:0]
      ALU_B_BCLR:  result_bitmanip = operand_a_i & ~(32'b1 << operand_b_i[4:0]);
      ALU_B_BCLRI: result_bitmanip = operand_a_i & ~(32'b1 << operand_b_i[4:0]);
      ALU_B_BEXT:  result_bitmanip = (operand_a_i >> operand_b_i[4:0]) & 32'b1;
      ALU_B_BEXTI: result_bitmanip = (operand_a_i >> operand_b_i[4:0]) & 32'b1;
      ALU_B_BINV:  result_bitmanip = operand_a_i ^ (32'b1 << operand_b_i[4:0]);
      ALU_B_BINVI: result_bitmanip = operand_a_i ^ (32'b1 << operand_b_i[4:0]);
      ALU_B_BSET:  result_bitmanip = operand_a_i | (32'b1 << operand_b_i[4:0]);
      ALU_B_BSETI: result_bitmanip = operand_a_i | (32'b1 << operand_b_i[4:0]);

      default: result_bitmanip = '0;
    endcase
  end
  else begin
    result_bitmanip = '0;
  end
end

/////////////////////////////////////////////////////////////////////////////////
// ____ _____ _______ _____ ________ ________ _____ _____ ______ //
// | _ \_ _|__ __| | __ \| ____\ \ / / ____| __ \ / ____| ____| //
Expand Down Expand Up @@ -979,6 +1101,20 @@ module cv32e40p_alu

default: ; // default case to suppress unique warning
endcase

// Bit-manipulation override: when the extension is enabled and operator_i is
// one of the ALU_B_* operators listed here, result_o takes result_bitmanip.
// For any other operator result_o keeps the value assigned above.
if (ZBITMANIP && (operator_i inside {
      // Zba
      ALU_B_SH1ADD, ALU_B_SH2ADD, ALU_B_SH3ADD,
      // Zbb
      ALU_B_ANDN, ALU_B_ORN, ALU_B_XNOR,
      ALU_B_CLZ, ALU_B_CTZ, ALU_B_CPOP,
      ALU_B_MAX, ALU_B_MAXU, ALU_B_MIN, ALU_B_MINU,
      ALU_B_SEXTB, ALU_B_SEXTH, ALU_B_ZEXTH,
      ALU_B_ROL, ALU_B_ROR, ALU_B_RORI,
      ALU_B_ORCB, ALU_B_REV8,
      // Zbc
      ALU_B_CLMUL, ALU_B_CLMULH, ALU_B_CLMULR,
      // Zbs
      ALU_B_BCLR, ALU_B_BCLRI, ALU_B_BEXT, ALU_B_BEXTI,
      ALU_B_BINV, ALU_B_BINVI, ALU_B_BSET, ALU_B_BSETI
    })) begin
  result_o = result_bitmanip;
end
end

assign ready_o = div_ready;
Expand Down
84 changes: 79 additions & 5 deletions rtl/cv32e40p_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ module cv32e40p_decoder
// unittypes for latencies to help us decode for APU
enum logic[1:0] {ADDMUL, DIVSQRT, NONCOMP, CONV} fp_op_group;

// Illegal-instruction flags used while decoding register-register ALU
// instructions with bit-manipulation support. Both default to 0 and are
// combined into illegal_insn_o once both decode cases have been evaluated.
// Set when the bit-manipulation decode case finds no matching encoding
logic illegal_instr_bm;
// Set when the non-bit-manipulation decode case falls through to its default
logic illegal_instr_non_bm;

/////////////////////////////////////////////
// ____ _ //
Expand Down Expand Up @@ -264,6 +267,8 @@ module cv32e40p_decoder
atop_o = 6'b000000;

illegal_insn_o = 1'b0;
illegal_instr_bm = 1'b0;
illegal_instr_non_bm = 1'b0;
ebrk_insn_o = 1'b0;
ecall_insn_o = 1'b0;
wfi_o = 1'b0;
Expand Down Expand Up @@ -493,8 +498,31 @@ module cv32e40p_decoder
3'b111: alu_operator_o = ALU_AND; // And with Immediate

// Decode for the 3'b001 case item: shift-left-immediate plus the
// bit-manipulation immediates/unary operations that share this selector.
// NOTE(review): the next four lines contain a duplicated ALU_SLL assignment and
// two consecutive `if`s with opposite conditions. This looks like removed and
// added diff lines interleaved by the page scrape - confirm against the real file.
3'b001: begin
alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
if (instr_rdata_i[31:25] != 7'b0)
if (instr_rdata_i[31:25] == 7'b0)
alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate

//Bit-Manip ALU Operations
// Only considered when instr_rdata_i[31:25] is non-zero and ZBITMANIP is set
else if (ZBITMANIP) begin
unique case (instr_rdata_i[31:25])
// Single-operand operations: instr_rdata_i[24:20] selects the operation
7'b011_0000: begin
unique case(instr_rdata_i[24:20])
5'b00100: alu_operator_o = ALU_B_SEXTB;
5'b00101: alu_operator_o = ALU_B_SEXTH;
5'b00010: alu_operator_o = ALU_B_CPOP;
5'b00001: alu_operator_o = ALU_B_CTZ;
5'b00000: alu_operator_o = ALU_B_CLZ;
// Any other selector value is flagged as an illegal instruction
default: illegal_insn_o = 1'b1;
endcase
end
// Single-bit clear / invert / set with an immediate bit index
7'b010_0100: alu_operator_o = ALU_B_BCLRI;
7'b011_0100: alu_operator_o = ALU_B_BINVI;
7'b001_0100: alu_operator_o = ALU_B_BSETI;
default: begin
illegal_insn_o = 1'b1;
end
endcase
end
// Non-zero instr_rdata_i[31:25] without ZBITMANIP: illegal instruction
else
illegal_insn_o = 1'b1;
end

Expand All @@ -503,11 +531,23 @@ module cv32e40p_decoder
alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
else if (instr_rdata_i[31:25] == 7'b010_0000)
alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate

//Bit-Manip ALU Operations
else if (ZBITMANIP) begin
if (instr_rdata_i[31:25] == 7'b011_0000)
alu_operator_o = ALU_B_RORI;
else if (instr_rdata_i[31:20] == 12'b001010000111)
alu_operator_o = ALU_B_ORCB;
else if (instr_rdata_i[31:20] == 12'b011010011000)
alu_operator_o = ALU_B_REV8;
else if (instr_rdata_i[31:25] == 7'b010_0100)
alu_operator_o = ALU_B_BEXTI;
else
illegal_insn_o = 1'b1;
end
else
illegal_insn_o = 1'b1;
end


endcase
end

Expand Down Expand Up @@ -992,9 +1032,43 @@ module cv32e40p_decoder
end

default: begin
illegal_insn_o = 1'b1;
illegal_instr_non_bm = 1'b1;
end
endcase

// Register-register bit-manipulation decode (Zba / Zbb / Zbc / Zbs).
// Selects alu_operator_o from {instr_rdata_i[30:25], instr_rdata_i[14:12]}.
// When no bit-manipulation encoding matches, illegal_instr_bm is raised so the
// final illegal-instruction decision can take it into account.
if (ZBITMANIP) begin
  unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]})
    // Zba: shift-and-add address generation
    {6'b01_0000, 3'b010}: alu_operator_o = ALU_B_SH1ADD;
    {6'b01_0000, 3'b100}: alu_operator_o = ALU_B_SH2ADD;
    {6'b01_0000, 3'b110}: alu_operator_o = ALU_B_SH3ADD;

    // Zbb: logical with negate
    {6'b10_0000, 3'b111}: alu_operator_o = ALU_B_ANDN;
    {6'b10_0000, 3'b110}: alu_operator_o = ALU_B_ORN;
    {6'b10_0000, 3'b100}: alu_operator_o = ALU_B_XNOR;

    // Zbb: integer minimum/maximum
    {6'b00_0101, 3'b110}: alu_operator_o = ALU_B_MAX;
    {6'b00_0101, 3'b111}: alu_operator_o = ALU_B_MAXU;
    {6'b00_0101, 3'b100}: alu_operator_o = ALU_B_MIN;
    {6'b00_0101, 3'b101}: alu_operator_o = ALU_B_MINU;

    // Zbb: rotates
    {6'b11_0000, 3'b001}: alu_operator_o = ALU_B_ROL;
    {6'b11_0000, 3'b101}: alu_operator_o = ALU_B_ROR;

    // Zbb: zero-extend halfword. The encoding is only zext.h when the rs2
    // field is 00000; with any other rs2 value it is a different instruction
    // (pack) that has no ALU operator here, so it must not decode as zext.h.
    {6'b00_0100, 3'b100}: begin
      if (instr_rdata_i[24:20] == 5'b00000) begin
        alu_operator_o = ALU_B_ZEXTH;
      end else begin
        illegal_instr_bm = 1'b1;
      end
    end

    // Zbc: carry-less multiplication
    {6'b00_0101, 3'b001}: alu_operator_o = ALU_B_CLMUL;
    {6'b00_0101, 3'b011}: alu_operator_o = ALU_B_CLMULH;
    {6'b00_0101, 3'b010}: alu_operator_o = ALU_B_CLMULR;

    // Zbs: single-bit operations
    {6'b10_0100, 3'b001}: alu_operator_o = ALU_B_BCLR;
    {6'b10_0100, 3'b101}: alu_operator_o = ALU_B_BEXT;
    {6'b11_0100, 3'b001}: alu_operator_o = ALU_B_BINV;
    {6'b01_0100, 3'b001}: alu_operator_o = ALU_B_BSET;

    default: begin
      illegal_instr_bm = 1'b1;
    end
  endcase
end

// Final illegal-instruction decision for this decode path.
// The encoding is illegal when the non-bit-manipulation decode found no match
// and either bit-manipulation support is disabled or the bit-manipulation
// decode found no match as well.
// illegal_insn_o is only ever raised here, never cleared, so an illegal flag
// already set earlier for this instruction is not overwritten.
if (illegal_instr_non_bm && (!ZBITMANIP || illegal_instr_bm)) begin
  illegal_insn_o = 1'b1;
end
end
end

Expand Down
45 changes: 44 additions & 1 deletion rtl/include/cv32e40p_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ package cv32e40p_pkg;
parameter REGC_S4 = 2'b00;
parameter REGC_RD = 2'b01;
parameter REGC_ZERO = 2'b11;

// Bit-manipulation support enable.
// 1'b1: the ALU generates the bit-manipulation results and the decoder accepts
//       the bit-manipulation encodings.
// 1'b0: the bit-manipulation result is tied to '0 and those encodings are
//       flagged as illegal instructions.
parameter ZBITMANIP = 1'b1;

//////////////////////////////////////////////////////////////////////////////
// _ _ _ _ ___ _ _ //
Expand Down Expand Up @@ -156,7 +159,47 @@ package cv32e40p_pkg;
ALU_SHUF = 7'b0111010,
ALU_SHUF2 = 7'b0111011,
ALU_PCKLO = 7'b0111000,
ALU_PCKHI = 7'b0111001
ALU_PCKHI = 7'b0111001,

//Zba: Address generation Instructions
ALU_B_SH1ADD = 7'b0001111,
ALU_B_SH2ADD = 7'b0001110,
ALU_B_SH3ADD = 7'b1110010,

//Zbb: Basic Bit-Manipulation
ALU_B_ANDN = 7'b1100010,
ALU_B_MAX = 7'b0111100,
ALU_B_MIN = 7'b0111101,
ALU_B_ROL = 7'b1010110,
ALU_B_ROR = 7'b1011110,
ALU_B_XNOR = 7'b1011100,
ALU_B_ORN = 7'b1010100,
ALU_B_MAXU = 7'b1100000,
ALU_B_MINU = 7'b1110110,
ALU_B_RORI = 7'b1110111,
ALU_B_ORCB = 7'b1100001,
ALU_B_REV8 = 7'b1100011,
ALU_B_SEXTB = 7'b1100100,
ALU_B_SEXTH = 7'b1100101,
ALU_B_ZEXTH = 7'b1100110,
ALU_B_CPOP = 7'b1100111,
ALU_B_CTZ = 7'b1101001,
ALU_B_CLZ = 7'b1111110,

//Zbc: Carry-less Multiplication
ALU_B_CLMUL = 7'b1101010,
ALU_B_CLMULH = 7'b1101011,
ALU_B_CLMULR = 7'b1101100,

//Zbs: Single-bit Instructions
ALU_B_BCLR = 7'b1101101,
ALU_B_BCLRI = 7'b1101110,
ALU_B_BEXT = 7'b1101111,
ALU_B_BEXTI = 7'b1110000,
ALU_B_BINV = 7'b1110001,
ALU_B_BINVI = 7'b1110011,
ALU_B_BSET = 7'b1110100,
ALU_B_BSETI = 7'b1110101

} alu_opcode_e;

Expand Down

0 comments on commit 5ff2112

Please sign in to comment.