From 5ff21125adbc6f1a3050c93053d6b4cecfddb69a Mon Sep 17 00:00:00 2001
From: gullahmed1
Date: Tue, 7 Nov 2023 14:48:38 +0500
Subject: [PATCH] Add Bitmanipulation Support

Add the Zba, Zbb, Zbc and Zbs operations to the ALU, decode them in the
OP and OP-IMM paths of the decoder, and add the matching alu_opcode_e
encodings together with the ZBITMANIP package parameter that enables them.

---
Review notes (text in this section is ignored by git am):

 * ALU: CLMULH no longer applies ">>" directly to a streaming
   concatenation; the bit-reversed product is assigned to
   clmul_result_rev first and CLMULR/CLMULH both read that signal.
 * ALU: CLZ/CTZ/CPOP results are padded to 32 bits explicitly.
 * Decoder: zext.h is only accepted when rs2 == x0.
 * Decoder: the OP path only raises illegal_insn_o, it never clears a
   flag that was raised earlier in the same branch.
 * NOTE(review): whitespace inside context lines was reconstructed and the
   index hashes below are those of the original submission; apply with
   "git apply --ignore-whitespace" if the context does not match, and
   confirm the trailing context of the last cv32e40p_pkg.sv hunk.

 rtl/cv32e40p_alu.sv         | 144 +++++++++++++++++++++++++++++++++++-
 rtl/cv32e40p_decoder.sv     |  91 ++++++++++++++++++++--
 rtl/include/cv32e40p_pkg.sv |  45 +++++++++++-
 3 files changed, 273 insertions(+), 7 deletions(-)

diff --git a/rtl/cv32e40p_alu.sv b/rtl/cv32e40p_alu.sv
index aa900a787..a13113684 100644
--- a/rtl/cv32e40p_alu.sv
+++ b/rtl/cv32e40p_alu.sv
@@ -805,7 +805,12 @@ module cv32e40p_alu
   logic [31:0] bmask_first, bmask_inv;
   logic [31:0] bextins_and;
   logic [31:0] bextins_result, bclr_result, bset_result;
-
+  logic [31:0] result_bitmanip;   // Result of the selected bit-manipulation operation
+  logic [31:0] clmul_result;      // Carry-less multiplication result
+  logic [31:0] clmul_result_rev;  // Bit-reversed clmul_result
+  logic [ 5:0] cpop;              // Number of set bits in operand_a_i
+  logic [ 4:0] ff_one_result;     // Position of the first one reported by cv32e40p_ff_one
+  logic        ff_one_all_zeros;  // Set when the cv32e40p_ff_one input contains no ones
 
   // construct bit mask for insert/extract/bclr/bset
   // bmask looks like this 00..0011..1100..00
@@ -823,6 +828,129 @@ module cv32e40p_alu
   assign bclr_result = operand_a_i & bmask_inv;
   assign bset_result = operand_a_i | bmask;
 
+  if (ZBITMANIP) begin : gen_zbc_zbb_results
+
+    // Temporary signals
+    logic [31:0]        ff_one_in;
+    logic [31:0][31:0]  clmul_temp0;
+    logic [ 7:0][31:0]  clmul_temp1;
+    logic [ 1:0][31:0]  clmul_temp2;
+    logic [31:0]        operand_b_rev;
+
+    // Select the cv32e40p_ff_one input from operator_i: operand_a_i for CTZ, operand_a_rev otherwise
+    assign ff_one_in = (operator_i == ALU_B_CTZ) ? operand_a_i : operand_a_rev;
+
+    // Instantiate cv32e40p_popcnt, it returns the number of ones in operand_a_i
+    cv32e40p_popcnt popcnt_i (
+      .in_i    (operand_a_i),
+      .result_o(cpop)
+    );
+
+    // Instantiate the find-first-one module
+    cv32e40p_ff_one ff_one_i (
+      .in_i       (ff_one_in),
+      .first_one_o(ff_one_result),
+      .no_ones_o  (ff_one_all_zeros)
+    );
+
+    // Reverse operand_b_i using the streaming operator
+    assign operand_b_rev = {<<{operand_b_i}};
+
+    // Create 32 rows like a traditional multiplication (reversed operands unless the operation is CLMUL)
+    for (genvar i = 0; i < 32; i++) begin : gen_32_rows
+      assign clmul_temp0[i] = (operator_i == ALU_B_CLMUL) ?
+                              (operand_b_i[i]   ? operand_a_i   << i : '0) :
+                              (operand_b_rev[i] ? operand_a_rev << i : '0);
+    end
+
+    // XOR 4 rows, 8 times
+    for (genvar i = 0; i < 8; i++) begin : gen_xor_result_8_rows
+      assign clmul_temp1[i] = clmul_temp0[i<<2]     ^ clmul_temp0[(i<<2)+1] ^
+                              clmul_temp0[(i<<2)+2] ^ clmul_temp0[(i<<2)+3];
+    end
+
+    // XOR 4 rows, twice
+    for (genvar i = 0; i < 2; i++) begin : gen_xor_result_2_rows
+      assign clmul_temp2[i] = clmul_temp1[i<<2]     ^ clmul_temp1[(i<<2)+1] ^
+                              clmul_temp1[(i<<2)+2] ^ clmul_temp1[(i<<2)+3];
+    end
+
+    // XOR the last 2 rows
+    assign clmul_result = clmul_temp2[0] ^ clmul_temp2[1];
+
+    // Bit-reversed product used by CLMULR/CLMULH. A streaming concatenation may only
+    // be the source or target of an assignment (IEEE 1800, streaming operators), so it
+    // is assigned here instead of being shifted directly inside an expression.
+    assign clmul_result_rev = {<<{clmul_result}};
+  end
+
+  always_comb begin
+    if (ZBITMANIP) begin
+      unique case (operator_i)
+
+        // Zba: Address generation instructions, shift rs1 left by 1/2/3 and add rs2
+        ALU_B_SH1ADD: result_bitmanip = {operand_a_i[30:0], 1'b0} + operand_b_i;
+        ALU_B_SH2ADD: result_bitmanip = {operand_a_i[29:0], 2'b0} + operand_b_i;
+        ALU_B_SH3ADD: result_bitmanip = {operand_a_i[28:0], 3'b0} + operand_b_i;
+
+        // Zbb: Basic Bit-Manipulation
+        // Logical with Negate
+        ALU_B_ANDN: result_bitmanip = operand_a_i & operand_b_neg;
+        ALU_B_ORN:  result_bitmanip = operand_a_i | operand_b_neg;
+        ALU_B_XNOR: result_bitmanip = ~(operand_a_i ^ operand_b_i);
+
+        // Count leading/trailing zero bits (32 when the find-first-one input has no ones)
+        ALU_B_CLZ: result_bitmanip = ff_one_all_zeros ? 32'd32 : {27'b0, ff_one_result};
+        ALU_B_CTZ: result_bitmanip = ff_one_all_zeros ? 32'd32 : {27'b0, ff_one_result};
+
+        // Count set bits
+        ALU_B_CPOP: result_bitmanip = {26'b0, cpop};
+
+        // Integer Minimum/Maximum
+        ALU_B_MAX:  result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_b_i : operand_a_i;
+        ALU_B_MAXU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_b_i : operand_a_i;
+        ALU_B_MIN:  result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_a_i : operand_b_i;
+        ALU_B_MINU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_a_i : operand_b_i;
+
+        // Sign and zero-extension
+        ALU_B_SEXTB: result_bitmanip = {{24{operand_a_i[7]}}, operand_a_i[7:0]};
+        ALU_B_SEXTH: result_bitmanip = {{16{operand_a_i[15]}}, operand_a_i[15:0]};
+        ALU_B_ZEXTH: result_bitmanip = {{16{1'b0}}, operand_a_i[15:0]};
+
+        // Bitwise rotation
+        ALU_B_ROL:  result_bitmanip = (operand_a_i << operand_b_i[4:0]) | (operand_a_i >> (32-operand_b_i[4:0]));
+        ALU_B_ROR:  result_bitmanip = (operand_a_i >> operand_b_i[4:0]) | (operand_a_i << (32-operand_b_i[4:0]));
+        ALU_B_RORI: result_bitmanip = (operand_a_i >> operand_b_i[4:0]) | (operand_a_i << (32-operand_b_i[4:0]));
+
+        // Bitwise OR-Combine, byte granule
+        ALU_B_ORCB: result_bitmanip = {{8{|operand_a_i[31:24]}}, {8{|operand_a_i[23:16]}}, {8{|operand_a_i[15:8]}}, {8{|operand_a_i[7:0]}}};
+
+        // Byte-reverse register
+        ALU_B_REV8: result_bitmanip = {{operand_a_i[7:0]}, {operand_a_i[15:8]}, {operand_a_i[23:16]}, {operand_a_i[31:24]}};
+
+        // Zbc: Carry-less Multiplication low/reversed/high part
+        ALU_B_CLMUL:  result_bitmanip = clmul_result;
+        ALU_B_CLMULR: result_bitmanip = clmul_result_rev;
+        ALU_B_CLMULH: result_bitmanip = {1'b0, clmul_result_rev[31:1]};
+
+        // Zbs: Single-bit Instructions
+        ALU_B_BCLR:  result_bitmanip = operand_a_i & ~(1'b1 << (operand_b_i & 5'b11111));
+        ALU_B_BCLRI: result_bitmanip = operand_a_i & ~(1'b1 << (operand_b_i & 5'b11111));
+        ALU_B_BEXT:  result_bitmanip = (operand_a_i >> (operand_b_i & 5'b11111)) & 1'b1;
+        ALU_B_BEXTI: result_bitmanip = (operand_a_i >> (operand_b_i & 5'b11111)) & 1'b1;
+        ALU_B_BINV:  result_bitmanip = operand_a_i ^ (1'b1 << (operand_b_i & 5'b11111));
+        ALU_B_BINVI: result_bitmanip = operand_a_i ^ (1'b1 << (operand_b_i & 5'b11111));
+        ALU_B_BSET:  result_bitmanip = operand_a_i | (1'b1 << (operand_b_i & 5'b11111));
+        ALU_B_BSETI: result_bitmanip = operand_a_i | (1'b1 << (operand_b_i & 5'b11111));
+
+        default: result_bitmanip = '0;
+      endcase
+    end
+    else begin
+      result_bitmanip = '0;
+    end
+  end
+
   /////////////////////////////////////////////////////////////////////////////////
   //  ____ _____ _______   _____  ________      ________ _____   _____ ______   //
   // |  _ \_   _|__   __| |  __ \|  ____\ \    / /  ____|  __ \ / ____|  ____|  //
@@ -979,6 +1107,20 @@ module cv32e40p_alu
 
       default: ;  // default case to suppress unique warning
     endcase
+
+    if (ZBITMANIP) begin
+      unique case (operator_i)
+        // Bit-Manip Operations Result
+        ALU_B_SH1ADD, ALU_B_MIN,    ALU_B_ROL,    ALU_B_ROR,   ALU_B_XNOR,  ALU_B_MAXU,
+        ALU_B_SH2ADD, ALU_B_ANDN,   ALU_B_MAX,    ALU_B_ORN,   ALU_B_MINU,  ALU_B_RORI,
+        ALU_B_SEXTB,  ALU_B_SEXTH,  ALU_B_ZEXTH,  ALU_B_CPOP,  ALU_B_CTZ,   ALU_B_BCLR,
+        ALU_B_BEXT,   ALU_B_BEXTI,  ALU_B_BINV,   ALU_B_BINVI, ALU_B_BSET,  ALU_B_REV8,
+        ALU_B_CLMUL,  ALU_B_CLMULH, ALU_B_CLMULR, ALU_B_CLZ,   ALU_B_BSETI, ALU_B_ORCB,
+        ALU_B_BCLRI,  ALU_B_SH3ADD : result_o = result_bitmanip;
+
+        default: ;
+      endcase
+    end
   end
 
   assign ready_o = div_ready;
diff --git a/rtl/cv32e40p_decoder.sv b/rtl/cv32e40p_decoder.sv
index d03027bae..199134977 100644
--- a/rtl/cv32e40p_decoder.sv
+++ b/rtl/cv32e40p_decoder.sv
@@ -185,6 +185,9 @@ module cv32e40p_decoder
   // unittypes for latencies to help us decode for APU
   enum logic[1:0] {ADDMUL, DIVSQRT, NONCOMP, CONV} fp_op_group;
 
+  // Illegal Instr flags for bitmanip
+  logic illegal_instr_bm;
+  logic illegal_instr_non_bm;
 
   /////////////////////////////////////////////
   //   ____                     _            //
@@ -264,6 +267,8 @@ module cv32e40p_decoder
     atop_o                      = 6'b000000;
 
     illegal_insn_o              = 1'b0;
+    illegal_instr_bm            = 1'b0;
+    illegal_instr_non_bm        = 1'b0;
     ebrk_insn_o                 = 1'b0;
     ecall_insn_o                = 1'b0;
     wfi_o                       = 1'b0;
@@ -493,8 +498,31 @@ module cv32e40p_decoder
           3'b111: alu_operator_o = ALU_AND;  // And with Immediate
 
           3'b001: begin
-            alu_operator_o = ALU_SLL;  // Shift Left Logical by Immediate
-            if (instr_rdata_i[31:25] != 7'b0)
+            if (instr_rdata_i[31:25] == 7'b0)
+              alu_operator_o = ALU_SLL;  // Shift Left Logical by Immediate
+
+            //Bit-Manip ALU Operations
+            else if (ZBITMANIP) begin
+              unique case (instr_rdata_i[31:25])
+                7'b011_0000: begin
+                  unique case(instr_rdata_i[24:20])
+                    5'b00100: alu_operator_o = ALU_B_SEXTB;
+                    5'b00101: alu_operator_o = ALU_B_SEXTH;
+                    5'b00010: alu_operator_o = ALU_B_CPOP;
+                    5'b00001: alu_operator_o = ALU_B_CTZ;
+                    5'b00000: alu_operator_o = ALU_B_CLZ;
+                    default:  illegal_insn_o = 1'b1;
+                  endcase
+                end
+                7'b010_0100: alu_operator_o = ALU_B_BCLRI;
+                7'b011_0100: alu_operator_o = ALU_B_BINVI;
+                7'b001_0100: alu_operator_o = ALU_B_BSETI;
+                default: begin
+                  illegal_insn_o = 1'b1;
+                end
+              endcase
+            end
+            else
               illegal_insn_o = 1'b1;
           end
 
@@ -503,11 +531,23 @@ module cv32e40p_decoder
               alu_operator_o = ALU_SRL;  // Shift Right Logical by Immediate
             else if (instr_rdata_i[31:25] == 7'b010_0000)
               alu_operator_o = ALU_SRA;  // Shift Right Arithmetically by Immediate
+
+            //Bit-Manip ALU Operations
+            else if (ZBITMANIP) begin
+              if (instr_rdata_i[31:25] == 7'b011_0000)
+                alu_operator_o = ALU_B_RORI;
+              else if (instr_rdata_i[31:20] == 12'b001010000111)
+                alu_operator_o = ALU_B_ORCB;
+              else if (instr_rdata_i[31:20] == 12'b011010011000)
+                alu_operator_o = ALU_B_REV8;
+              else if (instr_rdata_i[31:25] == 7'b010_0100)
+                alu_operator_o = ALU_B_BEXTI;
+              else
+                illegal_insn_o = 1'b1;
+            end
             else
               illegal_insn_o = 1'b1;
           end
-
-
         endcase
       end
 
@@ -992,9 +1032,50 @@ module cv32e40p_decoder
             end
 
             default: begin
-              illegal_insn_o = 1'b1;
+              illegal_instr_non_bm = 1'b1;
             end
           endcase
+
+          if (ZBITMANIP) begin
+            unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]})
+              // Bit-Manip ALU Operations
+              {6'b01_0000, 3'b010}: alu_operator_o = ALU_B_SH1ADD;
+              {6'b01_0000, 3'b100}: alu_operator_o = ALU_B_SH2ADD;
+              {6'b01_0000, 3'b110}: alu_operator_o = ALU_B_SH3ADD;
+              {6'b10_0000, 3'b111}: alu_operator_o = ALU_B_ANDN;
+              {6'b00_0101, 3'b110}: alu_operator_o = ALU_B_MAX;
+              {6'b00_0101, 3'b100}: alu_operator_o = ALU_B_MIN;
+              {6'b11_0000, 3'b001}: alu_operator_o = ALU_B_ROL;
+              {6'b11_0000, 3'b101}: alu_operator_o = ALU_B_ROR;
+              {6'b10_0000, 3'b100}: alu_operator_o = ALU_B_XNOR;
+              {6'b10_0000, 3'b110}: alu_operator_o = ALU_B_ORN;
+              {6'b00_0101, 3'b111}: alu_operator_o = ALU_B_MAXU;
+              {6'b00_0101, 3'b101}: alu_operator_o = ALU_B_MINU;
+              {6'b00_0100, 3'b100}: begin
+                // zext.h is only encoded with rs2 == x0; any other rs2 value is not a
+                // Zbb instruction and must not be executed as zext.h.
+                if (instr_rdata_i[24:20] == 5'b0) alu_operator_o = ALU_B_ZEXTH;
+                else illegal_instr_bm = 1'b1;
+              end
+              {6'b00_0101, 3'b001}: alu_operator_o = ALU_B_CLMUL;
+              {6'b00_0101, 3'b011}: alu_operator_o = ALU_B_CLMULH;
+              {6'b00_0101, 3'b010}: alu_operator_o = ALU_B_CLMULR;
+              {6'b10_0100, 3'b001}: alu_operator_o = ALU_B_BCLR;
+              {6'b10_0100, 3'b101}: alu_operator_o = ALU_B_BEXT;
+              {6'b11_0100, 3'b001}: alu_operator_o = ALU_B_BINV;
+              {6'b01_0100, 3'b001}: alu_operator_o = ALU_B_BSET;
+              default: begin
+                illegal_instr_bm = 1'b1;
+              end
+            endcase
+          end
+
+          // Illegal only when neither decode above matched. The flag is only raised here,
+          // never cleared, so an illegal_insn_o that was already set earlier in this
+          // branch is preserved (it defaults to 1'b0 at the top of the block).
+          if (illegal_instr_non_bm && (!ZBITMANIP || illegal_instr_bm)) begin
+            illegal_insn_o = 1'b1;
+          end
         end
       end
 
diff --git a/rtl/include/cv32e40p_pkg.sv b/rtl/include/cv32e40p_pkg.sv
index 319e790b6..a8fb14f38 100644
--- a/rtl/include/cv32e40p_pkg.sv
+++ b/rtl/include/cv32e40p_pkg.sv
@@ -64,6 +64,9 @@ package cv32e40p_pkg;
   parameter REGC_S4 = 2'b00;
   parameter REGC_RD = 2'b01;
   parameter REGC_ZERO = 2'b11;
+
+  // To Enable Bitmanip support
+  parameter ZBITMANIP = 1'b1;
 
   //////////////////////////////////////////////////////////////////////////////
   //      _    _    _   _    ___                       _   _                  //
@@ -156,7 +159,47 @@ package cv32e40p_pkg;
     ALU_SHUF  = 7'b0111010,
     ALU_SHUF2 = 7'b0111011,
     ALU_PCKLO = 7'b0111000,
-    ALU_PCKHI = 7'b0111001
+    ALU_PCKHI = 7'b0111001,
+
+    //Zba: Address generation Instructions
+    ALU_B_SH1ADD = 7'b0001111,
+    ALU_B_SH2ADD = 7'b0001110,
+    ALU_B_SH3ADD = 7'b1110010,
+
+    //Zbb: Basic Bit-Manipulation
+    ALU_B_ANDN  = 7'b1100010,
+    ALU_B_MAX   = 7'b0111100,
+    ALU_B_MIN   = 7'b0111101,
+    ALU_B_ROL   = 7'b1010110,
+    ALU_B_ROR   = 7'b1011110,
+    ALU_B_XNOR  = 7'b1011100,
+    ALU_B_ORN   = 7'b1010100,
+    ALU_B_MAXU  = 7'b1100000,
+    ALU_B_MINU  = 7'b1110110,
+    ALU_B_RORI  = 7'b1110111,
+    ALU_B_ORCB  = 7'b1100001,
+    ALU_B_REV8  = 7'b1100011,
+    ALU_B_SEXTB = 7'b1100100,
+    ALU_B_SEXTH = 7'b1100101,
+    ALU_B_ZEXTH = 7'b1100110,
+    ALU_B_CPOP  = 7'b1100111,
+    ALU_B_CTZ   = 7'b1101001,
+    ALU_B_CLZ   = 7'b1111110,
+
+    //Zbc: Carry-less Multiplication
+    ALU_B_CLMUL  = 7'b1101010,
+    ALU_B_CLMULH = 7'b1101011,
+    ALU_B_CLMULR = 7'b1101100,
+
+    //Zbs: Single-bit Instructions
+    ALU_B_BCLR  = 7'b1101101,
+    ALU_B_BCLRI = 7'b1101110,
+    ALU_B_BEXT  = 7'b1101111,
+    ALU_B_BEXTI = 7'b1110000,
+    ALU_B_BINV  = 7'b1110001,
+    ALU_B_BINVI = 7'b1110011,
+    ALU_B_BSET  = 7'b1110100,
+    ALU_B_BSETI = 7'b1110101
   } alu_opcode_e;