Skip to content

Commit

Permalink
zknd instructions added
Browse files Browse the repository at this point in the history
  • Loading branch information
munailwaqar committed Jan 27, 2025
1 parent 0347cab commit 82a8f63
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 3 deletions.
19 changes: 18 additions & 1 deletion core/alu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,12 @@ module alu
logic [ 31:0] aes32esmi_gen;
logic [ 63:0] aes64es_gen;
logic [ 63:0] aes64esm_gen;

logic [ 31:0] aes32dsi_gen;
logic [ 31:0] aes32dsmi_gen;
logic [ 63:0] sr_inv;
logic [ 63:0] aes64ds_gen;
logic [ 63:0] aes64dsm_gen;
logic [ 63:0] aes64im_gen;
// logic [ 31:0] tmp1;
// logic [ 31:0] tmp2;
// logic [ 31:0] tmp3;
Expand Down Expand Up @@ -310,13 +315,20 @@ module alu
end
assign aes32esi_gen = (fu_data_i.operand_a ^ ({24'b0, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} << {orig_instr_aes[5:4], 3'b000}) | ({24'b0, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} >> (32 - {orig_instr_aes[5:4], 3'b000})));
assign aes32esmi_gen = fu_data_i.operand_a ^ ((aes_mixcolumn_fwd({24'h000000, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) << {orig_instr_aes[5:4], 3'b000}) | (aes_mixcolumn_fwd({24'h000000, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) >> (32 - {orig_instr_aes[5:4], 3'b000})));
assign aes32dsi_gen = (fu_data_i.operand_a ^ ({24'b0, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} << {orig_instr_aes[5:4], 3'b000}) | ({24'b0, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} >> (32 - {orig_instr_aes[5:4], 3'b000})));
assign aes32dsmi_gen = fu_data_i.operand_a ^ ((aes_mixcolumn_inv({24'h000000, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) << {orig_instr_aes[5:4], 3'b000}) | (aes_mixcolumn_inv({24'h000000, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) >> (32 - {orig_instr_aes[5:4], 3'b000})));
end
else if (CVA6Cfg.IS_XLEN64) begin
// Shift rows step
assign sr = {fu_data_i.operand_a[31:24], fu_data_i.operand_b[55:48], fu_data_i.operand_b[15:8], fu_data_i.operand_a[39:32], fu_data_i.operand_b[63:56], fu_data_i.operand_b[23:16], fu_data_i.operand_a[47:40], fu_data_i.operand_a[7:0]};
assign sr_inv = {fu_data_i.operand_b[31:24], fu_data_i.operand_b[55:48], fu_data_i.operand_a[15:8], fu_data_i.operand_a[39:32], fu_data_i.operand_a[63:56], fu_data_i.operand_b[23:16], fu_data_i.operand_b[47:40], fu_data_i.operand_a[7:0]};
// AES64 encryption results
assign aes64es_gen = {aes_sbox_fwd(sr[63:56]), aes_sbox_fwd(sr[55:48]), aes_sbox_fwd(sr[47:40]), aes_sbox_fwd(sr[39:32]), aes_sbox_fwd(sr[31:24]), aes_sbox_fwd(sr[23:16]), aes_sbox_fwd(sr[15:8]), aes_sbox_fwd(sr[7:0])};
assign aes64esm_gen = {aes_mixcolumn_fwd(aes64es_gen[63:32]), aes_mixcolumn_fwd(aes64es_gen[31:0])};
assign aes64ds_gen = {aes_sbox_inv(sr_inv[63:56]), aes_sbox_inv(sr_inv[55:48]), aes_sbox_inv(sr_inv[47:40]), aes_sbox_inv(sr_inv[39:32]), aes_sbox_inv(sr_inv[31:24]), aes_sbox_inv(sr_inv[23:16]), aes_sbox_inv(sr_inv[15:8]), aes_sbox_inv(sr_inv[7:0])};
assign aes64dsm_gen = {aes_mixcolumn_inv(aes64ds_gen[63:32]), aes_mixcolumn_inv(aes64ds_gen[31:0])};
assign aes64im_gen = {aes_mixcolumn_inv(fu_data_i.operand_a[63:32]), aes_mixcolumn_inv(fu_data_i.operand_a[31:0])};
// AES Key Schedule results
assign aes64ks2_gen = {(fu_data_i.operand_a[63:32] ^ fu_data_i.operand_b[31:0] ^ fu_data_i.operand_b[63:32]), (fu_data_i.operand_a[63:32] ^ fu_data_i.operand_b[31:0])};
// assign tmp1 = fu_data_i.operand_a[63:32];
// assign rc = orig_instr_aes[3:0];
Expand Down Expand Up @@ -443,6 +455,11 @@ module alu
if (fu_data_i.operation == AES32ESMI && CVA6Cfg.IS_XLEN32) result_o = aes32esmi_gen;
if (fu_data_i.operation == AES64ES && CVA6Cfg.IS_XLEN64) result_o = aes64es_gen;
if (fu_data_i.operation == AES64ESM && CVA6Cfg.IS_XLEN64) result_o = aes64esm_gen;
if (fu_data_i.operation == AES32DSI && CVA6Cfg.IS_XLEN32) result_o = aes32dsi_gen;
if (fu_data_i.operation == AES32DSMI && CVA6Cfg.IS_XLEN32) result_o = aes32dsmi_gen;
if (fu_data_i.operation == AES64DS && CVA6Cfg.IS_XLEN64) result_o = aes64ds_gen;
if (fu_data_i.operation == AES64DSM && CVA6Cfg.IS_XLEN64) result_o = aes64dsm_gen;
if (fu_data_i.operation == AES64IM && CVA6Cfg.IS_XLEN64) result_o = aes64im_gen;
//if (fu_data_i.operation == AES64KS1I && CVA6Cfg.IS_XLEN64) result_o = aes64ks1i_gen;
if (fu_data_i.operation == AES64KS2 && CVA6Cfg.IS_XLEN64) result_o = aes64ks2_gen;
end
Expand Down
22 changes: 22 additions & 0 deletions core/decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,26 @@ module decoder
if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES32ESMI; // aes32esmi
else illegal_instr_bm = 1'b1;
end
{7'b0010101, 3'b000}, {7'b0110101, 3'b000}, {7'b1010101, 3'b000}, {7'b1110101, 3'b000}: begin
if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES32DSI; // aes32dsi
else illegal_instr_bm = 1'b1;
end
{7'b0010111, 3'b000}, {7'b0110111, 3'b000}, {7'b1010111, 3'b000}, {7'b1110111, 3'b000}: begin
if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES32DSMI; // aes32dsmi
else illegal_instr_bm = 1'b1;
end
{
7'b001_1101, 3'b000
} : begin
if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES64DS; // aes64ds
else illegal_instr_bm = 1'b1;
end
{
7'b001_1111, 3'b000
} : begin
if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES64DSM; // aes64dsm
else illegal_instr_bm = 1'b1;
end
default: begin
illegal_instr_bm = 1'b1;
end
Expand Down Expand Up @@ -961,6 +981,8 @@ module decoder
instruction_o.op = ariane_pkg::ZIP;
// else if (CVA6Cfg.ZKN && instr.instr[31:24] == 8'b00110001)
// instruction_o.op = ariane_pkg::AES64KS1I;
else if (CVA6Cfg.ZKN && instr.instr[31:20] == 12'b001100000000)
instruction_o.op = ariane_pkg::AES64IM;
else illegal_instr_bm = 1'b1;
end
3'b101: begin
Expand Down
84 changes: 84 additions & 0 deletions core/include/ariane_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,12 @@ package ariane_pkg;
AES32ESMI,
AES64ES,
AES64ESM,
// AES Decryption instructions
AES32DSI,
AES32DSMI,
AES64DS,
AES64DSM,
AES64IM,
// AES Key-Schedule instructions
//AES64KS1I,
AES64KS2
Expand Down Expand Up @@ -906,4 +912,82 @@ package ariane_pkg;
default: aes_decode_rcon = 32'h00000000;
endcase
endfunction
// AES Sbox Inverse
// Byte-wise lookup table implementing the AES inverse S-box (InvSubBytes).
//   si      : 8-bit input byte
//   returns : 8-bit substituted byte
// Every one of the 256 possible 2-state input values has an explicit case
// item, so the table is complete for synthesis.
function [7:0] aes_sbox_inv(input [7:0] si);
case (si)
8'h00: aes_sbox_inv = 8'h52; 8'h01: aes_sbox_inv = 8'h09; 8'h02: aes_sbox_inv = 8'h6a; 8'h03: aes_sbox_inv = 8'hd5; 8'h04: aes_sbox_inv = 8'h30; 8'h05: aes_sbox_inv = 8'h36;
8'h06: aes_sbox_inv = 8'ha5; 8'h07: aes_sbox_inv = 8'h38; 8'h08: aes_sbox_inv = 8'hbf; 8'h09: aes_sbox_inv = 8'h40; 8'h0a: aes_sbox_inv = 8'ha3; 8'h0b: aes_sbox_inv = 8'h9e;
8'h0c: aes_sbox_inv = 8'h81; 8'h0d: aes_sbox_inv = 8'hf3; 8'h0e: aes_sbox_inv = 8'hd7; 8'h0f: aes_sbox_inv = 8'hfb; 8'h10: aes_sbox_inv = 8'h7c; 8'h11: aes_sbox_inv = 8'he3;
8'h12: aes_sbox_inv = 8'h39; 8'h13: aes_sbox_inv = 8'h82; 8'h14: aes_sbox_inv = 8'h9b; 8'h15: aes_sbox_inv = 8'h2f; 8'h16: aes_sbox_inv = 8'hff; 8'h17: aes_sbox_inv = 8'h87;
8'h18: aes_sbox_inv = 8'h34; 8'h19: aes_sbox_inv = 8'h8e; 8'h1a: aes_sbox_inv = 8'h43; 8'h1b: aes_sbox_inv = 8'h44; 8'h1c: aes_sbox_inv = 8'hc4; 8'h1d: aes_sbox_inv = 8'hde;
8'h1e: aes_sbox_inv = 8'he9; 8'h1f: aes_sbox_inv = 8'hcb; 8'h20: aes_sbox_inv = 8'h54; 8'h21: aes_sbox_inv = 8'h7b; 8'h22: aes_sbox_inv = 8'h94; 8'h23: aes_sbox_inv = 8'h32;
8'h24: aes_sbox_inv = 8'ha6; 8'h25: aes_sbox_inv = 8'hc2; 8'h26: aes_sbox_inv = 8'h23; 8'h27: aes_sbox_inv = 8'h3d; 8'h28: aes_sbox_inv = 8'hee; 8'h29: aes_sbox_inv = 8'h4c;
8'h2a: aes_sbox_inv = 8'h95; 8'h2b: aes_sbox_inv = 8'h0b; 8'h2c: aes_sbox_inv = 8'h42; 8'h2d: aes_sbox_inv = 8'hfa; 8'h2e: aes_sbox_inv = 8'hc3; 8'h2f: aes_sbox_inv = 8'h4e;
8'h30: aes_sbox_inv = 8'h08; 8'h31: aes_sbox_inv = 8'h2e; 8'h32: aes_sbox_inv = 8'ha1; 8'h33: aes_sbox_inv = 8'h66; 8'h34: aes_sbox_inv = 8'h28; 8'h35: aes_sbox_inv = 8'hd9;
8'h36: aes_sbox_inv = 8'h24; 8'h37: aes_sbox_inv = 8'hb2; 8'h38: aes_sbox_inv = 8'h76; 8'h39: aes_sbox_inv = 8'h5b; 8'h3a: aes_sbox_inv = 8'ha2; 8'h3b: aes_sbox_inv = 8'h49;
8'h3c: aes_sbox_inv = 8'h6d; 8'h3d: aes_sbox_inv = 8'h8b; 8'h3e: aes_sbox_inv = 8'hd1; 8'h3f: aes_sbox_inv = 8'h25; 8'h40: aes_sbox_inv = 8'h72; 8'h41: aes_sbox_inv = 8'hf8;
8'h42: aes_sbox_inv = 8'hf6; 8'h43: aes_sbox_inv = 8'h64; 8'h44: aes_sbox_inv = 8'h86; 8'h45: aes_sbox_inv = 8'h68; 8'h46: aes_sbox_inv = 8'h98; 8'h47: aes_sbox_inv = 8'h16;
8'h48: aes_sbox_inv = 8'hd4; 8'h49: aes_sbox_inv = 8'ha4; 8'h4a: aes_sbox_inv = 8'h5c; 8'h4b: aes_sbox_inv = 8'hcc; 8'h4c: aes_sbox_inv = 8'h5d; 8'h4d: aes_sbox_inv = 8'h65;
8'h4e: aes_sbox_inv = 8'hb6; 8'h4f: aes_sbox_inv = 8'h92; 8'h50: aes_sbox_inv = 8'h6c; 8'h51: aes_sbox_inv = 8'h70; 8'h52: aes_sbox_inv = 8'h48; 8'h53: aes_sbox_inv = 8'h50;
8'h54: aes_sbox_inv = 8'hfd; 8'h55: aes_sbox_inv = 8'hed; 8'h56: aes_sbox_inv = 8'hb9; 8'h57: aes_sbox_inv = 8'hda; 8'h58: aes_sbox_inv = 8'h5e; 8'h59: aes_sbox_inv = 8'h15;
8'h5a: aes_sbox_inv = 8'h46; 8'h5b: aes_sbox_inv = 8'h57; 8'h5c: aes_sbox_inv = 8'ha7; 8'h5d: aes_sbox_inv = 8'h8d; 8'h5e: aes_sbox_inv = 8'h9d; 8'h5f: aes_sbox_inv = 8'h84;
8'h60: aes_sbox_inv = 8'h90; 8'h61: aes_sbox_inv = 8'hd8; 8'h62: aes_sbox_inv = 8'hab; 8'h63: aes_sbox_inv = 8'h00; 8'h64: aes_sbox_inv = 8'h8c; 8'h65: aes_sbox_inv = 8'hbc;
8'h66: aes_sbox_inv = 8'hd3; 8'h67: aes_sbox_inv = 8'h0a; 8'h68: aes_sbox_inv = 8'hf7; 8'h69: aes_sbox_inv = 8'he4; 8'h6a: aes_sbox_inv = 8'h58; 8'h6b: aes_sbox_inv = 8'h05;
8'h6c: aes_sbox_inv = 8'hb8; 8'h6d: aes_sbox_inv = 8'hb3; 8'h6e: aes_sbox_inv = 8'h45; 8'h6f: aes_sbox_inv = 8'h06; 8'h70: aes_sbox_inv = 8'hd0; 8'h71: aes_sbox_inv = 8'h2c;
8'h72: aes_sbox_inv = 8'h1e; 8'h73: aes_sbox_inv = 8'h8f; 8'h74: aes_sbox_inv = 8'hca; 8'h75: aes_sbox_inv = 8'h3f; 8'h76: aes_sbox_inv = 8'h0f; 8'h77: aes_sbox_inv = 8'h02;
8'h78: aes_sbox_inv = 8'hc1; 8'h79: aes_sbox_inv = 8'haf; 8'h7a: aes_sbox_inv = 8'hbd; 8'h7b: aes_sbox_inv = 8'h03; 8'h7c: aes_sbox_inv = 8'h01; 8'h7d: aes_sbox_inv = 8'h13;
8'h7e: aes_sbox_inv = 8'h8a; 8'h7f: aes_sbox_inv = 8'h6b; 8'h80: aes_sbox_inv = 8'h3a; 8'h81: aes_sbox_inv = 8'h91; 8'h82: aes_sbox_inv = 8'h11; 8'h83: aes_sbox_inv = 8'h41;
8'h84: aes_sbox_inv = 8'h4f; 8'h85: aes_sbox_inv = 8'h67; 8'h86: aes_sbox_inv = 8'hdc; 8'h87: aes_sbox_inv = 8'hea; 8'h88: aes_sbox_inv = 8'h97; 8'h89: aes_sbox_inv = 8'hf2;
8'h8a: aes_sbox_inv = 8'hcf; 8'h8b: aes_sbox_inv = 8'hce; 8'h8c: aes_sbox_inv = 8'hf0; 8'h8d: aes_sbox_inv = 8'hb4; 8'h8e: aes_sbox_inv = 8'he6; 8'h8f: aes_sbox_inv = 8'h73;
8'h90: aes_sbox_inv = 8'h96; 8'h91: aes_sbox_inv = 8'hac; 8'h92: aes_sbox_inv = 8'h74; 8'h93: aes_sbox_inv = 8'h22; 8'h94: aes_sbox_inv = 8'he7; 8'h95: aes_sbox_inv = 8'had; 8'h96: aes_sbox_inv = 8'h35; 8'h97: aes_sbox_inv = 8'h85;
8'h98: aes_sbox_inv = 8'he2; 8'h99: aes_sbox_inv = 8'hf9; 8'h9a: aes_sbox_inv = 8'h37; 8'h9b: aes_sbox_inv = 8'he8; 8'h9c: aes_sbox_inv = 8'h1c; 8'h9d: aes_sbox_inv = 8'h75;
8'h9e: aes_sbox_inv = 8'hdf; 8'h9f: aes_sbox_inv = 8'h6e; 8'ha0: aes_sbox_inv = 8'h47; 8'ha1: aes_sbox_inv = 8'hf1; 8'ha2: aes_sbox_inv = 8'h1a; 8'ha3: aes_sbox_inv = 8'h71;
8'ha4: aes_sbox_inv = 8'h1d; 8'ha5: aes_sbox_inv = 8'h29; 8'ha6: aes_sbox_inv = 8'hc5; 8'ha7: aes_sbox_inv = 8'h89; 8'ha8: aes_sbox_inv = 8'h6f; 8'ha9: aes_sbox_inv = 8'hb7;
8'haa: aes_sbox_inv = 8'h62; 8'hab: aes_sbox_inv = 8'h0e; 8'hac: aes_sbox_inv = 8'haa; 8'had: aes_sbox_inv = 8'h18; 8'hae: aes_sbox_inv = 8'hbe; 8'haf: aes_sbox_inv = 8'h1b;
8'hb0: aes_sbox_inv = 8'hfc; 8'hb1: aes_sbox_inv = 8'h56; 8'hb2: aes_sbox_inv = 8'h3e; 8'hb3: aes_sbox_inv = 8'h4b; 8'hb4: aes_sbox_inv = 8'hc6; 8'hb5: aes_sbox_inv = 8'hd2;
8'hb6: aes_sbox_inv = 8'h79; 8'hb7: aes_sbox_inv = 8'h20; 8'hb8: aes_sbox_inv = 8'h9a; 8'hb9: aes_sbox_inv = 8'hdb; 8'hba: aes_sbox_inv = 8'hc0; 8'hbb: aes_sbox_inv = 8'hfe;
8'hbc: aes_sbox_inv = 8'h78; 8'hbd: aes_sbox_inv = 8'hcd; 8'hbe: aes_sbox_inv = 8'h5a; 8'hbf: aes_sbox_inv = 8'hf4; 8'hc0: aes_sbox_inv = 8'h1f; 8'hc1: aes_sbox_inv = 8'hdd;
8'hc2: aes_sbox_inv = 8'ha8; 8'hc3: aes_sbox_inv = 8'h33; 8'hc4: aes_sbox_inv = 8'h88; 8'hc5: aes_sbox_inv = 8'h07; 8'hc6: aes_sbox_inv = 8'hc7; 8'hc7: aes_sbox_inv = 8'h31;
8'hc8: aes_sbox_inv = 8'hb1; 8'hc9: aes_sbox_inv = 8'h12; 8'hca: aes_sbox_inv = 8'h10; 8'hcb: aes_sbox_inv = 8'h59; 8'hcc: aes_sbox_inv = 8'h27; 8'hcd: aes_sbox_inv = 8'h80;
8'hce: aes_sbox_inv = 8'hec; 8'hcf: aes_sbox_inv = 8'h5f; 8'hd0: aes_sbox_inv = 8'h60; 8'hd1: aes_sbox_inv = 8'h51; 8'hd2: aes_sbox_inv = 8'h7f; 8'hd3: aes_sbox_inv = 8'ha9;
8'hd4: aes_sbox_inv = 8'h19; 8'hd5: aes_sbox_inv = 8'hb5; 8'hd6: aes_sbox_inv = 8'h4a; 8'hd7: aes_sbox_inv = 8'h0d; 8'hd8: aes_sbox_inv = 8'h2d; 8'hd9: aes_sbox_inv = 8'he5;
8'hda: aes_sbox_inv = 8'h7a; 8'hdb: aes_sbox_inv = 8'h9f; 8'hdc: aes_sbox_inv = 8'h93; 8'hdd: aes_sbox_inv = 8'hc9; 8'hde: aes_sbox_inv = 8'h9c; 8'hdf: aes_sbox_inv = 8'hef;
8'he0: aes_sbox_inv = 8'ha0; 8'he1: aes_sbox_inv = 8'he0; 8'he2: aes_sbox_inv = 8'h3b; 8'he3: aes_sbox_inv = 8'h4d; 8'he4: aes_sbox_inv = 8'hae; 8'he5: aes_sbox_inv = 8'h2a;
8'he6: aes_sbox_inv = 8'hf5; 8'he7: aes_sbox_inv = 8'hb0; 8'he8: aes_sbox_inv = 8'hc8; 8'he9: aes_sbox_inv = 8'heb; 8'hea: aes_sbox_inv = 8'hbb; 8'heb: aes_sbox_inv = 8'h3c;
8'hec: aes_sbox_inv = 8'h83; 8'hed: aes_sbox_inv = 8'h53; 8'hee: aes_sbox_inv = 8'h99; 8'hef: aes_sbox_inv = 8'h61; 8'hf0: aes_sbox_inv = 8'h17; 8'hf1: aes_sbox_inv = 8'h2b;
8'hf2: aes_sbox_inv = 8'h04; 8'hf3: aes_sbox_inv = 8'h7e; 8'hf4: aes_sbox_inv = 8'hba; 8'hf5: aes_sbox_inv = 8'h77; 8'hf6: aes_sbox_inv = 8'hd6; 8'hf7: aes_sbox_inv = 8'h26;
8'hf8: aes_sbox_inv = 8'he1; 8'hf9: aes_sbox_inv = 8'h69; 8'hfa: aes_sbox_inv = 8'h14; 8'hfb: aes_sbox_inv = 8'h63; 8'hfc: aes_sbox_inv = 8'h55; 8'hfd: aes_sbox_inv = 8'h21;
8'hfe: aes_sbox_inv = 8'h0c; 8'hff: aes_sbox_inv = 8'h7d;
// Only reachable in simulation when si contains X/Z bits (no 2-state value
// is left uncovered); returns a defined zero byte in that case.
default: aes_sbox_inv = 8'h00;
endcase
endfunction
// AES MixColumns Inverse
// Applies the inverse MixColumns matrix to one 32-bit state column.
//   x       : input column, x[7:0] is byte 0 and x[31:24] is byte 3
//   returns : transformed column packed in the same byte order
// Each output byte is the XOR of the four input bytes multiplied (via gfmul)
// by a rotation of the coefficient set {0xE, 0xB, 0xD, 0x9}.
function logic [31:0] aes_mixcolumn_inv(input logic [31:0] x);
  logic [7:0] s0, s1, s2, s3;  // input bytes, least significant first
  logic [7:0] b0, b1, b2, b3;  // output bytes, least significant first

  s0 = x[7:0];
  s1 = x[15:8];
  s2 = x[23:16];
  s3 = x[31:24];

  b0 = gfmul(s0, 4'hE) ^ gfmul(s1, 4'hB) ^ gfmul(s2, 4'hD) ^ gfmul(s3, 4'h9);
  b1 = gfmul(s0, 4'h9) ^ gfmul(s1, 4'hE) ^ gfmul(s2, 4'hB) ^ gfmul(s3, 4'hD);
  b2 = gfmul(s0, 4'hD) ^ gfmul(s1, 4'h9) ^ gfmul(s2, 4'hE) ^ gfmul(s3, 4'hB);
  b3 = gfmul(s0, 4'hB) ^ gfmul(s1, 4'hD) ^ gfmul(s2, 4'h9) ^ gfmul(s3, 4'hE);

  aes_mixcolumn_inv = {b3, b2, b1, b0};
endfunction
// GF multiplication
// Multiplies a byte by a 4-bit constant in the AES finite field GF(2^8).
//   x       : 8-bit field element (an AES state byte)
//   y       : 4-bit multiplier; bit k selects the term x * 2^k
//   returns : 8-bit product x * y, reduced modulo the AES polynomial
// Declared automatic so the local temporaries are per-call rather than
// shared static storage.
function automatic logic [7:0] gfmul(input logic [7:0] x, input logic [3:0] y);
  logic [7:0] x2, x4, x8;
  logic [7:0] result;

  // Successive doublings ("xtime"): shift left by one and, when the bit
  // shifted out was set, fold it back in with the reduction constant 0x1B
  // (x^8 + x^4 + x^3 + x + 1 with the x^8 term dropped).
  // Computed once and shared by all multiplier bits.
  x2 = (x << 1) ^ ((x[7]) ? 8'h1B : 8'h00);
  x4 = (x2 << 1) ^ ((x2[7]) ? 8'h1B : 8'h00);
  x8 = (x4 << 1) ^ ((x4[7]) ? 8'h1B : 8'h00);

  // Field addition is XOR: accumulate the partial products selected by y.
  result = 8'h00;
  if (y[0]) result ^= x;
  if (y[1]) result ^= x2;
  if (y[2]) result ^= x4;
  if (y[3]) result ^= x8;
  return result;
endfunction
endpackage
86 changes: 86 additions & 0 deletions docs/01_cva6_user/RISCV_Instructions_RVZbkx.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
..
Copyright (c) 2023 OpenHW Group
Copyright (c) 2023 10xEngineers
SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

.. Level 1
=======
Level 2
-------
Level 3
~~~~~~~
Level 4
^^^^^^^
.. _cva6_riscv_instructions_RV32Zbkx:

*Applicability of this chapter to configurations:*

.. csv-table::
:widths: auto
:align: left
:header: "Configuration", "Implementation"

"CV32A60AX", "Implemented extension"
"CV64A6_MMU", "Implemented extension"

=========================================
RVZbkx: Crossbar permutation instructions
=========================================

The following instructions comprise the Zbkx extension:

Xperm instructions
--------------------
The xperm instructions perform a table lookup (crossbar permutation) on a register. Each byte (xperm8) or nibble (xperm4) of rs2 is used as an index that selects a byte or nibble of rs1. The selected element is written to the destination register (rd) at the same position as the index that selected it. If an index taken from rs2 is out of range, the corresponding element of rd is set to 0.

+-----------+-----------+-----------------------+
| RV32      | RV64      | Mnemonic              |
+===========+===========+=======================+
| ✔         | ✔         | xperm8 rd, rs1, rs2   |
+-----------+-----------+-----------------------+
| ✔         | ✔         | xperm4 rd, rs1, rs2   |
+-----------+-----------+-----------------------+


RV32 and RV64 Instructions
~~~~~~~~~~~~~~~~~~~~~~~~~~


- **XPERM8**: Crossbar permutation (bytes)

**Format**: xperm8 rd, rs1, rs2

**Description**: The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is each element in rs2 replaced by the element of rs1 that it indexes, or by zero if that index is out of bounds (greater than or equal to XLEN/8).

**Pseudocode**: foreach (i from 0 to (xlen/8 - 1)) {
if (rs2[i*8+:8]<(xlen/8))
X(rd)[i*8+:8] = rs1[rs2[i*8+:8]*8+:8];
else
X(rd)[i*8+:8] = 8'b0;
}

**Invalid values**: NONE

**Exception raised**: NONE

- **XPERM4**: Crossbar permutation (nibbles)

**Format**: xperm4 rd, rs1, rs2

**Description**: The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is each element in rs2 replaced by the element of rs1 that it indexes, or by zero if that index is out of bounds (greater than or equal to XLEN/4).

**Pseudocode**: foreach (i from 0 to (xlen/4 - 1)) {
if (rs2[i*4+:4]<(xlen/4))
X(rd)[i*4+:4] = rs1[rs2[i*4+:4]*4+:4];
else
X(rd)[i*4+:4] = 4'b0;
}

**Invalid values**: NONE

**Exception raised**: NONE
4 changes: 2 additions & 2 deletions verif/sim/cva6.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ def load_config(args, cwd):
args.isa = "rv64gc_zba_zbb_zbs_zbc"
elif base in ("cv64a6_imafdc_sv39", "cv64a6_imafdc_sv39_hpdcache", "cv64a6_imafdc_sv39_hpdcache_wb"):
args.mabi = "lp64d"
args.isa = "rv64gc_zba_zbb_zbs_zbc_zbkb_zbkx_zkne"
args.isa = "rv64gc_zba_zbb_zbs_zbc_zbkb_zbkx_zkne_zknd"
elif base == "cv32a60x":
args.mabi = "ilp32"
args.isa = "rv32imc_zba_zbb_zbs_zbc"
Expand All @@ -899,7 +899,7 @@ def load_config(args, cwd):
args.isa = "rv32imac"
elif base == "cv32a6_imac_sv32":
args.mabi = "ilp32"
args.isa = "rv32imac_zbkb_zbkx_zkne"
args.isa = "rv32imac_zbkb_zbkx_zkne_zknd"
elif base == "cv32a6_imafc_sv32":
args.mabi = "ilp32f"
args.isa = "rv32imafc"
Expand Down
15 changes: 15 additions & 0 deletions verif/tests/testlist_riscv-arch-test-cv64a6_imafdc_sv39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1017,3 +1017,18 @@ testlist:
# iterations: 1
# <<: *common_test_config
# asm_tests: <path_var>/riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64ks1i-01.S

- test: rv64i_m-aes64ds-01
iterations: 1
<<: *common_test_config
asm_tests: <path_var>/riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64ds-01.S

- test: rv64i_m-aes64dsm-01
iterations: 1
<<: *common_test_config
asm_tests: <path_var>/riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64dsm-01.S

- test: rv64i_m-aes64im-01
iterations: 1
<<: *common_test_config
asm_tests: <path_var>/riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64im-01.S

0 comments on commit 82a8f63

Please sign in to comment.